github.com/apache/arrow/go/v16@v16.1.0/arrow/compute/internal/kernels/cast_numeric_avx2_amd64.s (about)

     1  //go:build !noasm && !appengine && go1.18
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x43e0000000000000 // float64 bit pattern of 2^63
     5  DATA LCDATA1<>+0x008(SB)/8, $0x4330000000000000 // float64 bit pattern of 2^52
     6  DATA LCDATA1<>+0x010(SB)/8, $0x4530000000000000 // float64 bit pattern of 2^84
     7  DATA LCDATA1<>+0x018(SB)/8, $0x4530000000100000 // float64 bit pattern of 2^84 + 2^52
     8  DATA LCDATA1<>+0x020(SB)/8, $0x0000000000000001 // integer 1
     9  DATA LCDATA1<>+0x028(SB)/8, $0x4f0000005f000000 // two dwords in memory order: 0x5f000000 (float32 2^63), 0x4f000000 (float32 2^31)
    10  DATA LCDATA1<>+0x030(SB)/8, $0x4b00000080000000 // two dwords in memory order: 0x80000000 (only bit 31 set), 0x4b000000 (float32 2^23)
    11  DATA LCDATA1<>+0x038(SB)/8, $0x5300008053000000 // two dwords in memory order: 0x53000000 (float32 2^39), 0x53000080 (float32 2^39 + 2^23)
    12  DATA LCDATA1<>+0x040(SB)/8, $0x0000000000000800 // integer 0x800 (2048) - NOTE(review): purpose not visible in this part of the file
    13  DATA LCDATA1<>+0x048(SB)/8, $0x0000000000000000 // zero
    14  DATA LCDATA1<>+0x050(SB)/8, $0x4530000043300000 // two dwords in memory order: 0x43300000, 0x45300000 (high halves of the 2^52 and 2^84 float64 patterns)
    15  DATA LCDATA1<>+0x058(SB)/8, $0x0000000000000000 // zero
    16  DATA LCDATA1<>+0x060(SB)/8, $0x4330000000000000 // float64 bit pattern of 2^52 (same value as offset 0x008)
    17  DATA LCDATA1<>+0x068(SB)/8, $0x4530000000000000 // float64 bit pattern of 2^84 (same value as offset 0x010)
    18  DATA LCDATA1<>+0x070(SB)/8, $0x000000000c080400 // bytes in memory order: 00 04 08 0c 00 00 00 00 - presumably a byte-shuffle index taking byte 0 of each dword, confirm at use site
    19  DATA LCDATA1<>+0x078(SB)/8, $0x0000000000000000 // zero
    20  DATA LCDATA1<>+0x080(SB)/8, $0x0d0c090805040100 // bytes in memory order: 00 01 04 05 08 09 0c 0d - presumably a byte-shuffle index taking the low word of each dword, confirm at use site
    21  DATA LCDATA1<>+0x088(SB)/8, $0x0f0e0d0c0d0c0908 // bytes in memory order: 08 09 0c 0d 0c 0d 0e 0f
    22  DATA LCDATA1<>+0x090(SB)/8, $0x1d1c191815141110 // bytes in memory order: 10 11 14 15 18 19 1c 1d (previous 16 bytes with 0x10 added to each index)
    23  DATA LCDATA1<>+0x098(SB)/8, $0x1f1e1d1c1d1c1918 // bytes in memory order: 18 19 1c 1d 1c 1d 1e 1f
    24  DATA LCDATA1<>+0x0a0(SB)/8, $0x00ff00ff00ff00ff // 0x00ff in every 16-bit word - presumably an AND mask keeping the low byte of each word, confirm at use site
    25  DATA LCDATA1<>+0x0a8(SB)/8, $0x00ff00ff00ff00ff // same pattern, bytes 8-15 of the 32-byte run starting at 0x0a0
    26  DATA LCDATA1<>+0x0b0(SB)/8, $0x00ff00ff00ff00ff // same pattern, bytes 16-23
    27  DATA LCDATA1<>+0x0b8(SB)/8, $0x00ff00ff00ff00ff // same pattern, bytes 24-31
    28  GLOBL LCDATA1<>(SB), 8, $192 // file-local (the <> suffix) 192-byte table covering offsets 0x000-0x0bf. Flag 8 is RODATA in Go's textflag.h. The function below loads this table's address into BP
    29  
    30  TEXT ·_cast_type_numeric_avx2(SB), $0-40
    31  
    32  	MOVQ itype+0(FP), DI
    33  	MOVQ otype+8(FP), SI
    34  	MOVQ in+16(FP), DX
    35  	MOVQ out+24(FP), CX
    36  	MOVQ len+32(FP), R8
    37  	LEAQ LCDATA1<>(SB), BP
    38  
    39  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    40  	JG   LBB0_13
    41  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
    42  	JLE  LBB0_25
    43  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
    44  	JE   LBB0_45
    45  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
    46  	JE   LBB0_53
    47  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    48  	JNE  LBB0_1553
    49  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
    50  	JG   LBB0_93
    51  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
    52  	JLE  LBB0_163
    53  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
    54  	JE   LBB0_263
    55  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
    56  	JE   LBB0_266
    57  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
    58  	JNE  LBB0_1553
    59  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
    60  	JLE  LBB0_1553
    61  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
    62  	LONG $0x20f88341         // cmp    r8d, 32
    63  	JB   LBB0_12
    64  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
    65  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
    66  	JBE  LBB0_742
    67  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
    68  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
    69  	JBE  LBB0_742
    70  
    71  LBB0_12:
    72  	WORD $0xf631 // xor    esi, esi
    73  
    74  LBB0_1189:
    75  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
    76  	WORD $0xf749; BYTE $0xd0 // not    r8
    77  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
    78  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
    79  	LONG $0x03e78348         // and    rdi, 3
    80  	JE   LBB0_1191
    81  
    82  LBB0_1190:
    83  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
    84  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
    85  	LONG $0x01c68348         // add    rsi, 1
    86  	LONG $0xffc78348         // add    rdi, -1
    87  	JNE  LBB0_1190
    88  
    89  LBB0_1191:
    90  	LONG $0x03f88349 // cmp    r8, 3
    91  	JB   LBB0_1553
    92  
    93  LBB0_1192:
    94  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
    95  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
    96  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
    97  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
    98  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
    99  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
   100  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
   101  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
   102  	LONG $0x04c68348         // add    rsi, 4
   103  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   104  	JNE  LBB0_1192
   105  	JMP  LBB0_1553
   106  
   107  LBB0_13:
   108  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   109  	JLE  LBB0_35
   110  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   111  	JE   LBB0_61
   112  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   113  	JE   LBB0_69
   114  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   115  	JNE  LBB0_1553
   116  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   117  	JG   LBB0_100
   118  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   119  	JLE  LBB0_168
   120  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   121  	JE   LBB0_269
   122  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   123  	JE   LBB0_272
   124  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   125  	JNE  LBB0_1553
   126  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   127  	JLE  LBB0_1553
   128  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   129  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   130  	WORD $0xf089             // mov    eax, esi
   131  	WORD $0xe083; BYTE $0x03 // and    eax, 3
   132  	LONG $0x03ff8348         // cmp    rdi, 3
   133  	JAE  LBB0_446
   134  	WORD $0xff31             // xor    edi, edi
   135  	JMP  LBB0_448
   136  
   137  LBB0_25:
   138  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   139  	JE   LBB0_77
   140  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   141  	JNE  LBB0_1553
   142  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   143  	JG   LBB0_107
   144  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   145  	JLE  LBB0_173
   146  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   147  	JE   LBB0_275
   148  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   149  	JE   LBB0_278
   150  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   151  	JNE  LBB0_1553
   152  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   153  	JLE  LBB0_1553
   154  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   155  	LONG $0x20f88341         // cmp    r8d, 32
   156  	JB   LBB0_34
   157  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   158  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   159  	JBE  LBB0_745
   160  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   161  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   162  	JBE  LBB0_745
   163  
   164  LBB0_34:
   165  	WORD $0xf631 // xor    esi, esi
   166  
   167  LBB0_1197:
   168  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   169  	WORD $0xf749; BYTE $0xd0 // not    r8
   170  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   171  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   172  	LONG $0x03e78348         // and    rdi, 3
   173  	JE   LBB0_1199
   174  
   175  LBB0_1198:
   176  	LONG $0x3204be0f         // movsx    eax, byte [rdx + rsi]
   177  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   178  	LONG $0x01c68348         // add    rsi, 1
   179  	LONG $0xffc78348         // add    rdi, -1
   180  	JNE  LBB0_1198
   181  
   182  LBB0_1199:
   183  	LONG $0x03f88349 // cmp    r8, 3
   184  	JB   LBB0_1553
   185  
   186  LBB0_1200:
   187  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
   188  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
   189  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
   190  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
   191  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
   192  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
   193  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
   194  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
   195  	LONG $0x04c68348             // add    rsi, 4
   196  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   197  	JNE  LBB0_1200
   198  	JMP  LBB0_1553
   199  
   200  LBB0_35:
   201  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
   202  	JE   LBB0_85
   203  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   204  	JNE  LBB0_1553
   205  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   206  	JG   LBB0_114
   207  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   208  	JLE  LBB0_178
   209  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   210  	JE   LBB0_281
   211  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   212  	JE   LBB0_284
   213  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   214  	JNE  LBB0_1553
   215  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   216  	JLE  LBB0_1553
   217  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   218  	LONG $0x10f88341         // cmp    r8d, 16
   219  	JAE  LBB0_454
   220  	WORD $0xf631             // xor    esi, esi
   221  	JMP  LBB0_918
   222  
   223  LBB0_45:
   224  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   225  	JG   LBB0_121
   226  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   227  	JLE  LBB0_183
   228  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   229  	JE   LBB0_287
   230  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   231  	JE   LBB0_290
   232  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   233  	JNE  LBB0_1553
   234  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   235  	JLE  LBB0_1553
   236  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   237  	LONG $0x20f88341         // cmp    r8d, 32
   238  	JAE  LBB0_457
   239  	WORD $0xf631             // xor    esi, esi
   240  	JMP  LBB0_1024
   241  
   242  LBB0_53:
   243  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   244  	JG   LBB0_128
   245  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   246  	JLE  LBB0_188
   247  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   248  	JE   LBB0_293
   249  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   250  	JE   LBB0_296
   251  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   252  	JNE  LBB0_1553
   253  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   254  	JLE  LBB0_1553
   255  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   256  	LONG $0x20f88341         // cmp    r8d, 32
   257  	JAE  LBB0_460
   258  	WORD $0xf631             // xor    esi, esi
   259  	JMP  LBB0_1029
   260  
   261  LBB0_61:
   262  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   263  	JG   LBB0_135
   264  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   265  	JLE  LBB0_193
   266  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   267  	JE   LBB0_299
   268  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   269  	JE   LBB0_302
   270  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   271  	JNE  LBB0_1553
   272  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   273  	JLE  LBB0_1553
   274  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   275  	LONG $0x10f88341         // cmp    r8d, 16
   276  	JAE  LBB0_463
   277  	WORD $0xf631             // xor    esi, esi
   278  	JMP  LBB0_1034
   279  
   280  LBB0_69:
   281  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   282  	JG   LBB0_142
   283  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   284  	JLE  LBB0_198
   285  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   286  	JE   LBB0_305
   287  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   288  	JE   LBB0_308
   289  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   290  	JNE  LBB0_1553
   291  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   292  	JLE  LBB0_1553
   293  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   294  	LONG $0x10f88341         // cmp    r8d, 16
   295  	JAE  LBB0_466
   296  	WORD $0xf631             // xor    esi, esi
   297  	JMP  LBB0_1039
   298  
   299  LBB0_77:
   300  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   301  	JG   LBB0_149
   302  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   303  	JLE  LBB0_203
   304  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   305  	JE   LBB0_311
   306  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   307  	JE   LBB0_314
   308  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   309  	JNE  LBB0_1553
   310  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   311  	JLE  LBB0_1553
   312  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   313  	LONG $0x20f88341         // cmp    r8d, 32
   314  	JB   LBB0_84
   315  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   316  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   317  	JBE  LBB0_748
   318  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   319  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   320  	JBE  LBB0_748
   321  
   322  LBB0_84:
   323  	WORD $0xf631 // xor    esi, esi
   324  
   325  LBB0_1205:
   326  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   327  	WORD $0xf749; BYTE $0xd0 // not    r8
   328  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   329  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   330  	LONG $0x03e78348         // and    rdi, 3
   331  	JE   LBB0_1207
   332  
   333  LBB0_1206:
   334  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
   335  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   336  	LONG $0x01c68348         // add    rsi, 1
   337  	LONG $0xffc78348         // add    rdi, -1
   338  	JNE  LBB0_1206
   339  
   340  LBB0_1207:
   341  	LONG $0x03f88349 // cmp    r8, 3
   342  	JB   LBB0_1553
   343  
   344  LBB0_1208:
   345  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   346  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
   347  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   348  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
   349  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   350  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
   351  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   352  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
   353  	LONG $0x04c68348             // add    rsi, 4
   354  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   355  	JNE  LBB0_1208
   356  	JMP  LBB0_1553
   357  
   358  LBB0_85:
   359  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   360  	JG   LBB0_156
   361  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   362  	JLE  LBB0_208
   363  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   364  	JE   LBB0_317
   365  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   366  	JE   LBB0_320
   367  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   368  	JNE  LBB0_1553
   369  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   370  	JLE  LBB0_1553
   371  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   372  	LONG $0x20f88341         // cmp    r8d, 32
   373  	JB   LBB0_92
   374  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
   375  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   376  	JBE  LBB0_751
   377  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   378  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   379  	JBE  LBB0_751
   380  
   381  LBB0_92:
   382  	WORD $0xf631 // xor    esi, esi
   383  
   384  LBB0_1213:
   385  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   386  	WORD $0xf749; BYTE $0xd0 // not    r8
   387  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   388  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   389  	LONG $0x03e78348         // and    rdi, 3
   390  	JE   LBB0_1215
   391  
   392  LBB0_1214:
   393  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   394  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   395  	LONG $0x01c68348         // add    rsi, 1
   396  	LONG $0xffc78348         // add    rdi, -1
   397  	JNE  LBB0_1214
   398  
   399  LBB0_1215:
   400  	LONG $0x03f88349 // cmp    r8, 3
   401  	JB   LBB0_1553
   402  
   403  LBB0_1216:
   404  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   405  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   406  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
   407  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
   408  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
   409  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
   410  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
   411  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
   412  	LONG $0x04c68348         // add    rsi, 4
   413  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   414  	JNE  LBB0_1216
   415  	JMP  LBB0_1553
   416  
   417  LBB0_93:
   418  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   419  	JLE  LBB0_213
   420  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   421  	JE   LBB0_323
   422  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   423  	JE   LBB0_326
   424  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   425  	JNE  LBB0_1553
   426  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   427  	JLE  LBB0_1553
   428  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   429  	LONG $0x10f88341         // cmp    r8d, 16
   430  	JAE  LBB0_475
   431  	WORD $0xf631             // xor    esi, esi
   432  	JMP  LBB0_1044
   433  
   434  LBB0_100:
   435  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   436  	JLE  LBB0_218
   437  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   438  	JE   LBB0_329
   439  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   440  	JE   LBB0_332
   441  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   442  	JNE  LBB0_1553
   443  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   444  	JLE  LBB0_1553
   445  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   446  	LONG $0x10f88341         // cmp    r8d, 16
   447  	JB   LBB0_106
   448  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   449  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   450  	JBE  LBB0_754
   451  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
   452  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   453  	JBE  LBB0_754
   454  
   455  LBB0_106:
   456  	WORD $0xf631 // xor    esi, esi
   457  
   458  LBB0_1221:
   459  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   460  	WORD $0xf748; BYTE $0xd0 // not    rax
   461  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
   462  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   463  	LONG $0x07e78348         // and    rdi, 7
   464  	JE   LBB0_1223
   465  
   466  LBB0_1222:
   467  	LONG $0xf21c8b48 // mov    rbx, qword [rdx + 8*rsi]
   468  	LONG $0xf11c8948 // mov    qword [rcx + 8*rsi], rbx
   469  	LONG $0x01c68348 // add    rsi, 1
   470  	LONG $0xffc78348 // add    rdi, -1
   471  	JNE  LBB0_1222
   472  
   473  LBB0_1223:
   474  	LONG $0x07f88348 // cmp    rax, 7
   475  	JB   LBB0_1553
   476  
   477  LBB0_1224:
   478  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
   479  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
   480  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
   481  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
   482  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
   483  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
   484  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
   485  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
   486  	LONG $0xf2448b48; BYTE $0x20 // mov    rax, qword [rdx + 8*rsi + 32]
   487  	LONG $0xf1448948; BYTE $0x20 // mov    qword [rcx + 8*rsi + 32], rax
   488  	LONG $0xf2448b48; BYTE $0x28 // mov    rax, qword [rdx + 8*rsi + 40]
   489  	LONG $0xf1448948; BYTE $0x28 // mov    qword [rcx + 8*rsi + 40], rax
   490  	LONG $0xf2448b48; BYTE $0x30 // mov    rax, qword [rdx + 8*rsi + 48]
   491  	LONG $0xf1448948; BYTE $0x30 // mov    qword [rcx + 8*rsi + 48], rax
   492  	LONG $0xf2448b48; BYTE $0x38 // mov    rax, qword [rdx + 8*rsi + 56]
   493  	LONG $0xf1448948; BYTE $0x38 // mov    qword [rcx + 8*rsi + 56], rax
   494  	LONG $0x08c68348             // add    rsi, 8
   495  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   496  	JNE  LBB0_1224
   497  	JMP  LBB0_1553
   498  
   499  LBB0_107:
   500  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   501  	JLE  LBB0_223
   502  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   503  	JE   LBB0_335
   504  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   505  	JE   LBB0_338
   506  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   507  	JNE  LBB0_1553
   508  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   509  	JLE  LBB0_1553
   510  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   511  	LONG $0x10f88341         // cmp    r8d, 16
   512  	JB   LBB0_113
   513  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   514  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   515  	JBE  LBB0_757
   516  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
   517  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   518  	JBE  LBB0_757
   519  
   520  LBB0_113:
   521  	WORD $0xf631 // xor    esi, esi
   522  
   523  LBB0_1229:
   524  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   525  	WORD $0xf749; BYTE $0xd0 // not    r8
   526  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   527  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   528  	LONG $0x03e78348         // and    rdi, 3
   529  	JE   LBB0_1231
   530  
   531  LBB0_1230:
   532  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
   533  	LONG $0xc02adbc5             // vcvtsi2sd    xmm0, xmm4, eax
   534  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
   535  	LONG $0x01c68348             // add    rsi, 1
   536  	LONG $0xffc78348             // add    rdi, -1
   537  	JNE  LBB0_1230
   538  
   539  LBB0_1231:
   540  	LONG $0x03f88349 // cmp    r8, 3
   541  	JB   LBB0_1553
   542  
   543  LBB0_1232:
   544  	LONG $0x3204be0f               // movsx    eax, byte [rdx + rsi]
   545  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   546  	LONG $0x0411fbc5; BYTE $0xf1   // vmovsd    qword [rcx + 8*rsi], xmm0
   547  	LONG $0x3244be0f; BYTE $0x01   // movsx    eax, byte [rdx + rsi + 1]
   548  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   549  	LONG $0x4411fbc5; WORD $0x08f1 // vmovsd    qword [rcx + 8*rsi + 8], xmm0
   550  	LONG $0x3244be0f; BYTE $0x02   // movsx    eax, byte [rdx + rsi + 2]
   551  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   552  	LONG $0x4411fbc5; WORD $0x10f1 // vmovsd    qword [rcx + 8*rsi + 16], xmm0
   553  	LONG $0x3244be0f; BYTE $0x03   // movsx    eax, byte [rdx + rsi + 3]
   554  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   555  	LONG $0x4411fbc5; WORD $0x18f1 // vmovsd    qword [rcx + 8*rsi + 24], xmm0
   556  	LONG $0x04c68348               // add    rsi, 4
   557  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   558  	JNE  LBB0_1232
   559  	JMP  LBB0_1553
   560  
   561  LBB0_114:
   562  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   563  	JLE  LBB0_228
   564  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   565  	JE   LBB0_341
   566  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   567  	JE   LBB0_344
   568  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   569  	JNE  LBB0_1553
   570  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   571  	JLE  LBB0_1553
   572  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   573  	LONG $0x10f88341         // cmp    r8d, 16
   574  	JAE  LBB0_484
   575  	WORD $0xf631             // xor    esi, esi
   576  	JMP  LBB0_923
   577  
   578  LBB0_121:
   579  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   580  	JLE  LBB0_233
   581  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   582  	JE   LBB0_347
   583  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   584  	JE   LBB0_350
   585  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   586  	JNE  LBB0_1553
   587  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   588  	JLE  LBB0_1553
   589  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   590  	LONG $0x10f88341         // cmp    r8d, 16
   591  	JAE  LBB0_487
   592  	WORD $0xf631             // xor    esi, esi
   593  	JMP  LBB0_1049
   594  
   595  LBB0_128:
   596  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   597  	JLE  LBB0_238
   598  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   599  	JE   LBB0_353
   600  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   601  	JE   LBB0_356
   602  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   603  	JNE  LBB0_1553
   604  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   605  	JLE  LBB0_1553
   606  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   607  	LONG $0x10f88341         // cmp    r8d, 16
   608  	JAE  LBB0_490
   609  	WORD $0xf631             // xor    esi, esi
   610  	JMP  LBB0_1054
   611  
   612  LBB0_135:
   613  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   614  	JLE  LBB0_243
   615  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   616  	JE   LBB0_359
   617  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   618  	JE   LBB0_362
   619  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   620  	JNE  LBB0_1553
   621  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   622  	JLE  LBB0_1553
   623  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   624  	LONG $0x10f88341         // cmp    r8d, 16
   625  	JAE  LBB0_493
   626  	WORD $0xf631             // xor    esi, esi
   627  	JMP  LBB0_1059
   628  
   629  LBB0_142:
   630  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   631  	JLE  LBB0_248
   632  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   633  	JE   LBB0_365
   634  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   635  	JE   LBB0_368
   636  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   637  	JNE  LBB0_1553
   638  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   639  	JLE  LBB0_1553
   640  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   641  	LONG $0x10f88341         // cmp    r8d, 16
   642  	JAE  LBB0_496
   643  	WORD $0xf631             // xor    esi, esi
   644  	JMP  LBB0_1064
   645  
   646  LBB0_149:
   647  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   648  	JLE  LBB0_253
   649  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   650  	JE   LBB0_371
   651  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   652  	JE   LBB0_374
   653  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   654  	JNE  LBB0_1553
   655  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   656  	JLE  LBB0_1553
   657  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   658  	LONG $0x10f88341         // cmp    r8d, 16
   659  	JB   LBB0_155
   660  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   661  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   662  	JBE  LBB0_760
   663  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
   664  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   665  	JBE  LBB0_760
   666  
   667  LBB0_155:
   668  	WORD $0xf631 // xor    esi, esi
   669  
   670  LBB0_1237:
   671  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   672  	WORD $0xf749; BYTE $0xd0 // not    r8
   673  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   674  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   675  	LONG $0x03e78348         // and    rdi, 3
   676  	JE   LBB0_1239
   677  
   678  LBB0_1238:
   679  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   680  	LONG $0xc02adbc5             // vcvtsi2sd    xmm0, xmm4, eax
   681  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
   682  	LONG $0x01c68348             // add    rsi, 1
   683  	LONG $0xffc78348             // add    rdi, -1
   684  	JNE  LBB0_1238
   685  
   686  LBB0_1239:
   687  	LONG $0x03f88349 // cmp    r8, 3
   688  	JB   LBB0_1553
   689  
   690  LBB0_1240:
   691  	LONG $0x3204b60f               // movzx    eax, byte [rdx + rsi]
   692  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   693  	LONG $0x0411fbc5; BYTE $0xf1   // vmovsd    qword [rcx + 8*rsi], xmm0
   694  	LONG $0x3244b60f; BYTE $0x01   // movzx    eax, byte [rdx + rsi + 1]
   695  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   696  	LONG $0x4411fbc5; WORD $0x08f1 // vmovsd    qword [rcx + 8*rsi + 8], xmm0
   697  	LONG $0x3244b60f; BYTE $0x02   // movzx    eax, byte [rdx + rsi + 2]
   698  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   699  	LONG $0x4411fbc5; WORD $0x10f1 // vmovsd    qword [rcx + 8*rsi + 16], xmm0
   700  	LONG $0x3244b60f; BYTE $0x03   // movzx    eax, byte [rdx + rsi + 3]
   701  	LONG $0xc02adbc5               // vcvtsi2sd    xmm0, xmm4, eax
   702  	LONG $0x4411fbc5; WORD $0x18f1 // vmovsd    qword [rcx + 8*rsi + 24], xmm0
   703  	LONG $0x04c68348               // add    rsi, 4
   704  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   705  	JNE  LBB0_1240
   706  	JMP  LBB0_1553
   707  
   708  LBB0_156:
   709  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   710  	JLE  LBB0_258
   711  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   712  	JE   LBB0_377
   713  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   714  	JE   LBB0_380
   715  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   716  	JNE  LBB0_1553
   717  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   718  	JLE  LBB0_1553
   719  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   720  	LONG $0x10f88341         // cmp    r8d, 16
   721  	JAE  LBB0_502
   722  	WORD $0xf631             // xor    esi, esi
   723  	JMP  LBB0_929
   724  
   725  LBB0_163:
   726  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   727  	JE   LBB0_383
   728  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   729  	JNE  LBB0_1553
   730  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   731  	JLE  LBB0_1553
   732  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   733  	LONG $0x20f88341         // cmp    r8d, 32
   734  	JB   LBB0_167
   735  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
   736  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   737  	JBE  LBB0_763
   738  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   739  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   740  	JBE  LBB0_763
   741  
   742  LBB0_167:
   743  	WORD $0xf631 // xor    esi, esi
   744  
   745  LBB0_1245:
   746  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   747  	WORD $0xf749; BYTE $0xd0 // not    r8
   748  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   749  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   750  	LONG $0x03e78348         // and    rdi, 3
   751  	JE   LBB0_1247
   752  
   753  LBB0_1246:
   754  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
   755  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   756  	LONG $0x01c68348         // add    rsi, 1
   757  	LONG $0xffc78348         // add    rdi, -1
   758  	JNE  LBB0_1246
   759  
   760  LBB0_1247:
   761  	LONG $0x03f88349 // cmp    r8, 3
   762  	JB   LBB0_1553
   763  
   764  LBB0_1248:
   765  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
   766  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   767  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
   768  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   769  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
   770  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   771  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
   772  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   773  	LONG $0x04c68348             // add    rsi, 4
   774  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   775  	JNE  LBB0_1248
   776  	JMP  LBB0_1553
   777  
   778  LBB0_168: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_386, 3 -> double-to-byte truncating convert below, else exit.
   779  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   780  	JE   LBB0_386
   781  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   782  	JNE  LBB0_1553
   783  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   784  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
   785  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   786  	LONG $0x10f88341         // cmp    r8d, 16
   787  	JB   LBB0_172 // fewer than 16 elements: use the scalar path
   788  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   789  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   790  	JBE  LBB0_766 // source range ends at or before rcx: no overlap, take LBB0_766 (outside this view)
   791  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   792  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   793  	JBE  LBB0_766 // destination range ends at or before rdx: no overlap
   794  
   795  LBB0_172: // Scalar path: start at element index 0.
   796  	WORD $0xf631 // xor    esi, esi
   797  
   798  LBB0_1253: // Computes rax = r9 - rsi - 1 and rdi = r9 & 3 (rax is used here, so the peel loop converts into ebx).
   799  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   800  	WORD $0xf748; BYTE $0xd0 // not    rax
   801  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
   802  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   803  	LONG $0x03e78348         // and    rdi, 3
   804  	JE   LBB0_1255 // r9 & 3 == 0: skip the one-at-a-time loop
   805  
   806  LBB0_1254: // One element per iteration, rdi times: truncating double -> int32, store the low byte.
   807  	LONG $0x1c2cfbc5; BYTE $0xf2 // vcvttsd2si    ebx, qword [rdx + 8*rsi]
   808  	WORD $0x1c88; BYTE $0x31     // mov    byte [rcx + rsi], bl
   809  	LONG $0x01c68348             // add    rsi, 1
   810  	LONG $0xffc78348             // add    rdi, -1
   811  	JNE  LBB0_1254
   812  
   813  LBB0_1255: // rax < 3 means fewer than 4 elements were pending at LBB0_1253: skip the unrolled loop.
   814  	LONG $0x03f88348 // cmp    rax, 3
   815  	JB   LBB0_1553
   816  
   817  LBB0_1256: // Four elements per iteration until rsi == r9.
   818  	LONG $0x042cfbc5; BYTE $0xf2   // vcvttsd2si    eax, qword [rdx + 8*rsi]
   819  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
   820  	LONG $0x442cfbc5; WORD $0x08f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 8]
   821  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
   822  	LONG $0x442cfbc5; WORD $0x10f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 16]
   823  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
   824  	LONG $0x442cfbc5; WORD $0x18f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 24]
   825  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
   826  	LONG $0x04c68348               // add    rsi, 4
   827  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   828  	JNE  LBB0_1256
   829  	JMP  LBB0_1553
   830  
   831  LBB0_173: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_389, 3 -> byte-to-byte copy below, else exit.
   832  	WORD $0xfe83; BYTE $0x02                   // cmp    esi, 2
   833  	JE   LBB0_389
   834  	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
   835  	JNE  LBB0_1553
   836  	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
   837  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
   838  	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
   839  	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
   840  	JB   LBB0_177 // fewer than 128 elements: use the scalar path
   841  	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
   842  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
   843  	JBE  LBB0_769 // source range ends at or before rcx: no overlap, take LBB0_769 (outside this view)
   844  	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
   845  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
   846  	JBE  LBB0_769 // destination range ends at or before rdx: no overlap
   847  
   848  LBB0_177: // Scalar path: start at element index 0.
   849  	WORD $0xf631 // xor    esi, esi
   850  
   851  LBB0_1261: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
   852  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   853  	WORD $0xf749; BYTE $0xd0 // not    r8
   854  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   855  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   856  	LONG $0x03e78348         // and    rdi, 3
   857  	JE   LBB0_1263 // r9 & 3 == 0: skip the one-at-a-time loop
   858  
   859  LBB0_1262: // One element per iteration, rdi times: copy the byte at [rdx + rsi] to [rcx + rsi].
   860  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
   861  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   862  	LONG $0x01c68348         // add    rsi, 1
   863  	LONG $0xffc78348         // add    rdi, -1
   864  	JNE  LBB0_1262
   865  
   866  LBB0_1263: // r8 < 3 means fewer than 4 elements were pending at LBB0_1261: skip the unrolled loop.
   867  	LONG $0x03f88349 // cmp    r8, 3
   868  	JB   LBB0_1553
   869  
   870  LBB0_1264: // Four elements per iteration until rsi == r9.
   871  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   872  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   873  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   874  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   875  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   876  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   877  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   878  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   879  	LONG $0x04c68348             // add    rsi, 4
   880  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   881  	JNE  LBB0_1264
   882  	JMP  LBB0_1553
   883  
   884  LBB0_178: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_392, 3 -> 8-byte-to-byte narrowing below, else exit.
   885  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   886  	JE   LBB0_392
   887  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   888  	JNE  LBB0_1553
   889  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   890  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
   891  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   892  	LONG $0x10f88341         // cmp    r8d, 16
   893  	JB   LBB0_182 // fewer than 16 elements: use the scalar path
   894  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   895  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   896  	JBE  LBB0_772 // source range ends at or before rcx: no overlap, take LBB0_772 (outside this view)
   897  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   898  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   899  	JBE  LBB0_772 // destination range ends at or before rdx: no overlap
   900  
   901  LBB0_182: // Scalar path: start at element index 0.
   902  	WORD $0xf631 // xor    esi, esi
   903  
   904  LBB0_1269: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
   905  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   906  	WORD $0xf749; BYTE $0xd0 // not    r8
   907  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   908  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   909  	LONG $0x03e78348         // and    rdi, 3
   910  	JE   LBB0_1271 // r9 & 3 == 0: skip the one-at-a-time loop
   911  
   912  LBB0_1270: // One element per iteration, rdi times: store the byte at [rdx + 8*rsi] to [rcx + rsi].
   913  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
   914  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   915  	LONG $0x01c68348         // add    rsi, 1
   916  	LONG $0xffc78348         // add    rdi, -1
   917  	JNE  LBB0_1270
   918  
   919  LBB0_1271: // r8 < 3 means fewer than 4 elements were pending at LBB0_1269: skip the unrolled loop.
   920  	LONG $0x03f88349 // cmp    r8, 3
   921  	JB   LBB0_1553
   922  
   923  LBB0_1272: // Four elements per iteration until rsi == r9.
   924  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
   925  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   926  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
   927  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   928  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
   929  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   930  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
   931  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   932  	LONG $0x04c68348             // add    rsi, 4
   933  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   934  	JNE  LBB0_1272
   935  	JMP  LBB0_1553
   936  
   937  LBB0_183: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_395, 3 -> 2-byte-to-byte narrowing below, else exit.
   938  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   939  	JE   LBB0_395
   940  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   941  	JNE  LBB0_1553
   942  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   943  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
   944  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   945  	LONG $0x40f88341         // cmp    r8d, 64
   946  	JB   LBB0_187 // fewer than 64 elements: use the scalar path
   947  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
   948  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   949  	JBE  LBB0_775 // source range ends at or before rcx: no overlap, take LBB0_775 (outside this view)
   950  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   951  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   952  	JBE  LBB0_775 // destination range ends at or before rdx: no overlap
   953  
   954  LBB0_187: // Scalar path: start at element index 0.
   955  	WORD $0xf631 // xor    esi, esi
   956  
   957  LBB0_1277: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
   958  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   959  	WORD $0xf749; BYTE $0xd0 // not    r8
   960  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   961  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   962  	LONG $0x03e78348         // and    rdi, 3
   963  	JE   LBB0_1279 // r9 & 3 == 0: skip the one-at-a-time loop
   964  
   965  LBB0_1278: // One element per iteration, rdi times: store the byte at [rdx + 2*rsi] to [rcx + rsi].
   966  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
   967  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   968  	LONG $0x01c68348         // add    rsi, 1
   969  	LONG $0xffc78348         // add    rdi, -1
   970  	JNE  LBB0_1278
   971  
   972  LBB0_1279: // r8 < 3 means fewer than 4 elements were pending at LBB0_1277: skip the unrolled loop.
   973  	LONG $0x03f88349 // cmp    r8, 3
   974  	JB   LBB0_1553
   975  
   976  LBB0_1280: // Four elements per iteration until rsi == r9.
   977  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
   978  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   979  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
   980  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   981  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
   982  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   983  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
   984  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   985  	LONG $0x04c68348             // add    rsi, 4
   986  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   987  	JNE  LBB0_1280
   988  	JMP  LBB0_1553
   989  
   990  LBB0_188: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_398, 3 -> 2-byte-to-byte narrowing below, else exit.
   991  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   992  	JE   LBB0_398
   993  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   994  	JNE  LBB0_1553
   995  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   996  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
   997  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   998  	LONG $0x40f88341         // cmp    r8d, 64
   999  	JB   LBB0_192 // fewer than 64 elements: use the scalar path
  1000  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  1001  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1002  	JBE  LBB0_778 // source range ends at or before rcx: no overlap, take LBB0_778 (outside this view)
  1003  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1004  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1005  	JBE  LBB0_778 // destination range ends at or before rdx: no overlap
  1006  
  1007  LBB0_192: // Scalar path: start at element index 0.
  1008  	WORD $0xf631 // xor    esi, esi
  1009  
  1010  LBB0_1285: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1011  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1012  	WORD $0xf749; BYTE $0xd0 // not    r8
  1013  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1014  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1015  	LONG $0x03e78348         // and    rdi, 3
  1016  	JE   LBB0_1287 // r9 & 3 == 0: skip the one-at-a-time loop
  1017  
  1018  LBB0_1286: // One element per iteration, rdi times: store the byte at [rdx + 2*rsi] to [rcx + rsi].
  1019  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
  1020  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1021  	LONG $0x01c68348         // add    rsi, 1
  1022  	LONG $0xffc78348         // add    rdi, -1
  1023  	JNE  LBB0_1286
  1024  
  1025  LBB0_1287: // r8 < 3 means fewer than 4 elements were pending at LBB0_1285: skip the unrolled loop.
  1026  	LONG $0x03f88349 // cmp    r8, 3
  1027  	JB   LBB0_1553
  1028  
  1029  LBB0_1288: // Four elements per iteration until rsi == r9.
  1030  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
  1031  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1032  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
  1033  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1034  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
  1035  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1036  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
  1037  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1038  	LONG $0x04c68348             // add    rsi, 4
  1039  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1040  	JNE  LBB0_1288
  1041  	JMP  LBB0_1553
  1042  
  1043  LBB0_193: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_401, 3 -> 8-byte-to-byte narrowing below, else exit.
  1044  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1045  	JE   LBB0_401
  1046  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1047  	JNE  LBB0_1553
  1048  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1049  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1050  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1051  	LONG $0x10f88341         // cmp    r8d, 16
  1052  	JB   LBB0_197 // fewer than 16 elements: use the scalar path
  1053  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  1054  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1055  	JBE  LBB0_781 // source range ends at or before rcx: no overlap, take LBB0_781 (outside this view)
  1056  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1057  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1058  	JBE  LBB0_781 // destination range ends at or before rdx: no overlap
  1059  
  1060  LBB0_197: // Scalar path: start at element index 0.
  1061  	WORD $0xf631 // xor    esi, esi
  1062  
  1063  LBB0_1293: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1064  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1065  	WORD $0xf749; BYTE $0xd0 // not    r8
  1066  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1067  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1068  	LONG $0x03e78348         // and    rdi, 3
  1069  	JE   LBB0_1295 // r9 & 3 == 0: skip the one-at-a-time loop
  1070  
  1071  LBB0_1294: // One element per iteration, rdi times: store the byte at [rdx + 8*rsi] to [rcx + rsi].
  1072  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  1073  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1074  	LONG $0x01c68348         // add    rsi, 1
  1075  	LONG $0xffc78348         // add    rdi, -1
  1076  	JNE  LBB0_1294
  1077  
  1078  LBB0_1295: // r8 < 3 means fewer than 4 elements were pending at LBB0_1293: skip the unrolled loop.
  1079  	LONG $0x03f88349 // cmp    r8, 3
  1080  	JB   LBB0_1553
  1081  
  1082  LBB0_1296: // Four elements per iteration until rsi == r9.
  1083  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  1084  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1085  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  1086  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1087  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  1088  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1089  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  1090  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1091  	LONG $0x04c68348             // add    rsi, 4
  1092  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1093  	JNE  LBB0_1296
  1094  	JMP  LBB0_1553
  1095  
  1096  LBB0_198: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_404, 3 -> float32-to-byte truncating convert below, else exit.
  1097  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1098  	JE   LBB0_404
  1099  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1100  	JNE  LBB0_1553
  1101  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1102  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1103  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1104  	LONG $0x20f88341         // cmp    r8d, 32
  1105  	JB   LBB0_202 // fewer than 32 elements: use the scalar path
  1106  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  1107  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1108  	JBE  LBB0_784 // source range ends at or before rcx: no overlap, take LBB0_784 (outside this view)
  1109  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1110  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1111  	JBE  LBB0_784 // destination range ends at or before rdx: no overlap
  1112  
  1113  LBB0_202: // Scalar path: start at element index 0.
  1114  	WORD $0xf631 // xor    esi, esi
  1115  
  1116  LBB0_1301: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1117  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1118  	WORD $0xf749; BYTE $0xd0 // not    r8
  1119  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1120  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1121  	LONG $0x03e78348         // and    rdi, 3
  1122  	JE   LBB0_1303 // r9 & 3 == 0: skip the one-at-a-time loop
  1123  
  1124  LBB0_1302: // One element per iteration, rdi times: truncating float32 -> int32, store the low byte.
  1125  	LONG $0x042cfac5; BYTE $0xb2 // vcvttss2si    eax, dword [rdx + 4*rsi]
  1126  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1127  	LONG $0x01c68348             // add    rsi, 1
  1128  	LONG $0xffc78348             // add    rdi, -1
  1129  	JNE  LBB0_1302
  1130  
  1131  LBB0_1303: // r8 < 3 means fewer than 4 elements were pending at LBB0_1301: skip the unrolled loop.
  1132  	LONG $0x03f88349 // cmp    r8, 3
  1133  	JB   LBB0_1553
  1134  
  1135  LBB0_1304: // Four elements per iteration until rsi == r9.
  1136  	LONG $0x042cfac5; BYTE $0xb2   // vcvttss2si    eax, dword [rdx + 4*rsi]
  1137  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  1138  	LONG $0x442cfac5; WORD $0x04b2 // vcvttss2si    eax, dword [rdx + 4*rsi + 4]
  1139  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  1140  	LONG $0x442cfac5; WORD $0x08b2 // vcvttss2si    eax, dword [rdx + 4*rsi + 8]
  1141  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  1142  	LONG $0x442cfac5; WORD $0x0cb2 // vcvttss2si    eax, dword [rdx + 4*rsi + 12]
  1143  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  1144  	LONG $0x04c68348               // add    rsi, 4
  1145  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  1146  	JNE  LBB0_1304
  1147  	JMP  LBB0_1553
  1148  
  1149  LBB0_203: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_407, 3 -> byte-to-byte copy below, else exit.
  1150  	WORD $0xfe83; BYTE $0x02                   // cmp    esi, 2
  1151  	JE   LBB0_407
  1152  	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
  1153  	JNE  LBB0_1553
  1154  	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
  1155  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1156  	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
  1157  	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
  1158  	JB   LBB0_207 // fewer than 128 elements: use the scalar path
  1159  	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
  1160  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  1161  	JBE  LBB0_787 // source range ends at or before rcx: no overlap, take LBB0_787 (outside this view)
  1162  	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
  1163  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
  1164  	JBE  LBB0_787 // destination range ends at or before rdx: no overlap
  1165  
  1166  LBB0_207: // Scalar path: start at element index 0.
  1167  	WORD $0xf631 // xor    esi, esi
  1168  
  1169  LBB0_1309: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1170  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1171  	WORD $0xf749; BYTE $0xd0 // not    r8
  1172  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1173  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1174  	LONG $0x03e78348         // and    rdi, 3
  1175  	JE   LBB0_1311 // r9 & 3 == 0: skip the one-at-a-time loop
  1176  
  1177  LBB0_1310: // One element per iteration, rdi times: copy the byte at [rdx + rsi] to [rcx + rsi].
  1178  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  1179  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1180  	LONG $0x01c68348         // add    rsi, 1
  1181  	LONG $0xffc78348         // add    rdi, -1
  1182  	JNE  LBB0_1310
  1183  
  1184  LBB0_1311: // r8 < 3 means fewer than 4 elements were pending at LBB0_1309: skip the unrolled loop.
  1185  	LONG $0x03f88349 // cmp    r8, 3
  1186  	JB   LBB0_1553
  1187  
  1188  LBB0_1312: // Four elements per iteration until rsi == r9.
  1189  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1190  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1191  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1192  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1193  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1194  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1195  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1196  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1197  	LONG $0x04c68348             // add    rsi, 4
  1198  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1199  	JNE  LBB0_1312
  1200  	JMP  LBB0_1553
  1201  
  1202  LBB0_208: // Dispatch on esi (presumably still otype from the prologue): 2 -> LBB0_410, 3 -> 4-byte-to-byte narrowing below, else exit.
  1203  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1204  	JE   LBB0_410
  1205  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1206  	JNE  LBB0_1553
  1207  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1208  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1209  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1210  	LONG $0x20f88341         // cmp    r8d, 32
  1211  	JB   LBB0_212 // fewer than 32 elements: use the scalar path
  1212  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  1213  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1214  	JBE  LBB0_790 // source range ends at or before rcx: no overlap, take LBB0_790 (outside this view)
  1215  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1216  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1217  	JBE  LBB0_790 // destination range ends at or before rdx: no overlap
  1218  
  1219  LBB0_212: // Scalar path: start at element index 0.
  1220  	WORD $0xf631 // xor    esi, esi
  1221  
  1222  LBB0_1317: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1223  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1224  	WORD $0xf749; BYTE $0xd0 // not    r8
  1225  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1226  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1227  	LONG $0x03e78348         // and    rdi, 3
  1228  	JE   LBB0_1319 // r9 & 3 == 0: skip the one-at-a-time loop
  1229  
  1230  LBB0_1318: // One element per iteration, rdi times: store the byte at [rdx + 4*rsi] to [rcx + rsi].
  1231  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  1232  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1233  	LONG $0x01c68348         // add    rsi, 1
  1234  	LONG $0xffc78348         // add    rdi, -1
  1235  	JNE  LBB0_1318
  1236  
  1237  LBB0_1319: // r8 < 3 means fewer than 4 elements were pending at LBB0_1317: skip the unrolled loop.
  1238  	LONG $0x03f88349 // cmp    r8, 3
  1239  	JB   LBB0_1553
  1240  
  1241  LBB0_1320: // Four elements per iteration until rsi == r9.
  1242  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  1243  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1244  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  1245  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1246  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  1247  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1248  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  1249  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1250  	LONG $0x04c68348             // add    rsi, 4
  1251  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1252  	JNE  LBB0_1320
  1253  	JMP  LBB0_1553
  1254  
  1255  LBB0_213: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_413, 8 -> length check below, else exit. All targets are outside this view.
  1256  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1257  	JE   LBB0_413
  1258  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1259  	JNE  LBB0_1553
  1260  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1261  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1262  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1263  	LONG $0x10f88341         // cmp    r8d, 16
  1264  	JAE  LBB0_535 // 16 or more elements: take LBB0_535
  1265  	WORD $0xf631             // xor    esi, esi
  1266  	JMP  LBB0_934 // fewer than 16: enter LBB0_934 with rsi = 0
  1267  
  1268  LBB0_218: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_416, 8 -> length check below, else exit. All targets are outside this view.
  1269  	WORD $0xfe83; BYTE $0x07               // cmp    esi, 7
  1270  	JE   LBB0_416
  1271  	WORD $0xfe83; BYTE $0x08               // cmp    esi, 8
  1272  	JNE  LBB0_1553
  1273  	WORD $0x8545; BYTE $0xc0               // test    r8d, r8d
  1274  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1275  	WORD $0x8945; BYTE $0xc1               // mov    r9d, r8d
  1276  	QUAD $0x000000000000bb49; WORD $0x8000 // mov    r11, -9223372036854775808
  1277  	LONG $0x04f88341                       // cmp    r8d, 4
  1278  	JAE  LBB0_538 // 4 or more elements: take LBB0_538 (r11 = 0x8000000000000000 is consumed by code outside this view)
  1279  	WORD $0x3145; BYTE $0xf6               // xor    r14d, r14d
  1280  	JMP  LBB0_799 // fewer than 4: enter LBB0_799 with r14 = 0
  1281  
  1282  LBB0_223: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_419, 8 -> sign-extending byte-to-qword widening below, else exit.
  1283  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1284  	JE   LBB0_419
  1285  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1286  	JNE  LBB0_1553
  1287  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1288  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1289  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1290  	LONG $0x10f88341         // cmp    r8d, 16
  1291  	JB   LBB0_227 // fewer than 16 elements: use the scalar path
  1292  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1293  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1294  	JBE  LBB0_801 // source range ends at or before rcx: no overlap, take LBB0_801 (outside this view)
  1295  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1296  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1297  	JBE  LBB0_801 // destination range ends at or before rdx: no overlap
  1298  
  1299  LBB0_227: // Scalar path: start at element index 0.
  1300  	WORD $0xf631 // xor    esi, esi
  1301  
  1302  LBB0_1325: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1303  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1304  	WORD $0xf749; BYTE $0xd0 // not    r8
  1305  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1306  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1307  	LONG $0x03e78348         // and    rdi, 3
  1308  	JE   LBB0_1327 // r9 & 3 == 0: skip the one-at-a-time loop
  1309  
  1310  LBB0_1326: // One element per iteration, rdi times: sign-extend the byte at [rdx + rsi] into the qword at [rcx + 8*rsi].
  1311  	LONG $0x04be0f48; BYTE $0x32 // movsx    rax, byte [rdx + rsi]
  1312  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1313  	LONG $0x01c68348             // add    rsi, 1
  1314  	LONG $0xffc78348             // add    rdi, -1
  1315  	JNE  LBB0_1326
  1316  
  1317  LBB0_1327: // r8 < 3 means fewer than 4 elements were pending at LBB0_1325: skip the unrolled loop.
  1318  	LONG $0x03f88349 // cmp    r8, 3
  1319  	JB   LBB0_1553
  1320  
  1321  LBB0_1328: // Four elements per iteration until rsi == r9.
  1322  	LONG $0x04be0f48; BYTE $0x32   // movsx    rax, byte [rdx + rsi]
  1323  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  1324  	LONG $0x44be0f48; WORD $0x0132 // movsx    rax, byte [rdx + rsi + 1]
  1325  	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
  1326  	LONG $0x44be0f48; WORD $0x0232 // movsx    rax, byte [rdx + rsi + 2]
  1327  	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
  1328  	LONG $0x44be0f48; WORD $0x0332 // movsx    rax, byte [rdx + rsi + 3]
  1329  	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
  1330  	LONG $0x04c68348               // add    rsi, 4
  1331  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  1332  	JNE  LBB0_1328
  1333  	JMP  LBB0_1553
  1334  
  1335  LBB0_228: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_422, 8 -> qword-to-qword copy below, else exit.
  1336  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1337  	JE   LBB0_422
  1338  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1339  	JNE  LBB0_1553
  1340  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1341  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1342  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1343  	LONG $0x10f88341         // cmp    r8d, 16
  1344  	JB   LBB0_232 // fewer than 16 elements: use the scalar path
  1345  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  1346  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1347  	JBE  LBB0_804 // source range ends at or before rcx: no overlap, take LBB0_804 (outside this view)
  1348  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1349  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1350  	JBE  LBB0_804 // destination range ends at or before rdx: no overlap
  1351  
  1352  LBB0_232: // Scalar path: start at element index 0.
  1353  	WORD $0xf631 // xor    esi, esi
  1354  
  1355  LBB0_1333: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1356  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1357  	WORD $0xf749; BYTE $0xd0 // not    r8
  1358  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1359  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1360  	LONG $0x03e78348         // and    rdi, 3
  1361  	JE   LBB0_1335 // r9 & 3 == 0: skip the one-at-a-time loop
  1362  
  1363  LBB0_1334: // One element per iteration, rdi times: copy the qword at [rdx + 8*rsi] to [rcx + 8*rsi].
  1364  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1365  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1366  	LONG $0x01c68348 // add    rsi, 1
  1367  	LONG $0xffc78348 // add    rdi, -1
  1368  	JNE  LBB0_1334
  1369  
  1370  LBB0_1335: // r8 < 3 means fewer than 4 elements were pending at LBB0_1333: skip the unrolled loop.
  1371  	LONG $0x03f88349 // cmp    r8, 3
  1372  	JB   LBB0_1553
  1373  
  1374  LBB0_1336: // Four elements per iteration until rsi == r9.
  1375  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1376  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1377  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1378  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1379  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1380  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1381  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1382  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1383  	LONG $0x04c68348             // add    rsi, 4
  1384  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1385  	JNE  LBB0_1336
  1386  	JMP  LBB0_1553
  1387  
  1388  LBB0_233: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_425, 8 -> length check below, else exit. All targets are outside this view.
  1389  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1390  	JE   LBB0_425
  1391  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1392  	JNE  LBB0_1553
  1393  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1394  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1395  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1396  	LONG $0x10f88341         // cmp    r8d, 16
  1397  	JAE  LBB0_546 // 16 or more elements: take LBB0_546
  1398  	WORD $0xf631             // xor    esi, esi
  1399  	JMP  LBB0_939 // fewer than 16: enter LBB0_939 with rsi = 0
  1400  
  1401  LBB0_238: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_428, 8 -> length check below, else exit. All targets are outside this view.
  1402  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1403  	JE   LBB0_428
  1404  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1405  	JNE  LBB0_1553
  1406  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1407  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1408  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1409  	LONG $0x10f88341         // cmp    r8d, 16
  1410  	JAE  LBB0_549 // 16 or more elements: take LBB0_549
  1411  	WORD $0xf631             // xor    esi, esi
  1412  	JMP  LBB0_944 // fewer than 16: enter LBB0_944 with rsi = 0
  1413  
  1414  LBB0_243: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_431, 8 -> qword-to-qword copy below, else exit.
  1415  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1416  	JE   LBB0_431
  1417  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1418  	JNE  LBB0_1553
  1419  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1420  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1421  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1422  	LONG $0x10f88341         // cmp    r8d, 16
  1423  	JB   LBB0_247 // fewer than 16 elements: use the scalar path
  1424  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  1425  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1426  	JBE  LBB0_807 // source range ends at or before rcx: no overlap, take LBB0_807 (outside this view)
  1427  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1428  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1429  	JBE  LBB0_807 // destination range ends at or before rdx: no overlap
  1430  
  1431  LBB0_247: // Scalar path: start at element index 0.
  1432  	WORD $0xf631 // xor    esi, esi
  1433  
  1434  LBB0_1341: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1435  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1436  	WORD $0xf749; BYTE $0xd0 // not    r8
  1437  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1438  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1439  	LONG $0x03e78348         // and    rdi, 3
  1440  	JE   LBB0_1343 // r9 & 3 == 0: skip the one-at-a-time loop
  1441  
  1442  LBB0_1342: // One element per iteration, rdi times: copy the qword at [rdx + 8*rsi] to [rcx + 8*rsi].
  1443  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1444  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1445  	LONG $0x01c68348 // add    rsi, 1
  1446  	LONG $0xffc78348 // add    rdi, -1
  1447  	JNE  LBB0_1342
  1448  
  1449  LBB0_1343: // r8 < 3 means fewer than 4 elements were pending at LBB0_1341: skip the unrolled loop.
  1450  	LONG $0x03f88349 // cmp    r8, 3
  1451  	JB   LBB0_1553
  1452  
  1453  LBB0_1344: // Four elements per iteration until rsi == r9.
  1454  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1455  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1456  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1457  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1458  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1459  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1460  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1461  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1462  	LONG $0x04c68348             // add    rsi, 4
  1463  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1464  	JNE  LBB0_1344
  1465  	JMP  LBB0_1553
  1466  
  1467  LBB0_248: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_434, 8 -> length check below, else exit. All targets are outside this view.
  1468  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1469  	JE   LBB0_434
  1470  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1471  	JNE  LBB0_1553
  1472  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1473  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1474  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1475  	LONG $0x04f88341         // cmp    r8d, 4
  1476  	JAE  LBB0_555 // 4 or more elements: take LBB0_555
  1477  	WORD $0x3145; BYTE $0xf6 // xor    r14d, r14d
  1478  	JMP  LBB0_816 // fewer than 4: enter LBB0_816 with r14 = 0
  1479  
  1480  LBB0_253: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_437, 8 -> zero-extending byte-to-qword widening below, else exit.
  1481  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1482  	JE   LBB0_437
  1483  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1484  	JNE  LBB0_1553
  1485  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1486  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1487  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1488  	LONG $0x10f88341         // cmp    r8d, 16
  1489  	JB   LBB0_257 // fewer than 16 elements: use the scalar path
  1490  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1491  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1492  	JBE  LBB0_818 // source range ends at or before rcx: no overlap, take LBB0_818 (outside this view)
  1493  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1494  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1495  	JBE  LBB0_818 // destination range ends at or before rdx: no overlap
  1496  
  1497  LBB0_257: // Scalar path: start at element index 0.
  1498  	WORD $0xf631 // xor    esi, esi
  1499  
  1500  LBB0_1349: // Computes r8 = r9 - rsi - 1 and rdi = r9 & 3 (rsi = next element index, r9 = loop bound).
  1501  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1502  	WORD $0xf749; BYTE $0xd0 // not    r8
  1503  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1504  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1505  	LONG $0x03e78348         // and    rdi, 3
  1506  	JE   LBB0_1351 // r9 & 3 == 0: skip the one-at-a-time loop
  1507  
  1508  LBB0_1350: // One element per iteration, rdi times: zero-extend the byte at [rdx + rsi] into the qword at [rcx + 8*rsi].
  1509  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  1510  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1511  	LONG $0x01c68348 // add    rsi, 1
  1512  	LONG $0xffc78348 // add    rdi, -1
  1513  	JNE  LBB0_1350
  1514  
  1515  LBB0_1351: // r8 < 3 means fewer than 4 elements were pending at LBB0_1349: skip the unrolled loop.
  1516  	LONG $0x03f88349 // cmp    r8, 3
  1517  	JB   LBB0_1553
  1518  
  1519  LBB0_1352: // Four elements per iteration until rsi == r9.
  1520  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1521  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1522  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1523  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1524  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1525  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1526  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1527  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1528  	LONG $0x04c68348             // add    rsi, 4
  1529  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1530  	JNE  LBB0_1352
  1531  	JMP  LBB0_1553
  1532  
  1533  LBB0_258: // Dispatch on esi (presumably still otype from the prologue): 7 -> LBB0_440, 8 -> length check below, else exit. All targets are outside this view.
  1534  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1535  	JE   LBB0_440
  1536  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1537  	JNE  LBB0_1553
  1538  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1539  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1540  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1541  	LONG $0x10f88341         // cmp    r8d, 16
  1542  	JAE  LBB0_560 // 16 or more elements: take LBB0_560
  1543  	WORD $0xf631             // xor    esi, esi
  1544  	JMP  LBB0_949 // fewer than 16: enter LBB0_949 with rsi = 0
  1545  
  1546  LBB0_263: // Length dispatch only: exit when r8d <= 0, otherwise copy it to r9d and pick a path by size. Targets are outside this view.
  1547  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1548  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1549  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1550  	LONG $0x20f88341         // cmp    r8d, 32
  1551  	JAE  LBB0_563 // 32 or more elements: take LBB0_563
  1552  	WORD $0xf631             // xor    esi, esi
  1553  	JMP  LBB0_1069 // fewer than 32: enter LBB0_1069 with rsi = 0
  1554  
  1555  LBB0_266: // Length dispatch only: exit when r8d <= 0, otherwise copy it to r9d and pick a path by size. Targets are outside this view.
  1556  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1557  	JLE  LBB0_1553 // r8d <= 0 as a signed 32-bit value: nothing to do
  1558  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1559  	LONG $0x20f88341         // cmp    r8d, 32
  1560  	JAE  LBB0_566 // 32 or more elements: take LBB0_566
  1561  	WORD $0xf631             // xor    esi, esi
  1562  	JMP  LBB0_1074 // fewer than 32: enter LBB0_1074 with rsi = 0
  1563  
  1564  LBB0_269: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1565  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1566  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1567  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1568  	LONG $0x10f88341         // cmp    r8d, 16
  1569  	JAE  LBB0_569 // unsigned len >= 16: continue at LBB0_569 (outside this excerpt; presumably the vectorized path)
  1570  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1571  	JMP  LBB0_1079 // len < 16: continue at LBB0_1079 (outside this excerpt; presumably the scalar loop)
  1572  
  1573  LBB0_272: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1574  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1575  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1576  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1577  	LONG $0x10f88341         // cmp    r8d, 16
  1578  	JAE  LBB0_572 // unsigned len >= 16: continue at LBB0_572 (outside this excerpt; presumably the vectorized path)
  1579  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1580  	JMP  LBB0_1084 // len < 16: continue at LBB0_1084 (outside this excerpt; presumably the scalar loop)
  1581  
  1582  LBB0_275: // Cast case: sign-extend each input byte at rdx (in) into a 16-bit element at rcx (out); len is in r8d.
  1583  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1584  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1585  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1586  	LONG $0x40f88341         // cmp    r8d, 64
  1587  	JB   LBB0_277 // fewer than 64 elements: go straight to the scalar loop
  1588  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  1589  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1590  	JBE  LBB0_821 // input ends at or before out: no overlap, continue at LBB0_821 (outside this excerpt; presumably the vectorized path)
  1591  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1592  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1593  	JBE  LBB0_821 // output ends at or before in: no overlap
  1594  // Reached when len < 64 or the two buffers overlap: element-by-element path from index 0.
  1595  LBB0_277:
  1596  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1597  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1598  LBB0_1357:
  1599  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1600  	WORD $0xf749; BYTE $0xd0 // not    r8
  1601  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1602  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1603  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1604  	JE   LBB0_1359 // len is a multiple of 4: skip the single-element loop
  1605  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1606  LBB0_1358:
  1607  	LONG $0x3204be0f // movsx    eax, byte [rdx + rsi]
  1608  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1609  	LONG $0x01c68348 // add    rsi, 1
  1610  	LONG $0xffc78348 // add    rdi, -1
  1611  	JNE  LBB0_1358 // repeat until rdi reaches 0
  1612  // r8 < 3 means at most 3 elements were pending at LBB0_1357, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1613  LBB0_1359:
  1614  	LONG $0x03f88349 // cmp    r8, 3
  1615  	JB   LBB0_1553
  1616  // Main scalar loop, unrolled 4x: converts elements rsi..rsi+3 per iteration.
  1617  LBB0_1360:
  1618  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  1619  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1620  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  1621  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1622  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  1623  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1624  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  1625  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1626  	LONG $0x04c68348             // add    rsi, 4
  1627  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1628  	JNE  LBB0_1360 // loop until the index equals len
  1629  	JMP  LBB0_1553 // all elements written
  1630  
  1631  LBB0_278: // Cast case: sign-extend each input byte at rdx (in) into a 16-bit element at rcx (out); len is in r8d.
  1632  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1633  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1634  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1635  	LONG $0x40f88341         // cmp    r8d, 64
  1636  	JB   LBB0_280 // fewer than 64 elements: go straight to the scalar loop
  1637  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  1638  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1639  	JBE  LBB0_824 // input ends at or before out: no overlap, continue at LBB0_824 (outside this excerpt; presumably the vectorized path)
  1640  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1641  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1642  	JBE  LBB0_824 // output ends at or before in: no overlap
  1643  // Reached when len < 64 or the two buffers overlap: element-by-element path from index 0.
  1644  LBB0_280:
  1645  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1646  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1647  LBB0_1365:
  1648  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1649  	WORD $0xf749; BYTE $0xd0 // not    r8
  1650  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1651  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1652  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1653  	JE   LBB0_1367 // len is a multiple of 4: skip the single-element loop
  1654  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1655  LBB0_1366:
  1656  	LONG $0x3204be0f // movsx    eax, byte [rdx + rsi]
  1657  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1658  	LONG $0x01c68348 // add    rsi, 1
  1659  	LONG $0xffc78348 // add    rdi, -1
  1660  	JNE  LBB0_1366 // repeat until rdi reaches 0
  1661  // r8 < 3 means at most 3 elements were pending at LBB0_1365, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1662  LBB0_1367:
  1663  	LONG $0x03f88349 // cmp    r8, 3
  1664  	JB   LBB0_1553
  1665  // Main scalar loop, unrolled 4x: converts elements rsi..rsi+3 per iteration.
  1666  LBB0_1368:
  1667  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  1668  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1669  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  1670  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1671  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  1672  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1673  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  1674  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1675  	LONG $0x04c68348             // add    rsi, 4
  1676  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1677  	JNE  LBB0_1368 // loop until the index equals len
  1678  	JMP  LBB0_1553 // all elements written
  1679  
  1680  LBB0_281: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1681  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1682  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1683  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1684  	LONG $0x10f88341         // cmp    r8d, 16
  1685  	JAE  LBB0_581 // unsigned len >= 16: continue at LBB0_581 (outside this excerpt; presumably the vectorized path)
  1686  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1687  	JMP  LBB0_954 // len < 16: continue at LBB0_954 (outside this excerpt; presumably the scalar loop)
  1688  
  1689  LBB0_284: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1690  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1691  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1692  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1693  	LONG $0x10f88341         // cmp    r8d, 16
  1694  	JAE  LBB0_584 // unsigned len >= 16: continue at LBB0_584 (outside this excerpt; presumably the vectorized path)
  1695  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1696  	JMP  LBB0_959 // len < 16: continue at LBB0_959 (outside this excerpt; presumably the scalar loop)
  1697  
  1698  LBB0_287: // Cast case: copy each 16-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  1699  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1700  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1701  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1702  	LONG $0x20f88341         // cmp    r8d, 32
  1703  	JB   LBB0_289 // fewer than 32 elements: go straight to the scalar loop
  1704  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]  (one past the last 2-byte input element)
  1705  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1706  	JBE  LBB0_827 // input ends at or before out: no overlap, continue at LBB0_827 (outside this excerpt; presumably the vectorized path)
  1707  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1708  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1709  	JBE  LBB0_827 // output ends at or before in: no overlap
  1710  // Reached when len < 32 or the two buffers overlap: element-by-element path from index 0.
  1711  LBB0_289:
  1712  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1713  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1714  LBB0_1151:
  1715  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1716  	WORD $0xf749; BYTE $0xd0 // not    r8
  1717  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1718  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1719  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1720  	JE   LBB0_1153 // len is a multiple of 4: skip the single-element loop
  1721  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1722  LBB0_1152:
  1723  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1724  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1725  	LONG $0x01c68348 // add    rsi, 1
  1726  	LONG $0xffc78348 // add    rdi, -1
  1727  	JNE  LBB0_1152 // repeat until rdi reaches 0
  1728  // r8 < 3 means at most 3 elements were pending at LBB0_1151, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1729  LBB0_1153:
  1730  	LONG $0x03f88349 // cmp    r8, 3
  1731  	JB   LBB0_1553
  1732  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  1733  LBB0_1154:
  1734  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1735  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1736  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1737  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1738  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1739  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1740  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1741  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1742  	LONG $0x04c68348             // add    rsi, 4
  1743  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1744  	JNE  LBB0_1154 // loop until the index equals len
  1745  	JMP  LBB0_1553 // all elements written
  1746  
  1747  LBB0_290: // Cast case: copy each 16-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  1748  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1749  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1750  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1751  	LONG $0x20f88341         // cmp    r8d, 32
  1752  	JB   LBB0_292 // fewer than 32 elements: go straight to the scalar loop
  1753  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]  (one past the last 2-byte input element)
  1754  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1755  	JBE  LBB0_829 // input ends at or before out: no overlap, continue at LBB0_829 (outside this excerpt; presumably the vectorized path)
  1756  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1757  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1758  	JBE  LBB0_829 // output ends at or before in: no overlap
  1759  // Reached when len < 32 or the two buffers overlap: element-by-element path from index 0.
  1760  LBB0_292:
  1761  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1762  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1763  LBB0_1161:
  1764  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1765  	WORD $0xf749; BYTE $0xd0 // not    r8
  1766  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1767  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1768  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1769  	JE   LBB0_1163 // len is a multiple of 4: skip the single-element loop
  1770  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1771  LBB0_1162:
  1772  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1773  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1774  	LONG $0x01c68348 // add    rsi, 1
  1775  	LONG $0xffc78348 // add    rdi, -1
  1776  	JNE  LBB0_1162 // repeat until rdi reaches 0
  1777  // r8 < 3 means at most 3 elements were pending at LBB0_1161, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1778  LBB0_1163:
  1779  	LONG $0x03f88349 // cmp    r8, 3
  1780  	JB   LBB0_1553
  1781  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  1782  LBB0_1164:
  1783  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1784  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1785  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1786  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1787  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1788  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1789  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1790  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1791  	LONG $0x04c68348             // add    rsi, 4
  1792  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1793  	JNE  LBB0_1164 // loop until the index equals len
  1794  	JMP  LBB0_1553 // all elements written
  1795  
  1796  LBB0_293: // Cast case: copy each 16-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  1797  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1798  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1799  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1800  	LONG $0x20f88341         // cmp    r8d, 32
  1801  	JB   LBB0_295 // fewer than 32 elements: go straight to the scalar loop
  1802  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]  (one past the last 2-byte input element)
  1803  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1804  	JBE  LBB0_831 // input ends at or before out: no overlap, continue at LBB0_831 (outside this excerpt; presumably the vectorized path)
  1805  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1806  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1807  	JBE  LBB0_831 // output ends at or before in: no overlap
  1808  // Reached when len < 32 or the two buffers overlap: element-by-element path from index 0.
  1809  LBB0_295:
  1810  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1811  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1812  LBB0_1171:
  1813  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1814  	WORD $0xf749; BYTE $0xd0 // not    r8
  1815  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1816  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1817  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1818  	JE   LBB0_1173 // len is a multiple of 4: skip the single-element loop
  1819  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1820  LBB0_1172:
  1821  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1822  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1823  	LONG $0x01c68348 // add    rsi, 1
  1824  	LONG $0xffc78348 // add    rdi, -1
  1825  	JNE  LBB0_1172 // repeat until rdi reaches 0
  1826  // r8 < 3 means at most 3 elements were pending at LBB0_1171, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1827  LBB0_1173:
  1828  	LONG $0x03f88349 // cmp    r8, 3
  1829  	JB   LBB0_1553
  1830  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  1831  LBB0_1174:
  1832  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1833  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1834  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1835  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1836  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1837  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1838  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1839  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1840  	LONG $0x04c68348             // add    rsi, 4
  1841  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1842  	JNE  LBB0_1174 // loop until the index equals len
  1843  	JMP  LBB0_1553 // all elements written
  1844  
  1845  LBB0_296: // Cast case: copy each 16-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  1846  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1847  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1848  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1849  	LONG $0x20f88341         // cmp    r8d, 32
  1850  	JB   LBB0_298 // fewer than 32 elements: go straight to the scalar loop
  1851  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]  (one past the last 2-byte input element)
  1852  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1853  	JBE  LBB0_833 // input ends at or before out: no overlap, continue at LBB0_833 (outside this excerpt; presumably the vectorized path)
  1854  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1855  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1856  	JBE  LBB0_833 // output ends at or before in: no overlap
  1857  // Reached when len < 32 or the two buffers overlap: element-by-element path from index 0.
  1858  LBB0_298:
  1859  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1860  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1861  LBB0_1181:
  1862  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1863  	WORD $0xf749; BYTE $0xd0 // not    r8
  1864  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1865  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1866  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1867  	JE   LBB0_1183 // len is a multiple of 4: skip the single-element loop
  1868  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1869  LBB0_1182:
  1870  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1871  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1872  	LONG $0x01c68348 // add    rsi, 1
  1873  	LONG $0xffc78348 // add    rdi, -1
  1874  	JNE  LBB0_1182 // repeat until rdi reaches 0
  1875  // r8 < 3 means at most 3 elements were pending at LBB0_1181, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1876  LBB0_1183:
  1877  	LONG $0x03f88349 // cmp    r8, 3
  1878  	JB   LBB0_1553
  1879  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  1880  LBB0_1184:
  1881  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1882  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1883  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1884  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1885  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1886  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1887  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1888  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1889  	LONG $0x04c68348             // add    rsi, 4
  1890  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1891  	JNE  LBB0_1184 // loop until the index equals len
  1892  	JMP  LBB0_1553 // all elements written
  1893  
  1894  LBB0_299: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1895  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1896  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1897  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1898  	LONG $0x10f88341         // cmp    r8d, 16
  1899  	JAE  LBB0_599 // unsigned len >= 16: continue at LBB0_599 (outside this excerpt; presumably the vectorized path)
  1900  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1901  	JMP  LBB0_964 // len < 16: continue at LBB0_964 (outside this excerpt; presumably the scalar loop)
  1902  
  1903  LBB0_302: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1904  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1905  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1906  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1907  	LONG $0x10f88341         // cmp    r8d, 16
  1908  	JAE  LBB0_602 // unsigned len >= 16: continue at LBB0_602 (outside this excerpt; presumably the vectorized path)
  1909  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1910  	JMP  LBB0_1089 // len < 16: continue at LBB0_1089 (outside this excerpt; presumably the scalar loop)
  1911  
  1912  LBB0_305: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1913  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1914  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1915  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1916  	LONG $0x20f88341         // cmp    r8d, 32
  1917  	JAE  LBB0_605 // unsigned len >= 32: continue at LBB0_605 (outside this excerpt; presumably the vectorized path)
  1918  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1919  	JMP  LBB0_1094 // len < 32: continue at LBB0_1094 (outside this excerpt; presumably the scalar loop)
  1920  
  1921  LBB0_308: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  1922  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1923  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1924  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1925  	LONG $0x20f88341         // cmp    r8d, 32
  1926  	JAE  LBB0_608 // unsigned len >= 32: continue at LBB0_608 (outside this excerpt; presumably the vectorized path)
  1927  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  1928  	JMP  LBB0_1099 // len < 32: continue at LBB0_1099 (outside this excerpt; presumably the scalar loop)
  1929  
  1930  LBB0_311: // Cast case: zero-extend each input byte at rdx (in) into a 16-bit element at rcx (out); len is in r8d.
  1931  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1932  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1933  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1934  	LONG $0x40f88341         // cmp    r8d, 64
  1935  	JB   LBB0_313 // fewer than 64 elements: go straight to the scalar loop
  1936  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  1937  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1938  	JBE  LBB0_835 // input ends at or before out: no overlap, continue at LBB0_835 (outside this excerpt; presumably the vectorized path)
  1939  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1940  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1941  	JBE  LBB0_835 // output ends at or before in: no overlap
  1942  // Reached when len < 64 or the two buffers overlap: element-by-element path from index 0.
  1943  LBB0_313:
  1944  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1945  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1946  LBB0_1373:
  1947  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1948  	WORD $0xf749; BYTE $0xd0 // not    r8
  1949  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1950  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1951  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  1952  	JE   LBB0_1375 // len is a multiple of 4: skip the single-element loop
  1953  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  1954  LBB0_1374:
  1955  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  1956  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1957  	LONG $0x01c68348 // add    rsi, 1
  1958  	LONG $0xffc78348 // add    rdi, -1
  1959  	JNE  LBB0_1374 // repeat until rdi reaches 0
  1960  // r8 < 3 means at most 3 elements were pending at LBB0_1373, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  1961  LBB0_1375:
  1962  	LONG $0x03f88349 // cmp    r8, 3
  1963  	JB   LBB0_1553
  1964  // Main scalar loop, unrolled 4x: converts elements rsi..rsi+3 per iteration.
  1965  LBB0_1376:
  1966  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1967  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1968  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1969  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1970  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1971  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1972  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1973  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1974  	LONG $0x04c68348             // add    rsi, 4
  1975  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1976  	JNE  LBB0_1376 // loop until the index equals len
  1977  	JMP  LBB0_1553 // all elements written
  1978  
  1979  LBB0_314: // Cast case: zero-extend each input byte at rdx (in) into a 16-bit element at rcx (out); len is in r8d.
  1980  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1981  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  1982  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  1983  	LONG $0x40f88341         // cmp    r8d, 64
  1984  	JB   LBB0_316 // fewer than 64 elements: go straight to the scalar loop
  1985  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  1986  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1987  	JBE  LBB0_838 // input ends at or before out: no overlap, continue at LBB0_838 (outside this excerpt; presumably the vectorized path)
  1988  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]  (one past the last 2-byte output element)
  1989  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1990  	JBE  LBB0_838 // output ends at or before in: no overlap
  1991  // Reached when len < 64 or the two buffers overlap: element-by-element path from index 0.
  1992  LBB0_316:
  1993  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  1994  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  1995  LBB0_1381:
  1996  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1997  	WORD $0xf749; BYTE $0xd0 // not    r8
  1998  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  1999  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2000  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  2001  	JE   LBB0_1383 // len is a multiple of 4: skip the single-element loop
  2002  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  2003  LBB0_1382:
  2004  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  2005  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  2006  	LONG $0x01c68348 // add    rsi, 1
  2007  	LONG $0xffc78348 // add    rdi, -1
  2008  	JNE  LBB0_1382 // repeat until rdi reaches 0
  2009  // r8 < 3 means at most 3 elements were pending at LBB0_1381, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  2010  LBB0_1383:
  2011  	LONG $0x03f88349 // cmp    r8, 3
  2012  	JB   LBB0_1553
  2013  // Main scalar loop, unrolled 4x: converts elements rsi..rsi+3 per iteration.
  2014  LBB0_1384:
  2015  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2016  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  2017  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2018  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  2019  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2020  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  2021  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2022  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  2023  	LONG $0x04c68348             // add    rsi, 4
  2024  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2025  	JNE  LBB0_1384 // loop until the index equals len
  2026  	JMP  LBB0_1553 // all elements written
  2027  
  2028  LBB0_317: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2029  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2030  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2031  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2032  	LONG $0x20f88341         // cmp    r8d, 32
  2033  	JAE  LBB0_617 // unsigned len >= 32: continue at LBB0_617 (outside this excerpt; presumably the vectorized path)
  2034  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2035  	JMP  LBB0_969 // len < 32: continue at LBB0_969 (outside this excerpt; presumably the scalar loop)
  2036  
  2037  LBB0_320: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2038  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2039  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2040  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2041  	LONG $0x20f88341         // cmp    r8d, 32
  2042  	JAE  LBB0_620 // unsigned len >= 32: continue at LBB0_620 (outside this excerpt; presumably the vectorized path)
  2043  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2044  	JMP  LBB0_974 // len < 32: continue at LBB0_974 (outside this excerpt; presumably the scalar loop)
  2045  
  2046  LBB0_323: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2047  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2048  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2049  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2050  	LONG $0x10f88341         // cmp    r8d, 16
  2051  	JAE  LBB0_623 // unsigned len >= 16: continue at LBB0_623 (outside this excerpt; presumably the vectorized path)
  2052  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2053  	JMP  LBB0_1104 // len < 16: continue at LBB0_1104 (outside this excerpt; presumably the scalar loop)
  2054  
  2055  LBB0_326: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2056  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2057  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2058  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2059  	LONG $0x20f88341         // cmp    r8d, 32
  2060  	JAE  LBB0_626 // unsigned len >= 32: continue at LBB0_626 (outside this excerpt; presumably the vectorized path)
  2061  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2062  	JMP  LBB0_1109 // len < 32: continue at LBB0_1109 (outside this excerpt; presumably the scalar loop)
  2063  
  2064  LBB0_329: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2065  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2066  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2067  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2068  	LONG $0x10f88341         // cmp    r8d, 16
  2069  	JAE  LBB0_629 // unsigned len >= 16: continue at LBB0_629 (outside this excerpt; presumably the vectorized path)
  2070  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2071  	JMP  LBB0_1114 // len < 16: continue at LBB0_1114 (outside this excerpt; presumably the scalar loop)
  2072  
  2073  LBB0_332: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2074  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2075  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2076  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2077  	LONG $0x10f88341         // cmp    r8d, 16
  2078  	JAE  LBB0_632 // unsigned len >= 16: continue at LBB0_632 (outside this excerpt; presumably the vectorized path)
  2079  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2080  	JMP  LBB0_1119 // len < 16: continue at LBB0_1119 (outside this excerpt; presumably the scalar loop)
  2081  
  2082  LBB0_335: // Cast case: sign-extend each input byte at rdx (in) into a 64-bit element at rcx (out); len is in r8d.
  2083  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2084  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2085  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2086  	LONG $0x10f88341         // cmp    r8d, 16
  2087  	JB   LBB0_337 // fewer than 16 elements: go straight to the scalar loop
  2088  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  2089  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2090  	JBE  LBB0_841 // input ends at or before out: no overlap, continue at LBB0_841 (outside this excerpt; presumably the vectorized path)
  2091  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]  (one past the last 8-byte output element)
  2092  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2093  	JBE  LBB0_841 // output ends at or before in: no overlap
  2094  // Reached when len < 16 or the two buffers overlap: element-by-element path from index 0.
  2095  LBB0_337:
  2096  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  2097  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  2098  LBB0_1389:
  2099  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2100  	WORD $0xf749; BYTE $0xd0 // not    r8
  2101  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  2102  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2103  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  2104  	JE   LBB0_1391 // len is a multiple of 4: skip the single-element loop
  2105  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  2106  LBB0_1390:
  2107  	LONG $0x04be0f48; BYTE $0x32 // movsx    rax, byte [rdx + rsi]
  2108  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2109  	LONG $0x01c68348             // add    rsi, 1
  2110  	LONG $0xffc78348             // add    rdi, -1
  2111  	JNE  LBB0_1390 // repeat until rdi reaches 0
  2112  // r8 < 3 means at most 3 elements were pending at LBB0_1389, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  2113  LBB0_1391:
  2114  	LONG $0x03f88349 // cmp    r8, 3
  2115  	JB   LBB0_1553
  2116  // Main scalar loop, unrolled 4x: converts elements rsi..rsi+3 per iteration.
  2117  LBB0_1392:
  2118  	LONG $0x04be0f48; BYTE $0x32   // movsx    rax, byte [rdx + rsi]
  2119  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  2120  	LONG $0x44be0f48; WORD $0x0132 // movsx    rax, byte [rdx + rsi + 1]
  2121  	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
  2122  	LONG $0x44be0f48; WORD $0x0232 // movsx    rax, byte [rdx + rsi + 2]
  2123  	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
  2124  	LONG $0x44be0f48; WORD $0x0332 // movsx    rax, byte [rdx + rsi + 3]
  2125  	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
  2126  	LONG $0x04c68348               // add    rsi, 4
  2127  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2128  	JNE  LBB0_1392 // loop until the index equals len
  2129  	JMP  LBB0_1553 // all elements written
  2130  
  2131  LBB0_338: // Cast case: convert each signed input byte at rdx (in) into a float32 element at rcx (out); len is in r8d.
  2132  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2133  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2134  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2135  	LONG $0x20f88341         // cmp    r8d, 32
  2136  	JB   LBB0_340 // fewer than 32 elements: go straight to the scalar loop
  2137  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]  (one past the last input byte)
  2138  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2139  	JBE  LBB0_844 // input ends at or before out: no overlap, continue at LBB0_844 (outside this excerpt; presumably the vectorized path)
  2140  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]  (one past the last 4-byte output element)
  2141  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2142  	JBE  LBB0_844 // output ends at or before in: no overlap
  2143  // Reached when len < 32 or the two buffers overlap: element-by-element path from index 0.
  2144  LBB0_340:
  2145  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  2146  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  2147  LBB0_1397:
  2148  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2149  	WORD $0xf749; BYTE $0xd0 // not    r8
  2150  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  2151  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2152  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  2153  	JE   LBB0_1399 // len is a multiple of 4: skip the single-element loop
  2154  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  2155  LBB0_1398:
  2156  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  2157  	LONG $0xc02adac5             // vcvtsi2ss    xmm0, xmm4, eax  (xmm4 only supplies xmm0's upper bits; just the low dword is stored below)
  2158  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
  2159  	LONG $0x01c68348             // add    rsi, 1
  2160  	LONG $0xffc78348             // add    rdi, -1
  2161  	JNE  LBB0_1398 // repeat until rdi reaches 0
  2162  // r8 < 3 means at most 3 elements were pending at LBB0_1397, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  2163  LBB0_1399:
  2164  	LONG $0x03f88349 // cmp    r8, 3
  2165  	JB   LBB0_1553
  2166  // Main scalar loop, unrolled 4x: each group is sign-extend byte -> int-to-float convert -> store float32.
  2167  LBB0_1400:
  2168  	LONG $0x3204be0f               // movsx    eax, byte [rdx + rsi]
  2169  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2170  	LONG $0x0411fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm0
  2171  	LONG $0x3244be0f; BYTE $0x01   // movsx    eax, byte [rdx + rsi + 1]
  2172  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2173  	LONG $0x4411fac5; WORD $0x04b1 // vmovss    dword [rcx + 4*rsi + 4], xmm0
  2174  	LONG $0x3244be0f; BYTE $0x02   // movsx    eax, byte [rdx + rsi + 2]
  2175  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2176  	LONG $0x4411fac5; WORD $0x08b1 // vmovss    dword [rcx + 4*rsi + 8], xmm0
  2177  	LONG $0x3244be0f; BYTE $0x03   // movsx    eax, byte [rdx + rsi + 3]
  2178  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2179  	LONG $0x4411fac5; WORD $0x0cb1 // vmovss    dword [rcx + 4*rsi + 12], xmm0
  2180  	LONG $0x04c68348               // add    rsi, 4
  2181  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2182  	JNE  LBB0_1400 // loop until the index equals len
  2183  	JMP  LBB0_1553 // all elements written
  2184  
  2185  LBB0_341: // Cast case: copy each 64-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  2186  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2187  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2188  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2189  	LONG $0x10f88341         // cmp    r8d, 16
  2190  	JB   LBB0_343 // fewer than 16 elements: go straight to the scalar loop
  2191  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]  (one past the last 8-byte input element)
  2192  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2193  	JBE  LBB0_847 // input ends at or before out: no overlap, continue at LBB0_847 (outside this excerpt; presumably the vectorized path)
  2194  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]  (one past the last 8-byte output element)
  2195  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2196  	JBE  LBB0_847 // output ends at or before in: no overlap
  2197  // Reached when len < 16 or the two buffers overlap: element-by-element path from index 0.
  2198  LBB0_343:
  2199  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  2200  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  2201  LBB0_1405:
  2202  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2203  	WORD $0xf749; BYTE $0xd0 // not    r8
  2204  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  2205  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2206  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  2207  	JE   LBB0_1407 // len is a multiple of 4: skip the single-element loop
  2208  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  2209  LBB0_1406:
  2210  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2211  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2212  	LONG $0x01c68348 // add    rsi, 1
  2213  	LONG $0xffc78348 // add    rdi, -1
  2214  	JNE  LBB0_1406 // repeat until rdi reaches 0
  2215  // r8 < 3 means at most 3 elements were pending at LBB0_1405, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  2216  LBB0_1407:
  2217  	LONG $0x03f88349 // cmp    r8, 3
  2218  	JB   LBB0_1553
  2219  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  2220  LBB0_1408:
  2221  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2222  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2223  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2224  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2225  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2226  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2227  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2228  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2229  	LONG $0x04c68348             // add    rsi, 4
  2230  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2231  	JNE  LBB0_1408 // loop until the index equals len
  2232  	JMP  LBB0_1553 // all elements written
  2233  
  2234  LBB0_344: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2235  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2236  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2237  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2238  	LONG $0x04f88341         // cmp    r8d, 4
  2239  	JAE  LBB0_644 // unsigned len >= 4: continue at LBB0_644 (outside this excerpt; presumably the vectorized path)
  2240  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2241  	JMP  LBB0_858 // len < 4: continue at LBB0_858 (outside this excerpt; presumably the scalar loop)
  2242  
  2243  LBB0_347: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2244  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2245  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2246  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2247  	LONG $0x10f88341         // cmp    r8d, 16
  2248  	JAE  LBB0_646 // unsigned len >= 16: continue at LBB0_646 (outside this excerpt; presumably the vectorized path)
  2249  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2250  	JMP  LBB0_979 // len < 16: continue at LBB0_979 (outside this excerpt; presumably the scalar loop)
  2251  
  2252  LBB0_350: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2253  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2254  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2255  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2256  	LONG $0x20f88341         // cmp    r8d, 32
  2257  	JAE  LBB0_649 // unsigned len >= 32: continue at LBB0_649 (outside this excerpt; presumably the vectorized path)
  2258  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2259  	JMP  LBB0_1124 // len < 32: continue at LBB0_1124 (outside this excerpt; presumably the scalar loop)
  2260  
  2261  LBB0_353: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2262  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2263  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2264  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2265  	LONG $0x10f88341         // cmp    r8d, 16
  2266  	JAE  LBB0_652 // unsigned len >= 16: continue at LBB0_652 (outside this excerpt; presumably the vectorized path)
  2267  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2268  	JMP  LBB0_1129 // len < 16: continue at LBB0_1129 (outside this excerpt; presumably the scalar loop)
  2269  
  2270  LBB0_356: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2271  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2272  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2273  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2274  	LONG $0x20f88341         // cmp    r8d, 32
  2275  	JAE  LBB0_655 // unsigned len >= 32: continue at LBB0_655 (outside this excerpt; presumably the vectorized path)
  2276  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2277  	JMP  LBB0_1134 // len < 32: continue at LBB0_1134 (outside this excerpt; presumably the scalar loop)
  2278  
  2279  LBB0_359: // Cast case: copy each 64-bit element from rdx (in) to rcx (out) unchanged; len is in r8d.
  2280  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2281  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2282  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2283  	LONG $0x10f88341         // cmp    r8d, 16
  2284  	JB   LBB0_361 // fewer than 16 elements: go straight to the scalar loop
  2285  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]  (one past the last 8-byte input element)
  2286  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2287  	JBE  LBB0_860 // input ends at or before out: no overlap, continue at LBB0_860 (outside this excerpt; presumably the vectorized path)
  2288  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]  (one past the last 8-byte output element)
  2289  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2290  	JBE  LBB0_860 // output ends at or before in: no overlap
  2291  // Reached when len < 16 or the two buffers overlap: element-by-element path from index 0.
  2292  LBB0_361:
  2293  	WORD $0xf631 // xor    esi, esi  (rsi = element index = 0)
  2294  // Scalar loop setup. NOTE(review): this label is presumably also entered from outside this excerpt with rsi already advanced -- confirm.
  2295  LBB0_1413:
  2296  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2297  	WORD $0xf749; BYTE $0xd0 // not    r8
  2298  	WORD $0x014d; BYTE $0xc8 // add    r8, r9  (r8 = len - rsi - 1)
  2299  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2300  	LONG $0x03e78348         // and    rdi, 3  (rdi = len & 3 = number of single-element iterations)
  2301  	JE   LBB0_1415 // len is a multiple of 4: skip the single-element loop
  2302  // Single-element loop: runs rdi times so the unrolled loop below can work in groups of 4.
  2303  LBB0_1414:
  2304  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2305  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2306  	LONG $0x01c68348 // add    rsi, 1
  2307  	LONG $0xffc78348 // add    rdi, -1
  2308  	JNE  LBB0_1414 // repeat until rdi reaches 0
  2309  // r8 < 3 means at most 3 elements were pending at LBB0_1413, so nothing is left for the unrolled loop (holds when the entry index was a multiple of 4).
  2310  LBB0_1415:
  2311  	LONG $0x03f88349 // cmp    r8, 3
  2312  	JB   LBB0_1553
  2313  // Main scalar loop, unrolled 4x: copies elements rsi..rsi+3 per iteration.
  2314  LBB0_1416:
  2315  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2316  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2317  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2318  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2319  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2320  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2321  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2322  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2323  	LONG $0x04c68348             // add    rsi, 4
  2324  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2325  	JNE  LBB0_1416 // loop until the index equals len
  2326  	JMP  LBB0_1553 // all elements written
  2327  
  2328  LBB0_362: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2329  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2330  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2331  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2332  	LONG $0x10f88341         // cmp    r8d, 16
  2333  	JAE  LBB0_661 // unsigned len >= 16: continue at LBB0_661 (outside this excerpt; presumably the vectorized path)
  2334  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2335  	JMP  LBB0_1139 // len < 16: continue at LBB0_1139 (outside this excerpt; presumably the scalar loop)
  2336  
  2337  LBB0_365: // Entry stub for one cast case (the itype/otype pair that reaches it is decided outside this excerpt): validate len, then pick a path by size.
  2338  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2339  	JLE  LBB0_1553 // signed 32-bit len <= 0: nothing to convert (LBB0_1553 is outside this excerpt; presumably the common exit)
  2340  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d  (r9 = len, zero-extended to 64 bits)
  2341  	LONG $0x10f88341         // cmp    r8d, 16
  2342  	JAE  LBB0_664 // unsigned len >= 16: continue at LBB0_664 (outside this excerpt; presumably the vectorized path)
  2343  	WORD $0xf631             // xor    esi, esi  (rsi = 0; the scalar loops in this function use rsi as the element index)
  2344  	JMP  LBB0_1144 // len < 16: continue at LBB0_1144 (outside this excerpt; presumably the scalar loop)
  2345  
  2346  LBB0_368: // copies len 4-byte elements from in (rdx) to out (rcx) unchanged
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2347  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2348  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2349  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2350  	LONG $0x20f88341         // cmp    r8d, 32
  2351  	JB   LBB0_370 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_863 is taken only when the in and out ranges are disjoint.
  2352  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2353  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2354  	JBE  LBB0_863 // in ends at or before out: LBB0_863 (outside this chunk; presumably the vectorized path)
  2355  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2356  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2357  	JBE  LBB0_863 // out ends at or before in: same target
  2358  
  2359  LBB0_370: // scalar path starting at index 0 (short input or overlapping buffers)
  2360  	WORD $0xf631 // xor    esi, esi
  2361  
  2362  LBB0_1421: // rax = r9 - rsi - 1 (elements left, minus one); rdi = r9 & 7 = iterations of the peel loop
  2363  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  2364  	WORD $0xf748; BYTE $0xd0 // not    rax
  2365  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
  2366  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2367  	LONG $0x07e78348         // and    rdi, 7
  2368  	JE   LBB0_1423 // nothing to peel
  2369  
  2370  LBB0_1422: // peel loop: one element per iteration (ebx is the temporary because rax holds the count)
  2371  	WORD $0x1c8b; BYTE $0xb2 // mov    ebx, dword [rdx + 4*rsi]
  2372  	WORD $0x1c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], ebx
  2373  	LONG $0x01c68348         // add    rsi, 1
  2374  	LONG $0xffc78348         // add    rdi, -1
  2375  	JNE  LBB0_1422
  2376  
  2377  LBB0_1423:
  2378  	LONG $0x07f88348 // cmp    rax, 7
  2379  	JB   LBB0_1553 // fewer than 8 elements were left at LBB0_1421: the peel loop handled them all
  2380  
  2381  LBB0_1424: // main loop, unrolled 8x; runs until rsi == r9
  2382  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  2383  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  2384  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  2385  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  2386  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  2387  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  2388  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  2389  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  2390  	LONG $0x10b2448b         // mov    eax, dword [rdx + 4*rsi + 16]
  2391  	LONG $0x10b14489         // mov    dword [rcx + 4*rsi + 16], eax
  2392  	LONG $0x14b2448b         // mov    eax, dword [rdx + 4*rsi + 20]
  2393  	LONG $0x14b14489         // mov    dword [rcx + 4*rsi + 20], eax
  2394  	LONG $0x18b2448b         // mov    eax, dword [rdx + 4*rsi + 24]
  2395  	LONG $0x18b14489         // mov    dword [rcx + 4*rsi + 24], eax
  2396  	LONG $0x1cb2448b         // mov    eax, dword [rdx + 4*rsi + 28]
  2397  	LONG $0x1cb14489         // mov    dword [rcx + 4*rsi + 28], eax
  2398  	LONG $0x08c68348         // add    rsi, 8
  2399  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  2400  	JNE  LBB0_1424
  2401  	JMP  LBB0_1553
  2402  
  2403  LBB0_371: // widens len 1-byte elements from in (rdx) to 8-byte elements in out (rcx) by zero-extension
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2404  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2405  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2406  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2407  	LONG $0x10f88341         // cmp    r8d, 16
  2408  	JB   LBB0_373 // fewer than 16 elements: scalar path
        // Overlap test: LBB0_866 is taken only when the in and out ranges are disjoint.
  2409  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2410  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2411  	JBE  LBB0_866 // in ends at or before out: LBB0_866 (outside this chunk; presumably the vectorized path)
  2412  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  2413  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2414  	JBE  LBB0_866 // out ends at or before in: same target
  2415  
  2416  LBB0_373: // scalar path starting at index 0 (short input or overlapping buffers)
  2417  	WORD $0xf631 // xor    esi, esi
  2418  
  2419  LBB0_1429: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2420  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2421  	WORD $0xf749; BYTE $0xd0 // not    r8
  2422  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2423  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2424  	LONG $0x03e78348         // and    rdi, 3
  2425  	JE   LBB0_1431 // nothing to peel
  2426  
  2427  LBB0_1430: // peel loop: one element per iteration; movzx into eax also clears the upper half of rax
  2428  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  2429  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2430  	LONG $0x01c68348 // add    rsi, 1
  2431  	LONG $0xffc78348 // add    rdi, -1
  2432  	JNE  LBB0_1430
  2433  
  2434  LBB0_1431:
  2435  	LONG $0x03f88349 // cmp    r8, 3
  2436  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1429: the peel loop handled them all
  2437  
  2438  LBB0_1432: // main loop, unrolled 4x; runs until rsi == r9
  2439  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2440  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2441  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2442  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2443  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2444  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2445  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2446  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2447  	LONG $0x04c68348             // add    rsi, 4
  2448  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2449  	JNE  LBB0_1432
  2450  	JMP  LBB0_1553
  2451  
  2452  LBB0_374: // converts len 1-byte elements from in (rdx), zero-extended, to 4-byte single-precision floats in out (rcx)
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2453  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2454  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2455  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2456  	LONG $0x20f88341         // cmp    r8d, 32
  2457  	JB   LBB0_376 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_869 is taken only when the in and out ranges are disjoint.
  2458  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2459  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2460  	JBE  LBB0_869 // in ends at or before out: LBB0_869 (outside this chunk; presumably the vectorized path)
  2461  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2462  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2463  	JBE  LBB0_869 // out ends at or before in: same target
  2464  
  2465  LBB0_376: // scalar path starting at index 0 (short input or overlapping buffers)
  2466  	WORD $0xf631 // xor    esi, esi
  2467  
  2468  LBB0_1437: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2469  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2470  	WORD $0xf749; BYTE $0xd0 // not    r8
  2471  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2472  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2473  	LONG $0x03e78348         // and    rdi, 3
  2474  	JE   LBB0_1439 // nothing to peel
  2475  
  2476  LBB0_1438: // peel loop: one element per iteration; only the low 32 bits of xmm0 are stored, so xmm4's contents do not reach out
  2477  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2478  	LONG $0xc02adac5             // vcvtsi2ss    xmm0, xmm4, eax
  2479  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
  2480  	LONG $0x01c68348             // add    rsi, 1
  2481  	LONG $0xffc78348             // add    rdi, -1
  2482  	JNE  LBB0_1438
  2483  
  2484  LBB0_1439:
  2485  	LONG $0x03f88349 // cmp    r8, 3
  2486  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1437: the peel loop handled them all
  2487  
  2488  LBB0_1440: // main loop, unrolled 4x; runs until rsi == r9
  2489  	LONG $0x3204b60f               // movzx    eax, byte [rdx + rsi]
  2490  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2491  	LONG $0x0411fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm0
  2492  	LONG $0x3244b60f; BYTE $0x01   // movzx    eax, byte [rdx + rsi + 1]
  2493  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2494  	LONG $0x4411fac5; WORD $0x04b1 // vmovss    dword [rcx + 4*rsi + 4], xmm0
  2495  	LONG $0x3244b60f; BYTE $0x02   // movzx    eax, byte [rdx + rsi + 2]
  2496  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2497  	LONG $0x4411fac5; WORD $0x08b1 // vmovss    dword [rcx + 4*rsi + 8], xmm0
  2498  	LONG $0x3244b60f; BYTE $0x03   // movzx    eax, byte [rdx + rsi + 3]
  2499  	LONG $0xc02adac5               // vcvtsi2ss    xmm0, xmm4, eax
  2500  	LONG $0x4411fac5; WORD $0x0cb1 // vmovss    dword [rcx + 4*rsi + 12], xmm0
  2501  	LONG $0x04c68348               // add    rsi, 4
  2502  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2503  	JNE  LBB0_1440
  2504  	JMP  LBB0_1553
  2505  
  2506  LBB0_377: // dispatch stub: chooses a continuation by element count (r8 = len, loaded at function entry)
  2507  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2508  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
        // r9 = low 32 bits of len, zero-extended to 64 bits
  2509  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2510  	LONG $0x10f88341         // cmp    r8d, 16
  2511  	JAE  LBB0_676 // 16 or more elements: continue at LBB0_676 (outside this chunk; presumably a vectorized path)
  2512  	WORD $0xf631             // xor    esi, esi
  2513  	JMP  LBB0_984 // fewer than 16: rsi = 0 (presumably the start index), continue at LBB0_984 (outside this chunk)
  2514  
  2515  LBB0_380: // dispatch stub: chooses a continuation by element count (r8 = len, loaded at function entry)
  2516  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2517  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
        // r9 = low 32 bits of len, zero-extended to 64 bits
  2518  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2519  	LONG $0x20f88341         // cmp    r8d, 32
  2520  	JAE  LBB0_679 // 32 or more elements: continue at LBB0_679 (outside this chunk; presumably a vectorized path)
  2521  	WORD $0xf631             // xor    esi, esi
  2522  	JMP  LBB0_989 // fewer than 32: rsi = 0 (presumably the start index), continue at LBB0_989 (outside this chunk)
  2523  
  2524  LBB0_383: // narrows len 4-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2525  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2526  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2527  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2528  	LONG $0x20f88341         // cmp    r8d, 32
  2529  	JB   LBB0_385 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_872 is taken only when the in and out ranges are disjoint.
  2530  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2531  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2532  	JBE  LBB0_872 // in ends at or before out: LBB0_872 (outside this chunk; presumably the vectorized path)
  2533  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2534  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2535  	JBE  LBB0_872 // out ends at or before in: same target
  2536  
  2537  LBB0_385: // scalar path starting at index 0 (short input or overlapping buffers)
  2538  	WORD $0xf631 // xor    esi, esi
  2539  
  2540  LBB0_1445: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2541  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2542  	WORD $0xf749; BYTE $0xd0 // not    r8
  2543  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2544  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2545  	LONG $0x03e78348         // and    rdi, 3
  2546  	JE   LBB0_1447 // nothing to peel
  2547  
  2548  LBB0_1446: // peel loop: one element per iteration
  2549  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  2550  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2551  	LONG $0x01c68348         // add    rsi, 1
  2552  	LONG $0xffc78348         // add    rdi, -1
  2553  	JNE  LBB0_1446
  2554  
  2555  LBB0_1447:
  2556  	LONG $0x03f88349 // cmp    r8, 3
  2557  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1445: the peel loop handled them all
  2558  
  2559  LBB0_1448: // main loop, unrolled 4x; runs until rsi == r9
  2560  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  2561  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2562  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  2563  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2564  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  2565  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2566  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  2567  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2568  	LONG $0x04c68348             // add    rsi, 4
  2569  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2570  	JNE  LBB0_1448
  2571  	JMP  LBB0_1553
  2572  
  2573  LBB0_386: // converts len 8-byte double-precision floats from in (rdx) to 1-byte elements in out (rcx)
        // Each value is truncated to a 32-bit integer (vcvttsd2si) and only the low byte of that result is stored.
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2574  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2575  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2576  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2577  	LONG $0x10f88341         // cmp    r8d, 16
  2578  	JB   LBB0_388 // fewer than 16 elements: scalar path
        // Overlap test: LBB0_875 is taken only when the in and out ranges are disjoint.
  2579  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2580  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2581  	JBE  LBB0_875 // in ends at or before out: LBB0_875 (outside this chunk; presumably the vectorized path)
  2582  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2583  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2584  	JBE  LBB0_875 // out ends at or before in: same target
  2585  
  2586  LBB0_388: // scalar path starting at index 0 (short input or overlapping buffers)
  2587  	WORD $0xf631 // xor    esi, esi
  2588  
  2589  LBB0_1453: // rax = r9 - rsi - 1 (elements left, minus one); rdi = r9 & 3 = peel iterations
  2590  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  2591  	WORD $0xf748; BYTE $0xd0 // not    rax
  2592  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
  2593  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2594  	LONG $0x03e78348         // and    rdi, 3
  2595  	JE   LBB0_1455 // nothing to peel
  2596  
  2597  LBB0_1454: // peel loop: one element per iteration (ebx is the temporary because rax holds the count)
  2598  	LONG $0x1c2cfbc5; BYTE $0xf2 // vcvttsd2si    ebx, qword [rdx + 8*rsi]
  2599  	WORD $0x1c88; BYTE $0x31     // mov    byte [rcx + rsi], bl
  2600  	LONG $0x01c68348             // add    rsi, 1
  2601  	LONG $0xffc78348             // add    rdi, -1
  2602  	JNE  LBB0_1454
  2603  
  2604  LBB0_1455:
  2605  	LONG $0x03f88348 // cmp    rax, 3
  2606  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1453: the peel loop handled them all
  2607  
  2608  LBB0_1456: // main loop, unrolled 4x; runs until rsi == r9 (rax is free to be the temporary from here on)
  2609  	LONG $0x042cfbc5; BYTE $0xf2   // vcvttsd2si    eax, qword [rdx + 8*rsi]
  2610  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  2611  	LONG $0x442cfbc5; WORD $0x08f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 8]
  2612  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  2613  	LONG $0x442cfbc5; WORD $0x10f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 16]
  2614  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  2615  	LONG $0x442cfbc5; WORD $0x18f2 // vcvttsd2si    eax, qword [rdx + 8*rsi + 24]
  2616  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  2617  	LONG $0x04c68348               // add    rsi, 4
  2618  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2619  	JNE  LBB0_1456
  2620  	JMP  LBB0_1553
  2621  
  2622  LBB0_389: // copies len 1-byte elements from in (rdx) to out (rcx) unchanged
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2623  	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
  2624  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2625  	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
  2626  	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
  2627  	JB   LBB0_391 // fewer than 128 elements: scalar path
        // Overlap test: LBB0_878 is taken only when the in and out ranges are disjoint.
  2628  	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
  2629  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  2630  	JBE  LBB0_878 // in ends at or before out: LBB0_878 (outside this chunk; presumably the vectorized path)
  2631  	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
  2632  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
  2633  	JBE  LBB0_878 // out ends at or before in: same target
  2634  
  2635  LBB0_391: // scalar path starting at index 0 (short input or overlapping buffers)
  2636  	WORD $0xf631 // xor    esi, esi
  2637  
  2638  LBB0_1461: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2639  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2640  	WORD $0xf749; BYTE $0xd0 // not    r8
  2641  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2642  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2643  	LONG $0x03e78348         // and    rdi, 3
  2644  	JE   LBB0_1463 // nothing to peel
  2645  
  2646  LBB0_1462: // peel loop: one element per iteration
  2647  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2648  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2649  	LONG $0x01c68348         // add    rsi, 1
  2650  	LONG $0xffc78348         // add    rdi, -1
  2651  	JNE  LBB0_1462
  2652  
  2653  LBB0_1463:
  2654  	LONG $0x03f88349 // cmp    r8, 3
  2655  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1461: the peel loop handled them all
  2656  
  2657  LBB0_1464: // main loop, unrolled 4x; runs until rsi == r9
  2658  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2659  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2660  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2661  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2662  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2663  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2664  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2665  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2666  	LONG $0x04c68348             // add    rsi, 4
  2667  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2668  	JNE  LBB0_1464
  2669  	JMP  LBB0_1553
  2670  
  2671  LBB0_392: // narrows len 8-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2672  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2673  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2674  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2675  	LONG $0x10f88341         // cmp    r8d, 16
  2676  	JB   LBB0_394 // fewer than 16 elements: scalar path
        // Overlap test: LBB0_881 is taken only when the in and out ranges are disjoint.
  2677  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2678  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2679  	JBE  LBB0_881 // in ends at or before out: LBB0_881 (outside this chunk; presumably the vectorized path)
  2680  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2681  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2682  	JBE  LBB0_881 // out ends at or before in: same target
  2683  
  2684  LBB0_394: // scalar path starting at index 0 (short input or overlapping buffers)
  2685  	WORD $0xf631 // xor    esi, esi
  2686  
  2687  LBB0_1469: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2688  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2689  	WORD $0xf749; BYTE $0xd0 // not    r8
  2690  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2691  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2692  	LONG $0x03e78348         // and    rdi, 3
  2693  	JE   LBB0_1471 // nothing to peel
  2694  
  2695  LBB0_1470: // peel loop: one element per iteration
  2696  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  2697  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2698  	LONG $0x01c68348         // add    rsi, 1
  2699  	LONG $0xffc78348         // add    rdi, -1
  2700  	JNE  LBB0_1470
  2701  
  2702  LBB0_1471:
  2703  	LONG $0x03f88349 // cmp    r8, 3
  2704  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1469: the peel loop handled them all
  2705  
  2706  LBB0_1472: // main loop, unrolled 4x; runs until rsi == r9
  2707  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  2708  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2709  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  2710  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2711  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  2712  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2713  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  2714  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2715  	LONG $0x04c68348             // add    rsi, 4
  2716  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2717  	JNE  LBB0_1472
  2718  	JMP  LBB0_1553
  2719  
  2720  LBB0_395: // narrows len 2-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2721  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2722  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2723  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2724  	LONG $0x40f88341         // cmp    r8d, 64
  2725  	JB   LBB0_397 // fewer than 64 elements: scalar path
        // Overlap test: LBB0_884 is taken only when the in and out ranges are disjoint.
  2726  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  2727  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2728  	JBE  LBB0_884 // in ends at or before out: LBB0_884 (outside this chunk; presumably the vectorized path)
  2729  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2730  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2731  	JBE  LBB0_884 // out ends at or before in: same target
  2732  
  2733  LBB0_397: // scalar path starting at index 0 (short input or overlapping buffers)
  2734  	WORD $0xf631 // xor    esi, esi
  2735  
  2736  LBB0_1477: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2737  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2738  	WORD $0xf749; BYTE $0xd0 // not    r8
  2739  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2740  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2741  	LONG $0x03e78348         // and    rdi, 3
  2742  	JE   LBB0_1479 // nothing to peel
  2743  
  2744  LBB0_1478: // peel loop: one element per iteration
  2745  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
  2746  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2747  	LONG $0x01c68348         // add    rsi, 1
  2748  	LONG $0xffc78348         // add    rdi, -1
  2749  	JNE  LBB0_1478
  2750  
  2751  LBB0_1479:
  2752  	LONG $0x03f88349 // cmp    r8, 3
  2753  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1477: the peel loop handled them all
  2754  
  2755  LBB0_1480: // main loop, unrolled 4x; runs until rsi == r9
  2756  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
  2757  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2758  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
  2759  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2760  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
  2761  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2762  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
  2763  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2764  	LONG $0x04c68348             // add    rsi, 4
  2765  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2766  	JNE  LBB0_1480
  2767  	JMP  LBB0_1553
  2768  
  2769  LBB0_398: // narrows len 2-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2770  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2771  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2772  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2773  	LONG $0x40f88341         // cmp    r8d, 64
  2774  	JB   LBB0_400 // fewer than 64 elements: scalar path
        // Overlap test: LBB0_887 is taken only when the in and out ranges are disjoint.
  2775  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  2776  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2777  	JBE  LBB0_887 // in ends at or before out: LBB0_887 (outside this chunk; presumably the vectorized path)
  2778  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2779  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2780  	JBE  LBB0_887 // out ends at or before in: same target
  2781  
  2782  LBB0_400: // scalar path starting at index 0 (short input or overlapping buffers)
  2783  	WORD $0xf631 // xor    esi, esi
  2784  
  2785  LBB0_1485: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2786  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2787  	WORD $0xf749; BYTE $0xd0 // not    r8
  2788  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2789  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2790  	LONG $0x03e78348         // and    rdi, 3
  2791  	JE   LBB0_1487 // nothing to peel
  2792  
  2793  LBB0_1486: // peel loop: one element per iteration
  2794  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
  2795  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2796  	LONG $0x01c68348         // add    rsi, 1
  2797  	LONG $0xffc78348         // add    rdi, -1
  2798  	JNE  LBB0_1486
  2799  
  2800  LBB0_1487:
  2801  	LONG $0x03f88349 // cmp    r8, 3
  2802  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1485: the peel loop handled them all
  2803  
  2804  LBB0_1488: // main loop, unrolled 4x; runs until rsi == r9
  2805  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
  2806  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2807  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
  2808  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2809  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
  2810  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2811  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
  2812  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2813  	LONG $0x04c68348             // add    rsi, 4
  2814  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2815  	JNE  LBB0_1488
  2816  	JMP  LBB0_1553
  2817  
  2818  LBB0_401: // narrows len 8-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2819  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2820  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2821  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2822  	LONG $0x10f88341         // cmp    r8d, 16
  2823  	JB   LBB0_403 // fewer than 16 elements: scalar path
        // Overlap test: LBB0_890 is taken only when the in and out ranges are disjoint.
  2824  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2825  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2826  	JBE  LBB0_890 // in ends at or before out: LBB0_890 (outside this chunk; presumably the vectorized path)
  2827  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2828  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2829  	JBE  LBB0_890 // out ends at or before in: same target
  2830  
  2831  LBB0_403: // scalar path starting at index 0 (short input or overlapping buffers)
  2832  	WORD $0xf631 // xor    esi, esi
  2833  
  2834  LBB0_1493: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2835  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2836  	WORD $0xf749; BYTE $0xd0 // not    r8
  2837  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2838  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2839  	LONG $0x03e78348         // and    rdi, 3
  2840  	JE   LBB0_1495 // nothing to peel
  2841  
  2842  LBB0_1494: // peel loop: one element per iteration
  2843  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  2844  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2845  	LONG $0x01c68348         // add    rsi, 1
  2846  	LONG $0xffc78348         // add    rdi, -1
  2847  	JNE  LBB0_1494
  2848  
  2849  LBB0_1495:
  2850  	LONG $0x03f88349 // cmp    r8, 3
  2851  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1493: the peel loop handled them all
  2852  
  2853  LBB0_1496: // main loop, unrolled 4x; runs until rsi == r9
  2854  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  2855  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2856  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  2857  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2858  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  2859  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2860  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  2861  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2862  	LONG $0x04c68348             // add    rsi, 4
  2863  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2864  	JNE  LBB0_1496
  2865  	JMP  LBB0_1553
  2866  
  2867  LBB0_404: // converts len 4-byte single-precision floats from in (rdx) to 1-byte elements in out (rcx)
        // Each value is truncated to a 32-bit integer (vcvttss2si) and only the low byte of that result is stored.
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2868  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2869  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2870  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2871  	LONG $0x20f88341         // cmp    r8d, 32
  2872  	JB   LBB0_406 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_893 is taken only when the in and out ranges are disjoint.
  2873  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2874  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2875  	JBE  LBB0_893 // in ends at or before out: LBB0_893 (outside this chunk; presumably the vectorized path)
  2876  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2877  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2878  	JBE  LBB0_893 // out ends at or before in: same target
  2879  
  2880  LBB0_406: // scalar path starting at index 0 (short input or overlapping buffers)
  2881  	WORD $0xf631 // xor    esi, esi
  2882  
  2883  LBB0_1501: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2884  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2885  	WORD $0xf749; BYTE $0xd0 // not    r8
  2886  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2887  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2888  	LONG $0x03e78348         // and    rdi, 3
  2889  	JE   LBB0_1503 // nothing to peel
  2890  
  2891  LBB0_1502: // peel loop: one element per iteration
  2892  	LONG $0x042cfac5; BYTE $0xb2 // vcvttss2si    eax, dword [rdx + 4*rsi]
  2893  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2894  	LONG $0x01c68348             // add    rsi, 1
  2895  	LONG $0xffc78348             // add    rdi, -1
  2896  	JNE  LBB0_1502
  2897  
  2898  LBB0_1503:
  2899  	LONG $0x03f88349 // cmp    r8, 3
  2900  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1501: the peel loop handled them all
  2901  
  2902  LBB0_1504: // main loop, unrolled 4x; runs until rsi == r9
  2903  	LONG $0x042cfac5; BYTE $0xb2   // vcvttss2si    eax, dword [rdx + 4*rsi]
  2904  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  2905  	LONG $0x442cfac5; WORD $0x04b2 // vcvttss2si    eax, dword [rdx + 4*rsi + 4]
  2906  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  2907  	LONG $0x442cfac5; WORD $0x08b2 // vcvttss2si    eax, dword [rdx + 4*rsi + 8]
  2908  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  2909  	LONG $0x442cfac5; WORD $0x0cb2 // vcvttss2si    eax, dword [rdx + 4*rsi + 12]
  2910  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  2911  	LONG $0x04c68348               // add    rsi, 4
  2912  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2913  	JNE  LBB0_1504
  2914  	JMP  LBB0_1553
  2915  
  2916  LBB0_407: // copies len 1-byte elements from in (rdx) to out (rcx) unchanged
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2917  	WORD $0x8545; BYTE $0xc0                   // test    r8d, r8d
  2918  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2919  	WORD $0x8945; BYTE $0xc1                   // mov    r9d, r8d
  2920  	LONG $0x80f88141; WORD $0x0000; BYTE $0x00 // cmp    r8d, 128
  2921  	JB   LBB0_409 // fewer than 128 elements: scalar path
        // Overlap test: LBB0_896 is taken only when the in and out ranges are disjoint.
  2922  	LONG $0x0a048d4a                           // lea    rax, [rdx + r9]
  2923  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  2924  	JBE  LBB0_896 // in ends at or before out: LBB0_896 (outside this chunk; presumably the vectorized path)
  2925  	LONG $0x09048d4a                           // lea    rax, [rcx + r9]
  2926  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
  2927  	JBE  LBB0_896 // out ends at or before in: same target
  2928  
  2929  LBB0_409: // scalar path starting at index 0 (short input or overlapping buffers)
  2930  	WORD $0xf631 // xor    esi, esi
  2931  
  2932  LBB0_1509: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2933  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2934  	WORD $0xf749; BYTE $0xd0 // not    r8
  2935  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2936  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2937  	LONG $0x03e78348         // and    rdi, 3
  2938  	JE   LBB0_1511 // nothing to peel
  2939  
  2940  LBB0_1510: // peel loop: one element per iteration
  2941  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2942  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2943  	LONG $0x01c68348         // add    rsi, 1
  2944  	LONG $0xffc78348         // add    rdi, -1
  2945  	JNE  LBB0_1510
  2946  
  2947  LBB0_1511:
  2948  	LONG $0x03f88349 // cmp    r8, 3
  2949  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1509: the peel loop handled them all
  2950  
  2951  LBB0_1512: // main loop, unrolled 4x; runs until rsi == r9
  2952  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2953  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2954  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2955  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2956  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2957  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2958  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2959  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2960  	LONG $0x04c68348             // add    rsi, 4
  2961  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2962  	JNE  LBB0_1512
  2963  	JMP  LBB0_1553
  2964  
  2965  LBB0_410: // narrows len 4-byte elements from in (rdx) to 1-byte elements in out (rcx), keeping each element's first (least-significant on x86) byte
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  2966  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2967  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  2968  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2969  	LONG $0x20f88341         // cmp    r8d, 32
  2970  	JB   LBB0_412 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_899 is taken only when the in and out ranges are disjoint.
  2971  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2972  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2973  	JBE  LBB0_899 // in ends at or before out: LBB0_899 (outside this chunk; presumably the vectorized path)
  2974  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2975  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2976  	JBE  LBB0_899 // out ends at or before in: same target
  2977  
  2978  LBB0_412: // scalar path starting at index 0 (short input or overlapping buffers)
  2979  	WORD $0xf631 // xor    esi, esi
  2980  
  2981  LBB0_1517: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  2982  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2983  	WORD $0xf749; BYTE $0xd0 // not    r8
  2984  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2985  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2986  	LONG $0x03e78348         // and    rdi, 3
  2987  	JE   LBB0_1519 // nothing to peel
  2988  
  2989  LBB0_1518: // peel loop: one element per iteration
  2990  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  2991  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2992  	LONG $0x01c68348         // add    rsi, 1
  2993  	LONG $0xffc78348         // add    rdi, -1
  2994  	JNE  LBB0_1518
  2995  
  2996  LBB0_1519:
  2997  	LONG $0x03f88349 // cmp    r8, 3
  2998  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1517: the peel loop handled them all
  2999  
  3000  LBB0_1520: // main loop, unrolled 4x; runs until rsi == r9
  3001  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  3002  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  3003  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  3004  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  3005  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  3006  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  3007  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  3008  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  3009  	LONG $0x04c68348             // add    rsi, 4
  3010  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  3011  	JNE  LBB0_1520
  3012  	JMP  LBB0_1553
  3013  
  3014  LBB0_413: // copies len 4-byte elements from in (rdx) to out (rcx) unchanged
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  3015  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3016  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  3017  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3018  	LONG $0x20f88341         // cmp    r8d, 32
  3019  	JB   LBB0_415 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_902 is taken only when the in and out ranges are disjoint.
  3020  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  3021  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3022  	JBE  LBB0_902 // in ends at or before out: LBB0_902 (outside this chunk; presumably the vectorized path)
  3023  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  3024  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3025  	JBE  LBB0_902 // out ends at or before in: same target
  3026  
  3027  LBB0_415: // scalar path starting at index 0 (short input or overlapping buffers)
  3028  	WORD $0xf631 // xor    esi, esi
  3029  
  3030  LBB0_1525: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  3031  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3032  	WORD $0xf749; BYTE $0xd0 // not    r8
  3033  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3034  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3035  	LONG $0x03e78348         // and    rdi, 3
  3036  	JE   LBB0_1527 // nothing to peel
  3037  
  3038  LBB0_1526: // peel loop: one element per iteration
  3039  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3040  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3041  	LONG $0x01c68348         // add    rsi, 1
  3042  	LONG $0xffc78348         // add    rdi, -1
  3043  	JNE  LBB0_1526
  3044  
  3045  LBB0_1527:
  3046  	LONG $0x03f88349 // cmp    r8, 3
  3047  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1525: the peel loop handled them all
  3048  
  3049  LBB0_1528: // main loop, unrolled 4x; runs until rsi == r9
  3050  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3051  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3052  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  3053  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  3054  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  3055  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  3056  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  3057  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  3058  	LONG $0x04c68348         // add    rsi, 4
  3059  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  3060  	JNE  LBB0_1528
  3061  	JMP  LBB0_1553
  3062  
  3063  LBB0_416: // dispatch stub: chooses a continuation by element count (r8 = len, loaded at function entry)
  3064  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3065  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
        // r9 = low 32 bits of len, zero-extended to 64 bits
  3066  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3067  	LONG $0x10f88341         // cmp    r8d, 16
  3068  	JAE  LBB0_715 // 16 or more elements: continue at LBB0_715 (outside this chunk; presumably a vectorized path)
  3069  	WORD $0xf631             // xor    esi, esi
  3070  	JMP  LBB0_994 // fewer than 16: rsi = 0 (presumably the start index), continue at LBB0_994 (outside this chunk)
  3071  
  3072  LBB0_419: // widens len 1-byte elements from in (rdx) to 4-byte elements in out (rcx) by sign-extension
        // Registers: rdx = in, rcx = out, r8 = len (all loaded at function entry); r9 = count; rsi = element index.
  3073  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3074  	JLE  LBB0_1553 // low 32 bits of len <= 0 (signed): skip to LBB0_1553 (outside this chunk)
  3075  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3076  	LONG $0x20f88341         // cmp    r8d, 32
  3077  	JB   LBB0_421 // fewer than 32 elements: scalar path
        // Overlap test: LBB0_905 is taken only when the in and out ranges are disjoint.
  3078  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  3079  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3080  	JBE  LBB0_905 // in ends at or before out: LBB0_905 (outside this chunk; presumably the vectorized path)
  3081  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  3082  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3083  	JBE  LBB0_905 // out ends at or before in: same target
  3084  
  3085  LBB0_421: // scalar path starting at index 0 (short input or overlapping buffers)
  3086  	WORD $0xf631 // xor    esi, esi
  3087  
  3088  LBB0_1533: // r8 = r9 - rsi - 1 (elements left, minus one; r8 is reused, the count lives in r9); rdi = r9 & 3 = peel iterations
  3089  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3090  	WORD $0xf749; BYTE $0xd0 // not    r8
  3091  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3092  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3093  	LONG $0x03e78348         // and    rdi, 3
  3094  	JE   LBB0_1535 // nothing to peel
  3095  
  3096  LBB0_1534: // peel loop: one element per iteration
  3097  	LONG $0x3204be0f         // movsx    eax, byte [rdx + rsi]
  3098  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3099  	LONG $0x01c68348         // add    rsi, 1
  3100  	LONG $0xffc78348         // add    rdi, -1
  3101  	JNE  LBB0_1534
  3102  
  3103  LBB0_1535:
  3104  	LONG $0x03f88349 // cmp    r8, 3
  3105  	JB   LBB0_1553 // fewer than 4 elements were left at LBB0_1533: the peel loop handled them all
  3106  
  3107  LBB0_1536: // main loop, unrolled 4x; runs until rsi == r9
  3108  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  3109  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  3110  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  3111  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
  3112  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  3113  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
  3114  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  3115  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
  3116  	LONG $0x04c68348             // add    rsi, 4
  3117  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  3118  	JNE  LBB0_1536
  3119  	JMP  LBB0_1553
  3120  
  3121  LBB0_422:
	// Length gate for one cast case; element types are decided outside this
	// chunk. r8d is len as loaded at function entry (assuming unchanged).
	//   len <= 0  -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len >= 16 -> LBB0_721 (outside this chunk; presumably the vector path)
	//   otherwise esi = 0 and continue at LBB0_999 (outside this chunk;
	//             presumably the scalar loop)
  3122  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3123  	JLE  LBB0_1553
  3124  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3125  	LONG $0x10f88341         // cmp    r8d, 16
  3126  	JAE  LBB0_721
  3127  	WORD $0xf631             // xor    esi, esi
  3128  	JMP  LBB0_999
  3129  
  3130  LBB0_425:
	// Length gate for one cast case; element types are decided outside this
	// chunk. r8d is len as loaded at function entry (assuming unchanged).
	//   len <= 0  -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len >= 32 -> LBB0_724 (outside this chunk; presumably the vector path)
	//   otherwise esi = 0 and continue at LBB0_1004 (outside this chunk;
	//             presumably the scalar loop)
  3131  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3132  	JLE  LBB0_1553
  3133  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3134  	LONG $0x20f88341         // cmp    r8d, 32
  3135  	JAE  LBB0_724
  3136  	WORD $0xf631             // xor    esi, esi
  3137  	JMP  LBB0_1004
  3138  
  3139  LBB0_428:
	// Length gate for one cast case; element types are decided outside this
	// chunk. r8d is len as loaded at function entry (assuming unchanged).
	//   len <= 0  -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len >= 32 -> LBB0_727 (outside this chunk; presumably the vector path)
	//   otherwise esi = 0 and continue at LBB0_1009 (outside this chunk;
	//             presumably the scalar loop)
  3140  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3141  	JLE  LBB0_1553
  3142  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3143  	LONG $0x20f88341         // cmp    r8d, 32
  3144  	JAE  LBB0_727
  3145  	WORD $0xf631             // xor    esi, esi
  3146  	JMP  LBB0_1009
  3147  
  3148  LBB0_431:
	// Length gate for one cast case; element types are decided outside this
	// chunk. r8d is len as loaded at function entry (assuming unchanged).
	//   len <= 0  -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len >= 16 -> LBB0_730 (outside this chunk; presumably the vector path)
	//   otherwise esi = 0 and continue at LBB0_1014 (outside this chunk;
	//             presumably the scalar loop)
  3149  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3150  	JLE  LBB0_1553
  3151  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3152  	LONG $0x10f88341         // cmp    r8d, 16
  3153  	JAE  LBB0_730
  3154  	WORD $0xf631             // xor    esi, esi
  3155  	JMP  LBB0_1014
  3156  
  3157  LBB0_434:
	// Length gate for one cast case; element types are decided outside this
	// chunk. r8d is len as loaded at function entry (assuming unchanged).
	//   len <= 0  -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len >= 32 -> LBB0_733 (outside this chunk; presumably the vector path)
	//   otherwise esi = 0 and continue at LBB0_1019 (outside this chunk;
	//             presumably the scalar loop)
  3158  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3159  	JLE  LBB0_1553
  3160  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3161  	LONG $0x20f88341         // cmp    r8d, 32
  3162  	JAE  LBB0_733
  3163  	WORD $0xf631             // xor    esi, esi
  3164  	JMP  LBB0_1019
  3165  
  3166  LBB0_437:
	// Cast case: zero-extend 8-bit source elements to 32-bit outputs
	// (movzx byte -> dword). rdx/rcx are the in/out pointers loaded at
	// function entry (assuming unchanged by the dispatch code); r8d is len.
	//   len <= 0 -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len < 32 -> scalar path at LBB0_439
	// For len >= 32 an overlap test follows: if in + r9 <= out, or
	// out + 4*r9 <= in, the ranges are disjoint and control goes to
	// LBB0_908 (outside this chunk; presumably the vectorized loop).
	// Otherwise fall through to the scalar path.
  3167  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3168  	JLE  LBB0_1553
  3169  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3170  	LONG $0x20f88341         // cmp    r8d, 32
  3171  	JB   LBB0_439
  3172  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  3173  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3174  	JBE  LBB0_908
  3175  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  3176  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3177  	JBE  LBB0_908
  3178  
  3179  LBB0_439:
	// Scalar path starts at element index rsi = 0.
  3180  	WORD $0xf631 // xor    esi, esi
  3181  
  3182  LBB0_1541:
	// Scalar remainder set-up (rsi = first unprocessed index, r9 = len).
	//   r8  = r9 - rsi - 1   (computed as ~rsi + r9)
	//   rdi = r9 & 3         (number of single-element peel iterations)
	// NOTE(review): this label may also be entered from code outside this
	// chunk with a non-zero rsi - confirm.
  3183  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3184  	WORD $0xf749; BYTE $0xd0 // not    r8
  3185  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3186  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3187  	LONG $0x03e78348         // and    rdi, 3
  3188  	JE   LBB0_1543
  3189  
  3190  LBB0_1542:
	// Peel loop: one element per iteration, rdi counts down to zero.
  3191  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  3192  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3193  	LONG $0x01c68348         // add    rsi, 1
  3194  	LONG $0xffc78348         // add    rdi, -1
  3195  	JNE  LBB0_1542
  3196  
  3197  LBB0_1543:
	// r8 < 3 (unsigned) means the peel loop already covered everything.
  3198  	LONG $0x03f88349 // cmp    r8, 3
  3199  	JB   LBB0_1553
  3200  
  3201  LBB0_1544:
	// Main scalar loop, unrolled by 4; runs until rsi == r9.
  3202  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  3203  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  3204  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  3205  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
  3206  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  3207  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
  3208  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  3209  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
  3210  	LONG $0x04c68348             // add    rsi, 4
  3211  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  3212  	JNE  LBB0_1544
  3213  	JMP  LBB0_1553
  3214  
  3215  LBB0_440:
	// Cast case: 32-bit source elements copied unchanged to 32-bit outputs
	// (plain dword load/store). rdx/rcx are the in/out pointers loaded at
	// function entry (assuming unchanged by the dispatch code); r8d is len.
	//   len <= 0 -> LBB0_1553 (outside this chunk; presumably the common exit)
	//   r9 = len zero-extended
	//   len < 32 -> scalar path at LBB0_442
	// For len >= 32 an overlap test follows: if in + 4*r9 <= out, or
	// out + 4*r9 <= in, the ranges are disjoint and control goes to
	// LBB0_911 (outside this chunk; presumably the vectorized loop).
	// Otherwise fall through to the scalar path.
  3216  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3217  	JLE  LBB0_1553
  3218  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3219  	LONG $0x20f88341         // cmp    r8d, 32
  3220  	JB   LBB0_442
  3221  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  3222  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3223  	JBE  LBB0_911
  3224  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  3225  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3226  	JBE  LBB0_911
  3227  
  3228  LBB0_442:
	// Scalar path starts at element index rsi = 0.
  3229  	WORD $0xf631 // xor    esi, esi
  3230  
  3231  LBB0_1549:
	// Scalar remainder set-up (rsi = first unprocessed index, r9 = len).
	//   r8  = r9 - rsi - 1   (computed as ~rsi + r9)
	//   rdi = r9 & 3         (number of single-element peel iterations)
	// NOTE(review): this label may also be entered from code outside this
	// chunk with a non-zero rsi - confirm.
  3232  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3233  	WORD $0xf749; BYTE $0xd0 // not    r8
  3234  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3235  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3236  	LONG $0x03e78348         // and    rdi, 3
  3237  	JE   LBB0_1551
  3238  
  3239  LBB0_1550:
	// Peel loop: one element per iteration, rdi counts down to zero.
  3240  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3241  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3242  	LONG $0x01c68348         // add    rsi, 1
  3243  	LONG $0xffc78348         // add    rdi, -1
  3244  	JNE  LBB0_1550
  3245  
  3246  LBB0_1551:
	// r8 < 3 (unsigned) means the peel loop already covered everything.
  3247  	LONG $0x03f88349 // cmp    r8, 3
  3248  	JB   LBB0_1553
  3249  
  3250  LBB0_1552:
	// Main scalar loop, unrolled by 4; runs until rsi == r9.
  3251  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3252  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3253  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  3254  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  3255  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  3256  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  3257  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  3258  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  3259  	LONG $0x04c68348         // add    rsi, 4
  3260  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  3261  	JNE  LBB0_1552
  3262  	JMP  LBB0_1553
  3263  
  3264  LBB0_446:
	// Scalar float64 -> 32-bit conversion. Each element is converted with a
	// truncating 64-bit vcvttsd2si and only the low 32 bits are stored.
	// esi is rounded down to a multiple of 4 and becomes the loop bound;
	// rdi = 0 is the element index. The values of esi and rax on entry are
	// set by code outside this chunk.
	// NOTE(review): rax looks like the leftover (count mod 4) - confirm
	// against the code that jumps here.
  3265  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3266  	WORD $0xff31             // xor    edi, edi
  3267  
  3268  LBB0_447:
	// Unrolled by 4: converts in[rdi..rdi+3] (8-byte elements) into
	// out[rdi..rdi+3] (4-byte elements); repeats until rdi == rsi.
  3269  	LONG $0x2cfbe1c4; WORD $0xfa1c             // vcvttsd2si    rbx, qword [rdx + 8*rdi]
  3270  	WORD $0x1c89; BYTE $0xb9                   // mov    dword [rcx + 4*rdi], ebx
  3271  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x08 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 8]
  3272  	LONG $0x04b95c89                           // mov    dword [rcx + 4*rdi + 4], ebx
  3273  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x10 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 16]
  3274  	LONG $0x08b95c89                           // mov    dword [rcx + 4*rdi + 8], ebx
  3275  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x18 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 24]
  3276  	LONG $0x0cb95c89                           // mov    dword [rcx + 4*rdi + 12], ebx
  3277  	LONG $0x04c78348                           // add    rdi, 4
  3278  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  3279  	JNE  LBB0_447
  3280  
  3281  LBB0_448:
	// Leftover handling: nothing to do when rax == 0. Otherwise advance both
	// pointers past the rdi elements already processed and restart the index
	// at rsi = 0.
  3282  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3283  	JE   LBB0_1553
  3284  	LONG $0xb90c8d48         // lea    rcx, [rcx + 4*rdi]
  3285  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
  3286  	WORD $0xf631             // xor    esi, esi
  3287  
  3288  LBB0_450:
	// One element per iteration until rsi == rax.
  3289  	LONG $0x2cfbe1c4; WORD $0xf23c // vcvttsd2si    rdi, qword [rdx + 8*rsi]
  3290  	WORD $0x3c89; BYTE $0xb1       // mov    dword [rcx + 4*rsi], edi
  3291  	LONG $0x01c68348               // add    rsi, 1
  3292  	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
  3293  	JNE  LBB0_450
  3294  	JMP  LBB0_1553
  3295  
  3296  LBB0_454:
	// Set-up for a 128-bit loop that narrows 64-bit elements to 32 bits.
	//   esi = r9d & -16              element count rounded down to a multiple of 16
	//   r8  = ((rsi - 16) >> 4) + 1  number of 16-element groups
	//   rsi == 16 (a single group) -> LBB0_914 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
  3297  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3298  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  3299  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  3300  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3301  	LONG $0x04e8c149         // shr    r8, 4
  3302  	LONG $0x01c08349         // add    r8, 1
  3303  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3304  	JE   LBB0_914
  3305  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3306  	LONG $0xfee08348         // and    rax, -2
  3307  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3308  	WORD $0xff31             // xor    edi, edi
  3309  
  3310  LBB0_456:
	// Each pass handles 32 elements (two groups of 16). vshufps with
	// immediate 136 (0x88) selects dwords 0 and 2 from each 16-byte operand,
	// i.e. the low 32 bits of four consecutive 64-bit source elements, and
	// packs them into one 16-byte store.
	// After the loop control goes to LBB0_915 (outside this chunk; presumably
	// handles an odd trailing group and the scalar tail - confirm).
  3311  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  3312  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  3313  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  3314  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  3315  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  3316  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  3317  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  3318  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
  3319  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  3320  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  3321  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  3322  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  3323  	QUAD $0x000080fa8410f8c5; BYTE $0x00       // vmovups    xmm0, oword [rdx + 8*rdi + 128]
  3324  	QUAD $0x0000a0fa8c10f8c5; BYTE $0x00       // vmovups    xmm1, oword [rdx + 8*rdi + 160]
  3325  	QUAD $0x0000c0fa9410f8c5; BYTE $0x00       // vmovups    xmm2, oword [rdx + 8*rdi + 192]
  3326  	QUAD $0x0000e0fa9c10f8c5; BYTE $0x00       // vmovups    xmm3, oword [rdx + 8*rdi + 224]
  3327  	QUAD $0x000090fa84c6f8c5; WORD $0x8800     // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 144], 136
  3328  	QUAD $0x0000b0fa8cc6f0c5; WORD $0x8800     // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 176], 136
  3329  	QUAD $0x0000d0fa94c6e8c5; WORD $0x8800     // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 208], 136
  3330  	QUAD $0x0000f0fa9cc6e0c5; WORD $0x8800     // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 240], 136
  3331  	LONG $0x4411f8c5; WORD $0x40b9             // vmovups    oword [rcx + 4*rdi + 64], xmm0
  3332  	LONG $0x4c11f8c5; WORD $0x50b9             // vmovups    oword [rcx + 4*rdi + 80], xmm1
  3333  	LONG $0x5411f8c5; WORD $0x60b9             // vmovups    oword [rcx + 4*rdi + 96], xmm2
  3334  	LONG $0x5c11f8c5; WORD $0x70b9             // vmovups    oword [rcx + 4*rdi + 112], xmm3
  3335  	LONG $0x20c78348                           // add    rdi, 32
  3336  	LONG $0x02c08348                           // add    rax, 2
  3337  	JNE  LBB0_456
  3338  	JMP  LBB0_915
  3339  
  3340  LBB0_457:
	// Set-up for a 256-bit loop that zero-extends 16-bit elements to 32 bits.
	//   esi = r9d & -32              element count rounded down to a multiple of 32
	//   r8  = ((rsi - 32) >> 5) + 1  number of 32-element groups
	//   rsi == 32 (a single group) -> LBB0_1020 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
  3341  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3342  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3343  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3344  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3345  	LONG $0x05e8c149         // shr    r8, 5
  3346  	LONG $0x01c08349         // add    r8, 1
  3347  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3348  	JE   LBB0_1020
  3349  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3350  	LONG $0xfee08348         // and    rax, -2
  3351  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3352  	WORD $0xff31             // xor    edi, edi
  3353  
  3354  LBB0_459:
	// Each pass handles 64 elements (two groups of 32). Every vpmovzxwd reads
	// 16 bytes (eight 16-bit values) and produces eight zero-extended dwords,
	// written with one 32-byte store.
	// After the loop control goes to LBB0_1021 (outside this chunk; presumably
	// handles an odd trailing group and the scalar tail - confirm).
  3355  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi]
  3356  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  3357  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  3358  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  3359  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  3360  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  3361  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  3362  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  3363  	LONG $0x337de2c4; WORD $0x7a44; BYTE $0x40 // vpmovzxwd    ymm0, oword [rdx + 2*rdi + 64]
  3364  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 80]
  3365  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x60 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 96]
  3366  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 112]
  3367  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  3368  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  3369  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  3370  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
  3371  	LONG $0x40c78348                           // add    rdi, 64
  3372  	LONG $0x02c08348                           // add    rax, 2
  3373  	JNE  LBB0_459
  3374  	JMP  LBB0_1021
  3375  
  3376  LBB0_460:
	// Set-up for a 256-bit loop that sign-extends 16-bit elements to 32 bits.
	//   esi = r9d & -32              element count rounded down to a multiple of 32
	//   r8  = ((rsi - 32) >> 5) + 1  number of 32-element groups
	//   rsi == 32 (a single group) -> LBB0_1025 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
  3377  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3378  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3379  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3380  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3381  	LONG $0x05e8c149         // shr    r8, 5
  3382  	LONG $0x01c08349         // add    r8, 1
  3383  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3384  	JE   LBB0_1025
  3385  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3386  	LONG $0xfee08348         // and    rax, -2
  3387  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3388  	WORD $0xff31             // xor    edi, edi
  3389  
  3390  LBB0_462:
	// Each pass handles 64 elements (two groups of 32). Every vpmovsxwd reads
	// 16 bytes (eight 16-bit values) and produces eight sign-extended dwords,
	// written with one 32-byte store.
	// After the loop control goes to LBB0_1026 (outside this chunk; presumably
	// handles an odd trailing group and the scalar tail - confirm).
  3391  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
  3392  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
  3393  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
  3394  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
  3395  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  3396  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  3397  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  3398  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  3399  	LONG $0x237de2c4; WORD $0x7a44; BYTE $0x40 // vpmovsxwd    ymm0, oword [rdx + 2*rdi + 64]
  3400  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 80]
  3401  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x60 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 96]
  3402  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 112]
  3403  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  3404  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  3405  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  3406  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
  3407  	LONG $0x40c78348                           // add    rdi, 64
  3408  	LONG $0x02c08348                           // add    rax, 2
  3409  	JNE  LBB0_462
  3410  	JMP  LBB0_1026
  3411  
  3412  LBB0_463:
	// Set-up for a 128-bit loop that narrows 64-bit elements to 32 bits
	// (same instruction sequence as the loop at LBB0_456, for a different
	// cast case selected outside this chunk).
	//   esi = r9d & -16              element count rounded down to a multiple of 16
	//   r8  = ((rsi - 16) >> 4) + 1  number of 16-element groups
	//   rsi == 16 (a single group) -> LBB0_1030 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
  3413  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3414  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  3415  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  3416  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3417  	LONG $0x04e8c149         // shr    r8, 4
  3418  	LONG $0x01c08349         // add    r8, 1
  3419  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3420  	JE   LBB0_1030
  3421  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3422  	LONG $0xfee08348         // and    rax, -2
  3423  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3424  	WORD $0xff31             // xor    edi, edi
  3425  
  3426  LBB0_465:
	// Each pass handles 32 elements (two groups of 16). vshufps with
	// immediate 136 (0x88) selects dwords 0 and 2 from each 16-byte operand,
	// i.e. the low 32 bits of four consecutive 64-bit source elements, and
	// packs them into one 16-byte store.
	// After the loop control goes to LBB0_1031 (outside this chunk; presumably
	// handles an odd trailing group and the scalar tail - confirm).
  3427  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  3428  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  3429  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  3430  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  3431  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  3432  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  3433  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  3434  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
  3435  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  3436  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  3437  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  3438  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  3439  	QUAD $0x000080fa8410f8c5; BYTE $0x00       // vmovups    xmm0, oword [rdx + 8*rdi + 128]
  3440  	QUAD $0x0000a0fa8c10f8c5; BYTE $0x00       // vmovups    xmm1, oword [rdx + 8*rdi + 160]
  3441  	QUAD $0x0000c0fa9410f8c5; BYTE $0x00       // vmovups    xmm2, oword [rdx + 8*rdi + 192]
  3442  	QUAD $0x0000e0fa9c10f8c5; BYTE $0x00       // vmovups    xmm3, oword [rdx + 8*rdi + 224]
  3443  	QUAD $0x000090fa84c6f8c5; WORD $0x8800     // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 144], 136
  3444  	QUAD $0x0000b0fa8cc6f0c5; WORD $0x8800     // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 176], 136
  3445  	QUAD $0x0000d0fa94c6e8c5; WORD $0x8800     // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 208], 136
  3446  	QUAD $0x0000f0fa9cc6e0c5; WORD $0x8800     // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 240], 136
  3447  	LONG $0x4411f8c5; WORD $0x40b9             // vmovups    oword [rcx + 4*rdi + 64], xmm0
  3448  	LONG $0x4c11f8c5; WORD $0x50b9             // vmovups    oword [rcx + 4*rdi + 80], xmm1
  3449  	LONG $0x5411f8c5; WORD $0x60b9             // vmovups    oword [rcx + 4*rdi + 96], xmm2
  3450  	LONG $0x5c11f8c5; WORD $0x70b9             // vmovups    oword [rcx + 4*rdi + 112], xmm3
  3451  	LONG $0x20c78348                           // add    rdi, 32
  3452  	LONG $0x02c08348                           // add    rax, 2
  3453  	JNE  LBB0_465
  3454  	JMP  LBB0_1031
  3455  
  3456  LBB0_466:
	// Set-up for a 128-bit loop converting float32 elements to 32-bit
	// integers, with separate handling for inputs >= 2^31.
	//   esi = r9d & -16              element count rounded down to a multiple of 16
	//   r8  = ((rsi - 16) >> 4) + 1  number of 16-element groups
	//   rsi == 16 (a single group) -> LBB0_1035 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
	// rbp is the LCDATA1 base set in the prologue. The dword at offset 44 is
	// 0x4f000000 (2^31 as float32), broadcast into xmm0; the dword at offset
	// 48 is 0x80000000, broadcast into xmm1.
  3457  	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
  3458  	WORD $0xe683; BYTE $0xf0       // and    esi, -16
  3459  	LONG $0xf0468d48               // lea    rax, [rsi - 16]
  3460  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
  3461  	LONG $0x04e8c149               // shr    r8, 4
  3462  	LONG $0x01c08349               // add    r8, 1
  3463  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
  3464  	JE   LBB0_1035
  3465  	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
  3466  	LONG $0xfee08348               // and    rax, -2
  3467  	WORD $0xf748; BYTE $0xd8       // neg    rax
  3468  	WORD $0xff31                   // xor    edi, edi
  3469  	LONG $0x1879e2c4; WORD $0x2c45 // vbroadcastss    xmm0, dword 44[rbp] /* [rip + .LCPI0_2] */
  3470  	LONG $0x1879e2c4; WORD $0x304d // vbroadcastss    xmm1, dword 48[rbp] /* [rip + .LCPI0_3] */
  3471  
  3472  LBB0_468:
	// Each pass handles 32 floats (eight 4-lane vectors). Per vector x:
	//   mask   = x < 2^31                       (vcmpltps, predicate 1)
	//   big    = cvtt(x - 2^31) XOR 0x80000000  (adds the 2^31 bias back as a bit)
	//   small  = cvtt(x)
	//   result = mask ? small : big             (vblendvps)
	// NOTE(review): this matches the usual expansion of a float32 -> unsigned
	// 32-bit conversion - confirm against the dispatch code.
	// After the loop control goes to LBB0_1036 (outside this chunk).
  3473  	LONG $0x1410f8c5; BYTE $0xba   // vmovups    xmm2, oword [rdx + 4*rdi]
  3474  	LONG $0x5c10f8c5; WORD $0x10ba // vmovups    xmm3, oword [rdx + 4*rdi + 16]
  3475  	LONG $0x6410f8c5; WORD $0x20ba // vmovups    xmm4, oword [rdx + 4*rdi + 32]
  3476  	LONG $0xe8c2e8c5; BYTE $0x01   // vcmpltps    xmm5, xmm2, xmm0
  3477  	LONG $0xf05ce8c5               // vsubps    xmm6, xmm2, xmm0
  3478  	LONG $0xf65bfac5               // vcvttps2dq    xmm6, xmm6
  3479  	LONG $0xf157c8c5               // vxorps    xmm6, xmm6, xmm1
  3480  	LONG $0xd25bfac5               // vcvttps2dq    xmm2, xmm2
  3481  	LONG $0x4a49e3c4; WORD $0x50d2 // vblendvps    xmm2, xmm6, xmm2, xmm5
  3482  	LONG $0x6c10f8c5; WORD $0x30ba // vmovups    xmm5, oword [rdx + 4*rdi + 48]
  3483  	LONG $0xf0c2e0c5; BYTE $0x01   // vcmpltps    xmm6, xmm3, xmm0
  3484  	LONG $0xf85ce0c5               // vsubps    xmm7, xmm3, xmm0
  3485  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3486  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3487  	LONG $0xdb5bfac5               // vcvttps2dq    xmm3, xmm3
  3488  	LONG $0x4a41e3c4; WORD $0x60db // vblendvps    xmm3, xmm7, xmm3, xmm6
  3489  	LONG $0xf0c2d8c5; BYTE $0x01   // vcmpltps    xmm6, xmm4, xmm0
  3490  	LONG $0xf85cd8c5               // vsubps    xmm7, xmm4, xmm0
  3491  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3492  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3493  	LONG $0xe45bfac5               // vcvttps2dq    xmm4, xmm4
  3494  	LONG $0x4a41e3c4; WORD $0x60e4 // vblendvps    xmm4, xmm7, xmm4, xmm6
  3495  	LONG $0xf0c2d0c5; BYTE $0x01   // vcmpltps    xmm6, xmm5, xmm0
  3496  	LONG $0xf85cd0c5               // vsubps    xmm7, xmm5, xmm0
  3497  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3498  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3499  	LONG $0xed5bfac5               // vcvttps2dq    xmm5, xmm5
  3500  	LONG $0x4a41e3c4; WORD $0x60ed // vblendvps    xmm5, xmm7, xmm5, xmm6
  3501  	LONG $0x1411f8c5; BYTE $0xb9   // vmovups    oword [rcx + 4*rdi], xmm2
  3502  	LONG $0x5c11f8c5; WORD $0x10b9 // vmovups    oword [rcx + 4*rdi + 16], xmm3
  3503  	LONG $0x6411f8c5; WORD $0x20b9 // vmovups    oword [rcx + 4*rdi + 32], xmm4
  3504  	LONG $0x6c11f8c5; WORD $0x30b9 // vmovups    oword [rcx + 4*rdi + 48], xmm5
  3505  	LONG $0x5410f8c5; WORD $0x40ba // vmovups    xmm2, oword [rdx + 4*rdi + 64]
  3506  	LONG $0x5c10f8c5; WORD $0x50ba // vmovups    xmm3, oword [rdx + 4*rdi + 80]
  3507  	LONG $0x6410f8c5; WORD $0x60ba // vmovups    xmm4, oword [rdx + 4*rdi + 96]
  3508  	LONG $0xe8c2e8c5; BYTE $0x01   // vcmpltps    xmm5, xmm2, xmm0
  3509  	LONG $0xf05ce8c5               // vsubps    xmm6, xmm2, xmm0
  3510  	LONG $0xf65bfac5               // vcvttps2dq    xmm6, xmm6
  3511  	LONG $0xf157c8c5               // vxorps    xmm6, xmm6, xmm1
  3512  	LONG $0xd25bfac5               // vcvttps2dq    xmm2, xmm2
  3513  	LONG $0x4a49e3c4; WORD $0x50d2 // vblendvps    xmm2, xmm6, xmm2, xmm5
  3514  	LONG $0x6c10f8c5; WORD $0x70ba // vmovups    xmm5, oword [rdx + 4*rdi + 112]
  3515  	LONG $0xf0c2e0c5; BYTE $0x01   // vcmpltps    xmm6, xmm3, xmm0
  3516  	LONG $0xf85ce0c5               // vsubps    xmm7, xmm3, xmm0
  3517  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3518  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3519  	LONG $0xdb5bfac5               // vcvttps2dq    xmm3, xmm3
  3520  	LONG $0x4a41e3c4; WORD $0x60db // vblendvps    xmm3, xmm7, xmm3, xmm6
  3521  	LONG $0xf0c2d8c5; BYTE $0x01   // vcmpltps    xmm6, xmm4, xmm0
  3522  	LONG $0xf85cd8c5               // vsubps    xmm7, xmm4, xmm0
  3523  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3524  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3525  	LONG $0xe45bfac5               // vcvttps2dq    xmm4, xmm4
  3526  	LONG $0x4a41e3c4; WORD $0x60e4 // vblendvps    xmm4, xmm7, xmm4, xmm6
  3527  	LONG $0xf0c2d0c5; BYTE $0x01   // vcmpltps    xmm6, xmm5, xmm0
  3528  	LONG $0xf85cd0c5               // vsubps    xmm7, xmm5, xmm0
  3529  	LONG $0xff5bfac5               // vcvttps2dq    xmm7, xmm7
  3530  	LONG $0xf957c0c5               // vxorps    xmm7, xmm7, xmm1
  3531  	LONG $0xed5bfac5               // vcvttps2dq    xmm5, xmm5
  3532  	LONG $0x4a41e3c4; WORD $0x60ed // vblendvps    xmm5, xmm7, xmm5, xmm6
  3533  	LONG $0x5411f8c5; WORD $0x40b9 // vmovups    oword [rcx + 4*rdi + 64], xmm2
  3534  	LONG $0x5c11f8c5; WORD $0x50b9 // vmovups    oword [rcx + 4*rdi + 80], xmm3
  3535  	LONG $0x6411f8c5; WORD $0x60b9 // vmovups    oword [rcx + 4*rdi + 96], xmm4
  3536  	LONG $0x6c11f8c5; WORD $0x70b9 // vmovups    oword [rcx + 4*rdi + 112], xmm5
  3537  	LONG $0x20c78348               // add    rdi, 32
  3538  	LONG $0x02c08348               // add    rax, 2
  3539  	JNE  LBB0_468
  3540  	JMP  LBB0_1036
  3541  
  3542  LBB0_475:
	// Set-up for a 256-bit loop converting unsigned 32-bit elements to
	// float64.
	//   esi = r9d & -16              element count rounded down to a multiple of 16
	//   r8  = ((rsi - 16) >> 4) + 1  number of 16-element groups
	//   rsi == 16 (a single group) -> LBB0_1040 (outside this chunk)
	//   rax = -(r8 & -2)             negated even group count; the loop adds 2
	//                                per pass and stops at zero
	//   rdi = 0                      element index
	// rbp is the LCDATA1 base set in the prologue. The qword at offset 8 is
	// 0x4330000000000000 (2^52 as float64), broadcast into ymm0.
  3543  	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
  3544  	WORD $0xe683; BYTE $0xf0       // and    esi, -16
  3545  	LONG $0xf0468d48               // lea    rax, [rsi - 16]
  3546  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
  3547  	LONG $0x04e8c149               // shr    r8, 4
  3548  	LONG $0x01c08349               // add    r8, 1
  3549  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
  3550  	JE   LBB0_1040
  3551  	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
  3552  	LONG $0xfee08348               // and    rax, -2
  3553  	WORD $0xf748; BYTE $0xd8       // neg    rax
  3554  	WORD $0xff31                   // xor    edi, edi
  3555  	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI0_5] */
  3556  
  3557  LBB0_477:
	// Each pass handles 32 elements (eight 4-lane vectors). vpmovzxdq widens
	// four dwords to qwords; OR-ing in the 2^52 bit pattern places the
	// integer in the low mantissa bits of a double, and subtracting 2^52
	// leaves the integer's value as a float64. Results go to 8-byte outputs.
	// After the loop control goes to LBB0_1041 (outside this chunk; presumably
	// handles an odd trailing group and the scalar tail - confirm).
  3558  	LONG $0x357de2c4; WORD $0xba0c             // vpmovzxdq    ymm1, oword [rdx + 4*rdi]
  3559  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x10 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 16]
  3560  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x20 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 32]
  3561  	LONG $0x357de2c4; WORD $0xba64; BYTE $0x30 // vpmovzxdq    ymm4, oword [rdx + 4*rdi + 48]
  3562  	LONG $0xc8ebf5c5                           // vpor    ymm1, ymm1, ymm0
  3563  	LONG $0xc85cf5c5                           // vsubpd    ymm1, ymm1, ymm0
  3564  	LONG $0xd0ebedc5                           // vpor    ymm2, ymm2, ymm0
  3565  	LONG $0xd05cedc5                           // vsubpd    ymm2, ymm2, ymm0
  3566  	LONG $0xd8ebe5c5                           // vpor    ymm3, ymm3, ymm0
  3567  	LONG $0xd85ce5c5                           // vsubpd    ymm3, ymm3, ymm0
  3568  	LONG $0xe0ebddc5                           // vpor    ymm4, ymm4, ymm0
  3569  	LONG $0xe05cddc5                           // vsubpd    ymm4, ymm4, ymm0
  3570  	LONG $0x0c11fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm1
  3571  	LONG $0x5411fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm2
  3572  	LONG $0x5c11fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm3
  3573  	LONG $0x6411fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm4
  3574  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x40 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 64]
  3575  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x50 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 80]
  3576  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x60 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 96]
  3577  	LONG $0x357de2c4; WORD $0xba64; BYTE $0x70 // vpmovzxdq    ymm4, oword [rdx + 4*rdi + 112]
  3578  	LONG $0xc8ebf5c5                           // vpor    ymm1, ymm1, ymm0
  3579  	LONG $0xc85cf5c5                           // vsubpd    ymm1, ymm1, ymm0
  3580  	LONG $0xd0ebedc5                           // vpor    ymm2, ymm2, ymm0
  3581  	LONG $0xd05cedc5                           // vsubpd    ymm2, ymm2, ymm0
  3582  	LONG $0xd8ebe5c5                           // vpor    ymm3, ymm3, ymm0
  3583  	LONG $0xd85ce5c5                           // vsubpd    ymm3, ymm3, ymm0
  3584  	LONG $0xe0ebddc5                           // vpor    ymm4, ymm4, ymm0
  3585  	LONG $0xe05cddc5                           // vsubpd    ymm4, ymm4, ymm0
  3586  	QUAD $0x000080f98c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 128], ymm1
  3587  	QUAD $0x0000a0f99411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 160], ymm2
  3588  	QUAD $0x0000c0f99c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 192], ymm3
  3589  	QUAD $0x0000e0f9a411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 224], ymm4
  3590  	LONG $0x20c78348                           // add    rdi, 32
  3591  	LONG $0x02c08348                           // add    rax, 2
  3592  	JNE  LBB0_477
  3593  	JMP  LBB0_1041
  3594  
  3595  LBB0_484:
  3596  	WORD $0x8944; BYTE $0xce       // mov    esi, r9d
  3597  	WORD $0xe683; BYTE $0xf0       // and    esi, -16
  3598  	LONG $0xf0468d48               // lea    rax, [rsi - 16]
  3599  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
  3600  	LONG $0x04e8c149               // shr    r8, 4
  3601  	LONG $0x01c08349               // add    r8, 1
  3602  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
  3603  	JE   LBB0_919
  3604  	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
  3605  	LONG $0xfee08348               // and    rax, -2
  3606  	WORD $0xf748; BYTE $0xd8       // neg    rax
  3607  	WORD $0xff31                   // xor    edi, edi
  3608  	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI0_5] */
  3609  	LONG $0xc9eff1c5               // vpxor    xmm1, xmm1, xmm1
  3610  	LONG $0x597de2c4; WORD $0x1055 // vpbroadcastq    ymm2, qword 16[rbp] /* [rip + .LCPI0_6] */
  3611  	LONG $0x197de2c4; WORD $0x185d // vbroadcastsd    ymm3, qword 24[rbp] /* [rip + .LCPI0_7] */
  3612  
  3613  LBB0_486:
  3614  	LONG $0x246ffec5; BYTE $0xfa         // vmovdqu    ymm4, yword [rdx + 8*rdi]
  3615  	LONG $0x6c6ffec5; WORD $0x20fa       // vmovdqu    ymm5, yword [rdx + 8*rdi + 32]
  3616  	LONG $0x746ffec5; WORD $0x40fa       // vmovdqu    ymm6, yword [rdx + 8*rdi + 64]
  3617  	LONG $0x7c6ffec5; WORD $0x60fa       // vmovdqu    ymm7, yword [rdx + 8*rdi + 96]
  3618  	LONG $0x025d63c4; WORD $0xaac1       // vpblendd    ymm8, ymm4, ymm1, 170
  3619  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3620  	LONG $0xd473ddc5; BYTE $0x20         // vpsrlq    ymm4, ymm4, 32
  3621  	LONG $0xe2ebddc5                     // vpor    ymm4, ymm4, ymm2
  3622  	LONG $0xe35cddc5                     // vsubpd    ymm4, ymm4, ymm3
  3623  	LONG $0xe458bdc5                     // vaddpd    ymm4, ymm8, ymm4
  3624  	LONG $0x025563c4; WORD $0xaac1       // vpblendd    ymm8, ymm5, ymm1, 170
  3625  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3626  	LONG $0xd573d5c5; BYTE $0x20         // vpsrlq    ymm5, ymm5, 32
  3627  	LONG $0xeaebd5c5                     // vpor    ymm5, ymm5, ymm2
  3628  	LONG $0xeb5cd5c5                     // vsubpd    ymm5, ymm5, ymm3
  3629  	LONG $0xed58bdc5                     // vaddpd    ymm5, ymm8, ymm5
  3630  	LONG $0x024d63c4; WORD $0xaac1       // vpblendd    ymm8, ymm6, ymm1, 170
  3631  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3632  	LONG $0xd673cdc5; BYTE $0x20         // vpsrlq    ymm6, ymm6, 32
  3633  	LONG $0xf2ebcdc5                     // vpor    ymm6, ymm6, ymm2
  3634  	LONG $0xf35ccdc5                     // vsubpd    ymm6, ymm6, ymm3
  3635  	LONG $0xf658bdc5                     // vaddpd    ymm6, ymm8, ymm6
  3636  	LONG $0x024563c4; WORD $0xaac1       // vpblendd    ymm8, ymm7, ymm1, 170
  3637  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3638  	LONG $0xd773c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm7, 32
  3639  	LONG $0xfaebc5c5                     // vpor    ymm7, ymm7, ymm2
  3640  	LONG $0xfb5cc5c5                     // vsubpd    ymm7, ymm7, ymm3
  3641  	LONG $0xff58bdc5                     // vaddpd    ymm7, ymm8, ymm7
  3642  	LONG $0x2411fdc5; BYTE $0xf9         // vmovupd    yword [rcx + 8*rdi], ymm4
  3643  	LONG $0x6c11fdc5; WORD $0x20f9       // vmovupd    yword [rcx + 8*rdi + 32], ymm5
  3644  	LONG $0x7411fdc5; WORD $0x40f9       // vmovupd    yword [rcx + 8*rdi + 64], ymm6
  3645  	LONG $0x7c11fdc5; WORD $0x60f9       // vmovupd    yword [rcx + 8*rdi + 96], ymm7
  3646  	QUAD $0x000080faa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 8*rdi + 128]
  3647  	QUAD $0x0000a0faac6ffec5; BYTE $0x00 // vmovdqu    ymm5, yword [rdx + 8*rdi + 160]
  3648  	QUAD $0x0000c0fab46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdx + 8*rdi + 192]
  3649  	QUAD $0x0000e0fabc6ffec5; BYTE $0x00 // vmovdqu    ymm7, yword [rdx + 8*rdi + 224]
  3650  	LONG $0x025d63c4; WORD $0xaac1       // vpblendd    ymm8, ymm4, ymm1, 170
  3651  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3652  	LONG $0xd473ddc5; BYTE $0x20         // vpsrlq    ymm4, ymm4, 32
  3653  	LONG $0xe2ebddc5                     // vpor    ymm4, ymm4, ymm2
  3654  	LONG $0xe35cddc5                     // vsubpd    ymm4, ymm4, ymm3
  3655  	LONG $0xe458bdc5                     // vaddpd    ymm4, ymm8, ymm4
  3656  	LONG $0x025563c4; WORD $0xaac1       // vpblendd    ymm8, ymm5, ymm1, 170
  3657  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3658  	LONG $0xd573d5c5; BYTE $0x20         // vpsrlq    ymm5, ymm5, 32
  3659  	LONG $0xeaebd5c5                     // vpor    ymm5, ymm5, ymm2
  3660  	LONG $0xeb5cd5c5                     // vsubpd    ymm5, ymm5, ymm3
  3661  	LONG $0xed58bdc5                     // vaddpd    ymm5, ymm8, ymm5
  3662  	LONG $0x024d63c4; WORD $0xaac1       // vpblendd    ymm8, ymm6, ymm1, 170
  3663  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3664  	LONG $0xd673cdc5; BYTE $0x20         // vpsrlq    ymm6, ymm6, 32
  3665  	LONG $0xf2ebcdc5                     // vpor    ymm6, ymm6, ymm2
  3666  	LONG $0xf35ccdc5                     // vsubpd    ymm6, ymm6, ymm3
  3667  	LONG $0xf658bdc5                     // vaddpd    ymm6, ymm8, ymm6
  3668  	LONG $0x024563c4; WORD $0xaac1       // vpblendd    ymm8, ymm7, ymm1, 170
  3669  	LONG $0xc0eb3dc5                     // vpor    ymm8, ymm8, ymm0
  3670  	LONG $0xd773c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm7, 32
  3671  	LONG $0xfaebc5c5                     // vpor    ymm7, ymm7, ymm2
  3672  	LONG $0xfb5cc5c5                     // vsubpd    ymm7, ymm7, ymm3
  3673  	LONG $0xff58bdc5                     // vaddpd    ymm7, ymm8, ymm7
  3674  	QUAD $0x000080f9a411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm4
  3675  	QUAD $0x0000a0f9ac11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm5
  3676  	QUAD $0x0000c0f9b411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm6
  3677  	QUAD $0x0000e0f9bc11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm7
  3678  	LONG $0x20c78348                     // add    rdi, 32
  3679  	LONG $0x02c08348                     // add    rax, 2
  3680  	JNE  LBB0_486
  3681  	JMP  LBB0_920
  3682  
  3683  LBB0_487: // Set-up for the 2x-unrolled loop at LBB0_489 (16 elements per group, two groups per pass).
  3684  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3685  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3686  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3687  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3688  	LONG $0x04e8c149         // shr    r8, 4
  3689  	LONG $0x01c08349         // add    r8, 1  -- r8 = (esi - 16)/16 + 1 = number of 16-element groups
  3690  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3691  	JE   LBB0_1045 // single group: bypass the paired loop (target is outside this view)
  3692  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3693  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  3694  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  3695  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3696  
  3697  LBB0_489: // Loop: 32 elements per pass. Zero-extend 16-bit words to dwords, convert to doubles, store at [rcx + 8*rdi].
  3698  	LONG $0x3379e2c4; WORD $0x7a04             // vpmovzxwd    xmm0, qword [rdx + 2*rdi]  -- first half: 4 words -> 4 dwords per register
  3699  	LONG $0x3379e2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  3700  	LONG $0x3379e2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwd    xmm2, qword [rdx + 2*rdi + 16]
  3701  	LONG $0x3379e2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwd    xmm3, qword [rdx + 2*rdi + 24]
  3702  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0  -- 4 dwords -> 4 doubles
  3703  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  3704  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  3705  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  3706  	LONG $0x0411fcc5; BYTE $0xf9               // vmovups    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 doubles
  3707  	LONG $0x4c11fcc5; WORD $0x20f9             // vmovups    yword [rcx + 8*rdi + 32], ymm1
  3708  	LONG $0x5411fcc5; WORD $0x40f9             // vmovups    yword [rcx + 8*rdi + 64], ymm2
  3709  	LONG $0x5c11fcc5; WORD $0x60f9             // vmovups    yword [rcx + 8*rdi + 96], ymm3
  3710  	LONG $0x3379e2c4; WORD $0x7a44; BYTE $0x20 // vpmovzxwd    xmm0, qword [rdx + 2*rdi + 32]  -- second half: next 16 elements
  3711  	LONG $0x3379e2c4; WORD $0x7a4c; BYTE $0x28 // vpmovzxwd    xmm1, qword [rdx + 2*rdi + 40]
  3712  	LONG $0x3379e2c4; WORD $0x7a54; BYTE $0x30 // vpmovzxwd    xmm2, qword [rdx + 2*rdi + 48]
  3713  	LONG $0x3379e2c4; WORD $0x7a5c; BYTE $0x38 // vpmovzxwd    xmm3, qword [rdx + 2*rdi + 56]
  3714  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
  3715  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  3716  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  3717  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  3718  	QUAD $0x000080f98411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  3719  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  3720  	QUAD $0x0000c0f99411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  3721  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  3722  	LONG $0x20c78348                           // add    rdi, 32  -- advance the element index past the 32 elements just handled
  3723  	LONG $0x02c08348                           // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  3724  	JNE  LBB0_489 // repeat while group pairs remain
  3725  	JMP  LBB0_1046 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  3726  
  3727  LBB0_490: // Set-up for the 2x-unrolled loop at LBB0_492 (16 elements per group, two groups per pass).
  3728  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3729  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3730  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3731  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3732  	LONG $0x04e8c149         // shr    r8, 4
  3733  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  3734  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3735  	JE   LBB0_1050 // single group: bypass the paired loop (target is outside this view)
  3736  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3737  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  3738  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  3739  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3740  
  3741  LBB0_492: // Loop: 32 elements per pass. Sign-extend 16-bit words to dwords, convert to doubles, store at [rcx + 8*rdi].
  3742  	LONG $0x2379e2c4; WORD $0x7a04             // vpmovsxwd    xmm0, qword [rdx + 2*rdi]  -- first half: 4 words -> 4 signed dwords per register
  3743  	LONG $0x2379e2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  3744  	LONG $0x2379e2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwd    xmm2, qword [rdx + 2*rdi + 16]
  3745  	LONG $0x2379e2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwd    xmm3, qword [rdx + 2*rdi + 24]
  3746  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0  -- 4 dwords -> 4 doubles
  3747  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  3748  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  3749  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  3750  	LONG $0x0411fcc5; BYTE $0xf9               // vmovups    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 doubles
  3751  	LONG $0x4c11fcc5; WORD $0x20f9             // vmovups    yword [rcx + 8*rdi + 32], ymm1
  3752  	LONG $0x5411fcc5; WORD $0x40f9             // vmovups    yword [rcx + 8*rdi + 64], ymm2
  3753  	LONG $0x5c11fcc5; WORD $0x60f9             // vmovups    yword [rcx + 8*rdi + 96], ymm3
  3754  	LONG $0x2379e2c4; WORD $0x7a44; BYTE $0x20 // vpmovsxwd    xmm0, qword [rdx + 2*rdi + 32]  -- second half: next 16 elements
  3755  	LONG $0x2379e2c4; WORD $0x7a4c; BYTE $0x28 // vpmovsxwd    xmm1, qword [rdx + 2*rdi + 40]
  3756  	LONG $0x2379e2c4; WORD $0x7a54; BYTE $0x30 // vpmovsxwd    xmm2, qword [rdx + 2*rdi + 48]
  3757  	LONG $0x2379e2c4; WORD $0x7a5c; BYTE $0x38 // vpmovsxwd    xmm3, qword [rdx + 2*rdi + 56]
  3758  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
  3759  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  3760  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  3761  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  3762  	QUAD $0x000080f98411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  3763  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  3764  	QUAD $0x0000c0f99411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  3765  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  3766  	LONG $0x20c78348                           // add    rdi, 32  -- advance the element index past the 32 elements just handled
  3767  	LONG $0x02c08348                           // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  3768  	JNE  LBB0_492 // repeat while group pairs remain
  3769  	JMP  LBB0_1051 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  3770  
  3771  LBB0_493: // Set-up for the 2x-unrolled loop at LBB0_495 (16 elements per group, two groups per pass).
  3772  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3773  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3774  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3775  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3776  	LONG $0x04e8c149         // shr    r8, 4
  3777  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  3778  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3779  	JE   LBB0_1055 // single group: bypass the paired loop (target is outside this view)
  3780  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8  -- counter lives in r10 because the loop at LBB0_495 uses rax as scratch
  3781  	LONG $0xfee28349         // and    r10, -2  -- round the group count down to an even number
  3782  	WORD $0xf749; BYTE $0xda // neg    r10  -- negative counter; the loop adds 2 per pass until it reaches zero
  3783  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3784  
  3785  LBB0_495: // Loop: 32 elements per pass. Each 64-bit integer is moved to rax and converted to a double with scalar vcvtsi2sd (signed), then pairs are packed and stored.
  3786  	LONG $0x046ffac5; BYTE $0xfa         // vmovdqu    xmm0, oword [rdx + 8*rdi]  -- each 16-byte load brings in two 64-bit integers
  3787  	LONG $0x4c6ffac5; WORD $0x10fa       // vmovdqu    xmm1, oword [rdx + 8*rdi + 16]
  3788  	LONG $0x16f9e3c4; WORD $0x01c0       // vpextrq    rax, xmm0, 1  -- high qword of the pair
  3789  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax  -- xmm11 only supplies the upper lane, which the later vunpcklpd ignores
  3790  	LONG $0x5c6ffac5; WORD $0x20fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
  3791  	LONG $0x7ef9e1c4; BYTE $0xc0         // vmovq    rax, xmm0  -- low qword of the pair
  3792  	LONG $0x2aa3e1c4; BYTE $0xc0         // vcvtsi2sd    xmm0, xmm11, rax
  3793  	LONG $0x16f9e3c4; WORD $0x01c8       // vpextrq    rax, xmm1, 1
  3794  	LONG $0x2aa3e1c4; BYTE $0xe0         // vcvtsi2sd    xmm4, xmm11, rax
  3795  	LONG $0x6c6ffac5; WORD $0x30fa       // vmovdqu    xmm5, oword [rdx + 8*rdi + 48]
  3796  	LONG $0x7ef9e1c4; BYTE $0xc8         // vmovq    rax, xmm1
  3797  	LONG $0x2aa3e1c4; BYTE $0xc8         // vcvtsi2sd    xmm1, xmm11, rax
  3798  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  3799  	LONG $0x2aa3e1c4; BYTE $0xf0         // vcvtsi2sd    xmm6, xmm11, rax
  3800  	LONG $0xc21479c5                     // vunpcklpd    xmm8, xmm0, xmm2  -- pack {low, high} doubles for offset 0
  3801  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  3802  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax
  3803  	LONG $0x16f9e3c4; WORD $0x01d8       // vpextrq    rax, xmm3, 1
  3804  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3805  	LONG $0xd41471c5                     // vunpcklpd    xmm10, xmm1, xmm4  -- pair for offset 16
  3806  	LONG $0x7ef9e1c4; BYTE $0xd8         // vmovq    rax, xmm3
  3807  	LONG $0x2aa3e1c4; BYTE $0xd8         // vcvtsi2sd    xmm3, xmm11, rax
  3808  	LONG $0xce1469c5                     // vunpcklpd    xmm9, xmm2, xmm6  -- pair for offset 48
  3809  	LONG $0x646ffac5; WORD $0x50fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 80]
  3810  	LONG $0x16f9e3c4; WORD $0x01e0       // vpextrq    rax, xmm4, 1
  3811  	LONG $0xdd14e1c5                     // vunpcklpd    xmm3, xmm3, xmm5  -- pair for offset 32
  3812  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3813  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  3814  	LONG $0x2aa3e1c4; BYTE $0xe0         // vcvtsi2sd    xmm4, xmm11, rax
  3815  	LONG $0xe514d9c5                     // vunpcklpd    xmm4, xmm4, xmm5  -- pair for offset 80
  3816  	LONG $0x6c6ffac5; WORD $0x40fa       // vmovdqu    xmm5, oword [rdx + 8*rdi + 64]
  3817  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  3818  	LONG $0x2aa3e1c4; BYTE $0xf0         // vcvtsi2sd    xmm6, xmm11, rax
  3819  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  3820  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3821  	LONG $0x7c6ffac5; WORD $0x70fa       // vmovdqu    xmm7, oword [rdx + 8*rdi + 112]
  3822  	LONG $0x16f9e3c4; WORD $0x01f8       // vpextrq    rax, xmm7, 1
  3823  	LONG $0x2aa3e1c4; BYTE $0xc0         // vcvtsi2sd    xmm0, xmm11, rax
  3824  	LONG $0x7ef9e1c4; BYTE $0xf8         // vmovq    rax, xmm7
  3825  	LONG $0x2aa3e1c4; BYTE $0xf8         // vcvtsi2sd    xmm7, xmm11, rax
  3826  	LONG $0x546ffac5; WORD $0x60fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 96]
  3827  	LONG $0x16f9e3c4; WORD $0x01d0       // vpextrq    rax, xmm2, 1
  3828  	LONG $0x2aa3e1c4; BYTE $0xc8         // vcvtsi2sd    xmm1, xmm11, rax
  3829  	LONG $0xee14d1c5                     // vunpcklpd    xmm5, xmm5, xmm6  -- pair for offset 64
  3830  	LONG $0x7ef9e1c4; BYTE $0xd0         // vmovq    rax, xmm2
  3831  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax
  3832  	LONG $0xc014c1c5                     // vunpcklpd    xmm0, xmm7, xmm0  -- pair for offset 112
  3833  	LONG $0xc914e9c5                     // vunpcklpd    xmm1, xmm2, xmm1  -- pair for offset 96
  3834  	LONG $0x541179c5; WORD $0x10f9       // vmovupd    oword [rcx + 8*rdi + 16], xmm10  -- store the first 16 doubles (128 bytes); store order follows the generated schedule
  3835  	LONG $0x041179c5; BYTE $0xf9         // vmovupd    oword [rcx + 8*rdi], xmm8
  3836  	LONG $0x5c11f9c5; WORD $0x20f9       // vmovupd    oword [rcx + 8*rdi + 32], xmm3
  3837  	LONG $0x4c1179c5; WORD $0x30f9       // vmovupd    oword [rcx + 8*rdi + 48], xmm9
  3838  	LONG $0x6c11f9c5; WORD $0x40f9       // vmovupd    oword [rcx + 8*rdi + 64], xmm5
  3839  	LONG $0x6411f9c5; WORD $0x50f9       // vmovupd    oword [rcx + 8*rdi + 80], xmm4
  3840  	LONG $0x4c11f9c5; WORD $0x60f9       // vmovupd    oword [rcx + 8*rdi + 96], xmm1
  3841  	LONG $0x4411f9c5; WORD $0x70f9       // vmovupd    oword [rcx + 8*rdi + 112], xmm0
  3842  	QUAD $0x000080fa846ffac5; BYTE $0x00 // vmovdqu    xmm0, oword [rdx + 8*rdi + 128]  -- second half: same sequence for the next 16 elements (offsets 128..240)
  3843  	QUAD $0x000090fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 144]
  3844  	LONG $0x16f9e3c4; WORD $0x01c0       // vpextrq    rax, xmm0, 1
  3845  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax
  3846  	QUAD $0x0000a0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 160]
  3847  	LONG $0x7ef9e1c4; BYTE $0xc0         // vmovq    rax, xmm0
  3848  	LONG $0x2aa3e1c4; BYTE $0xc0         // vcvtsi2sd    xmm0, xmm11, rax
  3849  	LONG $0x16f9e3c4; WORD $0x01c8       // vpextrq    rax, xmm1, 1
  3850  	LONG $0x2aa3e1c4; BYTE $0xe0         // vcvtsi2sd    xmm4, xmm11, rax
  3851  	QUAD $0x0000b0faac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 8*rdi + 176]
  3852  	LONG $0x7ef9e1c4; BYTE $0xc8         // vmovq    rax, xmm1
  3853  	LONG $0x2aa3e1c4; BYTE $0xc8         // vcvtsi2sd    xmm1, xmm11, rax
  3854  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  3855  	LONG $0x2aa3e1c4; BYTE $0xf0         // vcvtsi2sd    xmm6, xmm11, rax
  3856  	LONG $0xc21479c5                     // vunpcklpd    xmm8, xmm0, xmm2
  3857  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  3858  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax
  3859  	LONG $0x16f9e3c4; WORD $0x01d8       // vpextrq    rax, xmm3, 1
  3860  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3861  	LONG $0xd41471c5                     // vunpcklpd    xmm10, xmm1, xmm4
  3862  	LONG $0x7ef9e1c4; BYTE $0xd8         // vmovq    rax, xmm3
  3863  	LONG $0x2aa3e1c4; BYTE $0xd8         // vcvtsi2sd    xmm3, xmm11, rax
  3864  	LONG $0xce1469c5                     // vunpcklpd    xmm9, xmm2, xmm6
  3865  	QUAD $0x0000d0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 208]
  3866  	LONG $0x16f9e3c4; WORD $0x01e0       // vpextrq    rax, xmm4, 1
  3867  	LONG $0xdd14e1c5                     // vunpcklpd    xmm3, xmm3, xmm5
  3868  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3869  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  3870  	LONG $0x2aa3e1c4; BYTE $0xe0         // vcvtsi2sd    xmm4, xmm11, rax
  3871  	LONG $0xe514d9c5                     // vunpcklpd    xmm4, xmm4, xmm5
  3872  	QUAD $0x0000c0faac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 8*rdi + 192]
  3873  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  3874  	LONG $0x2aa3e1c4; BYTE $0xf0         // vcvtsi2sd    xmm6, xmm11, rax
  3875  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  3876  	LONG $0x2aa3e1c4; BYTE $0xe8         // vcvtsi2sd    xmm5, xmm11, rax
  3877  	QUAD $0x0000f0fabc6ffac5; BYTE $0x00 // vmovdqu    xmm7, oword [rdx + 8*rdi + 240]
  3878  	LONG $0x16f9e3c4; WORD $0x01f8       // vpextrq    rax, xmm7, 1
  3879  	LONG $0x2aa3e1c4; BYTE $0xc0         // vcvtsi2sd    xmm0, xmm11, rax
  3880  	LONG $0x7ef9e1c4; BYTE $0xf8         // vmovq    rax, xmm7
  3881  	LONG $0x2aa3e1c4; BYTE $0xf8         // vcvtsi2sd    xmm7, xmm11, rax
  3882  	QUAD $0x0000e0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 224]
  3883  	LONG $0x16f9e3c4; WORD $0x01d0       // vpextrq    rax, xmm2, 1
  3884  	LONG $0x2aa3e1c4; BYTE $0xc8         // vcvtsi2sd    xmm1, xmm11, rax
  3885  	LONG $0xee14d1c5                     // vunpcklpd    xmm5, xmm5, xmm6
  3886  	LONG $0x7ef9e1c4; BYTE $0xd0         // vmovq    rax, xmm2
  3887  	LONG $0x2aa3e1c4; BYTE $0xd0         // vcvtsi2sd    xmm2, xmm11, rax
  3888  	LONG $0xc014c1c5                     // vunpcklpd    xmm0, xmm7, xmm0
  3889  	LONG $0xc914e9c5                     // vunpcklpd    xmm1, xmm2, xmm1
  3890  	QUAD $0x000090f9941179c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 144], xmm10  -- store the second 16 doubles
  3891  	QUAD $0x000080f9841179c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 128], xmm8
  3892  	QUAD $0x0000a0f99c11f9c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 160], xmm3
  3893  	QUAD $0x0000b0f98c1179c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 176], xmm9
  3894  	QUAD $0x0000c0f9ac11f9c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 192], xmm5
  3895  	QUAD $0x0000d0f9a411f9c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 208], xmm4
  3896  	QUAD $0x0000e0f98c11f9c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 224], xmm1
  3897  	QUAD $0x0000f0f98411f9c5; BYTE $0x00 // vmovupd    oword [rcx + 8*rdi + 240], xmm0
  3898  	LONG $0x20c78348                     // add    rdi, 32  -- advance the element index past the 32 elements just handled
  3899  	LONG $0x02c28349                     // add    r10, 2  -- two groups done; r10 counts up towards zero and sets ZF for the JNE
  3900  	JNE  LBB0_495 // repeat while group pairs remain
  3901  	JMP  LBB0_1056 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  3902  
  3903  LBB0_496: // Set-up for the 2x-unrolled loop at LBB0_498 (16 elements per group, two groups per pass).
  3904  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3905  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3906  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3907  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3908  	LONG $0x04e8c149         // shr    r8, 4
  3909  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  3910  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3911  	JE   LBB0_1060 // single group: bypass the paired loop (target is outside this view)
  3912  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3913  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  3914  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  3915  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3916  
  3917  LBB0_498: // Loop: 32 elements per pass. Convert packed single-precision floats at [rdx + 4*rdi] to doubles at [rcx + 8*rdi].
  3918  	LONG $0x045afcc5; BYTE $0xba         // vcvtps2pd    ymm0, oword [rdx + 4*rdi]  -- first half: 4 floats -> 4 doubles per register
  3919  	LONG $0x4c5afcc5; WORD $0x10ba       // vcvtps2pd    ymm1, oword [rdx + 4*rdi + 16]
  3920  	LONG $0x545afcc5; WORD $0x20ba       // vcvtps2pd    ymm2, oword [rdx + 4*rdi + 32]
  3921  	LONG $0x5c5afcc5; WORD $0x30ba       // vcvtps2pd    ymm3, oword [rdx + 4*rdi + 48]
  3922  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 doubles
  3923  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  3924  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  3925  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
  3926  	LONG $0x445afcc5; WORD $0x40ba       // vcvtps2pd    ymm0, oword [rdx + 4*rdi + 64]  -- second half: next 16 elements
  3927  	LONG $0x4c5afcc5; WORD $0x50ba       // vcvtps2pd    ymm1, oword [rdx + 4*rdi + 80]
  3928  	LONG $0x545afcc5; WORD $0x60ba       // vcvtps2pd    ymm2, oword [rdx + 4*rdi + 96]
  3929  	LONG $0x5c5afcc5; WORD $0x70ba       // vcvtps2pd    ymm3, oword [rdx + 4*rdi + 112]
  3930  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  3931  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  3932  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  3933  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  3934  	LONG $0x20c78348                     // add    rdi, 32  -- advance the element index past the 32 elements just handled
  3935  	LONG $0x02c08348                     // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  3936  	JNE  LBB0_498 // repeat while group pairs remain
  3937  	JMP  LBB0_1061 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  3938  
  3939  LBB0_502: // Set-up for the 2x-unrolled loop at LBB0_504 (16 elements per group, two groups per pass).
  3940  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3941  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3942  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3943  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3944  	LONG $0x04e8c149         // shr    r8, 4
  3945  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  3946  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3947  	JE   LBB0_925 // single group: bypass the paired loop (target is outside this view)
  3948  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3949  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  3950  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  3951  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3952  
  3953  LBB0_504: // Loop: 32 elements per pass. Convert packed signed dwords at [rdx + 4*rdi] to doubles at [rcx + 8*rdi].
  3954  	LONG $0x04e6fec5; BYTE $0xba         // vcvtdq2pd    ymm0, oword [rdx + 4*rdi]  -- first half: 4 dwords -> 4 doubles per register
  3955  	LONG $0x4ce6fec5; WORD $0x10ba       // vcvtdq2pd    ymm1, oword [rdx + 4*rdi + 16]
  3956  	LONG $0x54e6fec5; WORD $0x20ba       // vcvtdq2pd    ymm2, oword [rdx + 4*rdi + 32]
  3957  	LONG $0x5ce6fec5; WORD $0x30ba       // vcvtdq2pd    ymm3, oword [rdx + 4*rdi + 48]
  3958  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 doubles
  3959  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  3960  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  3961  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
  3962  	LONG $0x44e6fec5; WORD $0x40ba       // vcvtdq2pd    ymm0, oword [rdx + 4*rdi + 64]  -- second half: next 16 elements
  3963  	LONG $0x4ce6fec5; WORD $0x50ba       // vcvtdq2pd    ymm1, oword [rdx + 4*rdi + 80]
  3964  	LONG $0x54e6fec5; WORD $0x60ba       // vcvtdq2pd    ymm2, oword [rdx + 4*rdi + 96]
  3965  	LONG $0x5ce6fec5; WORD $0x70ba       // vcvtdq2pd    ymm3, oword [rdx + 4*rdi + 112]
  3966  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  3967  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  3968  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  3969  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  3970  	LONG $0x20c78348                     // add    rdi, 32  -- advance the element index past the 32 elements just handled
  3971  	LONG $0x02c08348                     // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  3972  	JNE  LBB0_504 // repeat while group pairs remain
  3973  	JMP  LBB0_926 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  3974  
  3975  LBB0_535: // Set-up for the 2x-unrolled loop at LBB0_537 (16 elements per group, two groups per pass).
  3976  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  3977  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  3978  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  3979  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3980  	LONG $0x04e8c149         // shr    r8, 4
  3981  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  3982  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  3983  	JE   LBB0_930 // single group: bypass the paired loop (target is outside this view)
  3984  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3985  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  3986  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  3987  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  3988  
  3989  LBB0_537: // Loop: 32 elements per pass. Zero-extend dwords at [rdx + 4*rdi] to qwords at [rcx + 8*rdi].
  3990  	LONG $0x357de2c4; WORD $0xba04             // vpmovzxdq    ymm0, oword [rdx + 4*rdi]  -- first half: 4 dwords -> 4 qwords per register
  3991  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x10 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 16]
  3992  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x20 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 32]
  3993  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x30 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 48]
  3994  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 qwords
  3995  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  3996  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  3997  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  3998  	LONG $0x357de2c4; WORD $0xba44; BYTE $0x40 // vpmovzxdq    ymm0, oword [rdx + 4*rdi + 64]  -- second half: next 16 elements
  3999  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x50 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 80]
  4000  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x60 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 96]
  4001  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x70 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 112]
  4002  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  4003  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  4004  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  4005  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  4006  	LONG $0x20c78348                           // add    rdi, 32  -- advance the element index past the 32 elements just handled
  4007  	LONG $0x02c08348                           // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  4008  	JNE  LBB0_537 // repeat while group pairs remain
  4009  	JMP  LBB0_931 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  4010  
  4011  LBB0_538: // Set-up for a loop over 4-element groups whose body is outside this view (LBB0_793 / LBB0_795).
  4012  	WORD $0x8945; BYTE $0xce // mov    r14d, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  4013  	LONG $0xfce68341         // and    r14d, -4  -- r14d = r9d rounded down to a multiple of 4
  4014  	LONG $0xfc468d49         // lea    rax, [r14 - 4]  -- NOTE(review): assumes r14d >= 4 here, otherwise rax wraps; confirm against the dispatch code
  4015  	WORD $0x8949; BYTE $0xc2 // mov    r10, rax
  4016  	LONG $0x02eac149         // shr    r10, 2
  4017  	LONG $0x01c28349         // add    r10, 1  -- r10 = (r14d - 4)/4 + 1 = number of 4-element groups
  4018  	WORD $0x8945; BYTE $0xd0 // mov    r8d, r10d
  4019  	LONG $0x03e08341         // and    r8d, 3  -- r8d = group count modulo 4
  4020  	LONG $0x0cf88348         // cmp    rax, 12  -- rax >= 12 means at least four groups
  4021  	JAE  LBB0_793 // four or more groups (target outside this view; presumably the 4x-unrolled loop -- TODO confirm)
  4022  	WORD $0xc031             // xor    eax, eax  -- fewer than four groups: start with eax = 0
  4023  	JMP  LBB0_795 // target outside this view (presumably handles the r8d leftover groups -- TODO confirm)
  4024  
  4025  LBB0_546: // Set-up for the 2x-unrolled loop at LBB0_548 (16 elements per group, two groups per pass).
  4026  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  4027  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  4028  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  4029  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4030  	LONG $0x04e8c149         // shr    r8, 4
  4031  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  4032  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  4033  	JE   LBB0_935 // single group: bypass the paired loop (target is outside this view)
  4034  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4035  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  4036  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  4037  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  4038  
  4039  LBB0_548: // Loop: 32 elements per pass. Zero-extend 16-bit words at [rdx + 2*rdi] to qwords at [rcx + 8*rdi].
  4040  	LONG $0x347de2c4; WORD $0x7a04             // vpmovzxwq    ymm0, qword [rdx + 2*rdi]  -- first half: 4 words -> 4 qwords per register
  4041  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 8]
  4042  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 16]
  4043  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 24]
  4044  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 qwords
  4045  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  4046  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  4047  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  4048  	LONG $0x347de2c4; WORD $0x7a44; BYTE $0x20 // vpmovzxwq    ymm0, qword [rdx + 2*rdi + 32]  -- second half: next 16 elements
  4049  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x28 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 40]
  4050  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x30 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 48]
  4051  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x38 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 56]
  4052  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  4053  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  4054  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  4055  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  4056  	LONG $0x20c78348                           // add    rdi, 32  -- advance the element index past the 32 elements just handled
  4057  	LONG $0x02c08348                           // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  4058  	JNE  LBB0_548 // repeat while group pairs remain
  4059  	JMP  LBB0_936 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  4060  
  4061  LBB0_549: // Set-up for the 2x-unrolled loop at LBB0_551 (16 elements per group, two groups per pass).
  4062  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  4063  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  4064  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  4065  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4066  	LONG $0x04e8c149         // shr    r8, 4
  4067  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  4068  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  4069  	JE   LBB0_940 // single group: bypass the paired loop (target is outside this view)
  4070  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4071  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  4072  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  4073  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  4074  
  4075  LBB0_551: // Loop: 32 elements per pass. Sign-extend 16-bit words at [rdx + 2*rdi] to qwords at [rcx + 8*rdi].
  4076  	LONG $0x247de2c4; WORD $0x7a04             // vpmovsxwq    ymm0, qword [rdx + 2*rdi]  -- first half: 4 words -> 4 signed qwords per register
  4077  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 8]
  4078  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 16]
  4079  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 24]
  4080  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0  -- unaligned stores of 16 qwords
  4081  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  4082  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  4083  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  4084  	LONG $0x247de2c4; WORD $0x7a44; BYTE $0x20 // vpmovsxwq    ymm0, qword [rdx + 2*rdi + 32]  -- second half: next 16 elements
  4085  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x28 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 40]
  4086  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x30 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 48]
  4087  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x38 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 56]
  4088  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  4089  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  4090  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  4091  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  4092  	LONG $0x20c78348                           // add    rdi, 32  -- advance the element index past the 32 elements just handled
  4093  	LONG $0x02c08348                           // add    rax, 2  -- two groups done; rax counts up towards zero and sets ZF for the JNE
  4094  	JNE  LBB0_551 // repeat while group pairs remain
  4095  	JMP  LBB0_941 // continues outside this view (presumably leftover-group / tail handling -- TODO confirm)
  4096  
  4097  LBB0_555: // Set-up for a loop over 4-element groups whose body is outside this view (LBB0_810 / LBB0_812).
  4098  	WORD $0x8945; BYTE $0xce // mov    r14d, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  4099  	LONG $0xfce68341         // and    r14d, -4  -- r14d = r9d rounded down to a multiple of 4
  4100  	LONG $0xfc468d49         // lea    rax, [r14 - 4]  -- NOTE(review): assumes r14d >= 4 here, otherwise rax wraps; confirm against the dispatch code
  4101  	WORD $0x8949; BYTE $0xc2 // mov    r10, rax
  4102  	LONG $0x02eac149         // shr    r10, 2
  4103  	LONG $0x01c28349         // add    r10, 1  -- r10 = (r14d - 4)/4 + 1 = number of 4-element groups
  4104  	WORD $0x8945; BYTE $0xd0 // mov    r8d, r10d
  4105  	LONG $0x03e08341         // and    r8d, 3  -- r8d = group count modulo 4
  4106  	LONG $0x0cf88348         // cmp    rax, 12  -- rax >= 12 means at least four groups
  4107  	JAE  LBB0_810 // four or more groups (target outside this view; presumably the 4x-unrolled loop -- TODO confirm)
  4108  	WORD $0xc031             // xor    eax, eax  -- fewer than four groups: start with eax = 0
  4109  	JMP  LBB0_812 // target outside this view (presumably handles the r8d leftover groups -- TODO confirm)
  4110  
  4111  LBB0_560: // Set-up for the 2x-unrolled loop at LBB0_562 (16 elements per group, two groups per pass).
  4112  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  -- NOTE(review): r9d looks like the element count; it is set outside this view
  4113  	WORD $0xe683; BYTE $0xf0 // and    esi, -16  -- esi = r9d rounded down to a multiple of 16
  4114  	LONG $0xf0468d48         // lea    rax, [rsi - 16]  -- NOTE(review): assumes esi >= 16 here, otherwise rax wraps; confirm against the dispatch code
  4115  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4116  	LONG $0x04e8c149         // shr    r8, 4
  4117  	LONG $0x01c08349         // add    r8, 1  -- r8 = number of 16-element groups
  4118  	WORD $0x8548; BYTE $0xc0 // test    rax, rax  -- rax == 0 means exactly one group
  4119  	JE   LBB0_945 // single group: bypass the paired loop (target is outside this view)
  4120  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4121  	LONG $0xfee08348         // and    rax, -2  -- round the group count down to an even number
  4122  	WORD $0xf748; BYTE $0xd8 // neg    rax  -- negative counter; the loop adds 2 per pass until it reaches zero
  4123  	WORD $0xff31             // xor    edi, edi  -- rdi = element index, starts at 0
  4124  
  4125  LBB0_562:
        	// Loop body: widens 32-bit signed integers to 64-bit, 32 elements per pass
        	// (two groups of 16). Each vpmovsxdq sign-extends four 32-bit values read at
        	// [rdx + 4*rdi + off] into four 64-bit values; each vmovdqu stores them at
        	// [rcx + 8*rdi + 2*off]. rdx and rcx are loaded from the `in` and `out`
        	// arguments at function entry.
        	// rdi (element index) advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_946 is outside this section (presumably it handles an odd
        	// remaining group and the scalar tail -- TODO confirm).
  4126  	LONG $0x257de2c4; WORD $0xba04             // vpmovsxdq    ymm0, oword [rdx + 4*rdi]
  4127  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x10 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 16]
  4128  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x20 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 32]
  4129  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x30 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 48]
  4130  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  4131  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  4132  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  4133  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
        	// Second group of 16 elements (input +64 bytes, output +128 bytes).
  4134  	LONG $0x257de2c4; WORD $0xba44; BYTE $0x40 // vpmovsxdq    ymm0, oword [rdx + 4*rdi + 64]
  4135  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x50 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 80]
  4136  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x60 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 96]
  4137  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x70 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 112]
  4138  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  4139  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  4140  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  4141  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  4142  	LONG $0x20c78348                           // add    rdi, 32
  4143  	LONG $0x02c08348                           // add    rax, 2
  4144  	JNE  LBB0_562
  4145  	JMP  LBB0_946
  4146  
  4147  LBB0_563:
        	// Prologue for the narrowing loop at LBB0_565 (falls through into it).
        	//   rsi = r9d rounded down to a multiple of 32
        	//   r8  = (rsi - 32) / 32 + 1, the number of 32-element groups
        	// When rsi == 32 (a single group) the paired loop is skipped via LBB0_1065
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi  = 0, the running element index
        	//   ymm0 = 32-byte shuffle mask at LCDATA1+0x80 (rbp holds the LCDATA1 base
        	//          address, set at function entry)
  4148  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4149  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  4150  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4151  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4152  	LONG $0x05e8c149         // shr    r8, 5
  4153  	LONG $0x01c08349         // add    r8, 1
  4154  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4155  	JE   LBB0_1065
  4156  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4157  	LONG $0xfee08348         // and    rax, -2
  4158  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4159  	WORD $0xff31             // xor    edi, edi
  4160  	QUAD $0x00000080856ffdc5 // vmovdqa    ymm0, yword 128[rbp] /* [rip + .LCPI0_11] */
  4161  
  4162  LBB0_565:
        	// Loop body: narrows 32-bit elements to 16-bit by truncation, 64 elements per
        	// pass (two groups of 32). For each 32-byte load of eight 32-bit values:
        	//   vpshufb with the mask in ymm0 gathers bytes 0,1,4,5,8,9,12,13 of each 128-bit
        	//   lane (the low 16 bits of every dword) into that lane's low 8 bytes;
        	//   vpermq 232 then moves qwords 0 and 2 into the low 128 bits;
        	//   the xmm store writes the resulting eight 16-bit values.
        	// Input is read at [rdx + 4*rdi + off], output written at [rcx + 2*rdi + off/2].
        	// rdi advances by 64; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_1066 is outside this section.
  4163  	LONG $0x0c6ffec5; BYTE $0xba         // vmovdqu    ymm1, yword [rdx + 4*rdi]
  4164  	LONG $0x546ffec5; WORD $0x20ba       // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
  4165  	LONG $0x5c6ffec5; WORD $0x40ba       // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
  4166  	LONG $0x646ffec5; WORD $0x60ba       // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
  4167  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4168  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4169  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4170  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4171  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4172  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4173  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4174  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4175  	LONG $0x0c7ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm1
  4176  	LONG $0x547ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
  4177  	LONG $0x5c7ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm3
  4178  	LONG $0x647ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm4
        	// Second group of 32 elements (input +128 bytes, output +64 bytes).
  4179  	QUAD $0x000080ba8c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
  4180  	QUAD $0x0000a0ba946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
  4181  	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
  4182  	QUAD $0x0000e0baa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
  4183  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4184  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4185  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4186  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4187  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4188  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4189  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4190  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4191  	LONG $0x4c7ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm1
  4192  	LONG $0x547ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm2
  4193  	LONG $0x5c7ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm3
  4194  	LONG $0x647ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm4
  4195  	LONG $0x40c78348                     // add    rdi, 64
  4196  	LONG $0x02c08348                     // add    rax, 2
  4197  	JNE  LBB0_565
  4198  	JMP  LBB0_1066
  4199  
  4200  LBB0_566:
        	// Prologue for the narrowing loop at LBB0_568 (falls through into it).
        	// The instruction sequence matches the prologue at LBB0_563; only the
        	// early-exit target differs.
        	//   rsi = r9d rounded down to a multiple of 32
        	//   r8  = (rsi - 32) / 32 + 1, the number of 32-element groups
        	// When rsi == 32 (a single group) the paired loop is skipped via LBB0_1070
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi  = 0, the running element index
        	//   ymm0 = 32-byte shuffle mask at LCDATA1+0x80 (rbp holds the LCDATA1 base)
  4201  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4202  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  4203  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4204  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4205  	LONG $0x05e8c149         // shr    r8, 5
  4206  	LONG $0x01c08349         // add    r8, 1
  4207  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4208  	JE   LBB0_1070
  4209  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4210  	LONG $0xfee08348         // and    rax, -2
  4211  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4212  	WORD $0xff31             // xor    edi, edi
  4213  	QUAD $0x00000080856ffdc5 // vmovdqa    ymm0, yword 128[rbp] /* [rip + .LCPI0_11] */
  4214  
  4215  LBB0_568:
        	// Loop body: narrows 32-bit elements to 16-bit by truncation, 64 elements per
        	// pass (two groups of 32). The instruction sequence matches the loop at LBB0_565
        	// (presumably a second instantiation for a different source/destination type
        	// pair -- TODO confirm against the dispatch code).
        	//   vpshufb with the mask in ymm0 keeps the low 16 bits of every dword in each
        	//   128-bit lane; vpermq 232 moves qwords 0 and 2 into the low 128 bits; the xmm
        	//   store writes eight 16-bit values.
        	// Input is read at [rdx + 4*rdi + off], output written at [rcx + 2*rdi + off/2].
        	// rdi advances by 64; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_1071 is outside this section.
  4216  	LONG $0x0c6ffec5; BYTE $0xba         // vmovdqu    ymm1, yword [rdx + 4*rdi]
  4217  	LONG $0x546ffec5; WORD $0x20ba       // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
  4218  	LONG $0x5c6ffec5; WORD $0x40ba       // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
  4219  	LONG $0x646ffec5; WORD $0x60ba       // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
  4220  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4221  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4222  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4223  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4224  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4225  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4226  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4227  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4228  	LONG $0x0c7ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm1
  4229  	LONG $0x547ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
  4230  	LONG $0x5c7ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm3
  4231  	LONG $0x647ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm4
        	// Second group of 32 elements (input +128 bytes, output +64 bytes).
  4232  	QUAD $0x000080ba8c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdx + 4*rdi + 128]
  4233  	QUAD $0x0000a0ba946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
  4234  	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
  4235  	QUAD $0x0000e0baa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
  4236  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4237  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4238  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4239  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4240  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4241  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4242  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4243  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4244  	LONG $0x4c7ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm1
  4245  	LONG $0x547ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm2
  4246  	LONG $0x5c7ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm3
  4247  	LONG $0x647ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm4
  4248  	LONG $0x40c78348                     // add    rdi, 64
  4249  	LONG $0x02c08348                     // add    rax, 2
  4250  	JNE  LBB0_568
  4251  	JMP  LBB0_1071
  4252  
  4253  LBB0_569:
        	// Prologue for the double-to-16-bit loop at LBB0_571 (falls through into it).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_1075
        	// (outside this section). Otherwise:
        	//   rax = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi = 0, the running element index
  4254  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4255  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4256  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4257  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4258  	LONG $0x04e8c149         // shr    r8, 4
  4259  	LONG $0x01c08349         // add    r8, 1
  4260  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4261  	JE   LBB0_1075
  4262  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4263  	LONG $0xfee08348         // and    rax, -2
  4264  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4265  	WORD $0xff31             // xor    edi, edi
  4266  
  4267  LBB0_571:
        	// Loop body: converts doubles to 16-bit integers, 32 elements per pass
        	// (two groups of 16).
        	//   vcvttpd2dq converts four doubles at [rdx + 8*rdi + off] to four 32-bit
        	//   integers with truncation; xmm0..xmm3 hold elements 0-3, 4-7, 8-11, 12-15.
        	//   vinsertf128 + vpackusdw pack the dwords down to words with unsigned
        	//   saturation; vpunpcklqdq + vpermq 216 put the 16 words back in element order.
        	//   The 32-byte result is stored at [rcx + 2*rdi + off/4].
        	// rdi advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_1076 is outside this section.
  4268  	LONG $0x04e6fdc5; BYTE $0xfa         // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
  4269  	LONG $0x4ce6fdc5; WORD $0x20fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  4270  	LONG $0x54e6fdc5; WORD $0x40fa       // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
  4271  	LONG $0x5ce6fdc5; WORD $0x60fa       // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
  4272  	LONG $0x186de3c4; WORD $0x01d3       // vinsertf128    ymm2, ymm2, xmm3, 1
  4273  	LONG $0x2b6de2c4; BYTE $0xd0         // vpackusdw    ymm2, ymm2, ymm0
  4274  	LONG $0x187de3c4; WORD $0x01c1       // vinsertf128    ymm0, ymm0, xmm1, 1
  4275  	LONG $0x2b7de2c4; BYTE $0xc0         // vpackusdw    ymm0, ymm0, ymm0
  4276  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  4277  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  4278  	LONG $0x047ffec5; BYTE $0x79         // vmovdqu    yword [rcx + 2*rdi], ymm0
        	// Second group of 16 elements (input +128 bytes, output +32 bytes).
  4279  	QUAD $0x000080fa84e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm0, yword [rdx + 8*rdi + 128]
  4280  	QUAD $0x0000a0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 160]
  4281  	QUAD $0x0000c0fa94e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 192]
  4282  	QUAD $0x0000e0fa9ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 224]
  4283  	LONG $0x186de3c4; WORD $0x01d3       // vinsertf128    ymm2, ymm2, xmm3, 1
  4284  	LONG $0x2b6de2c4; BYTE $0xd0         // vpackusdw    ymm2, ymm2, ymm0
  4285  	LONG $0x187de3c4; WORD $0x01c1       // vinsertf128    ymm0, ymm0, xmm1, 1
  4286  	LONG $0x2b7de2c4; BYTE $0xc0         // vpackusdw    ymm0, ymm0, ymm0
  4287  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  4288  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  4289  	LONG $0x447ffec5; WORD $0x2079       // vmovdqu    yword [rcx + 2*rdi + 32], ymm0
  4290  	LONG $0x20c78348                     // add    rdi, 32
  4291  	LONG $0x02c08348                     // add    rax, 2
  4292  	JNE  LBB0_571
  4293  	JMP  LBB0_1076
  4294  
  4295  LBB0_572:
        	// Prologue for the double-to-16-bit loop at LBB0_574 (falls through into it).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_1080
        	// (outside this section). Otherwise:
        	//   rax = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi = 0, the running element index
  4296  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4297  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4298  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4299  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4300  	LONG $0x04e8c149         // shr    r8, 4
  4301  	LONG $0x01c08349         // add    r8, 1
  4302  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4303  	JE   LBB0_1080
  4304  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4305  	LONG $0xfee08348         // and    rax, -2
  4306  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4307  	WORD $0xff31             // xor    edi, edi
  4308  
  4309  LBB0_574:
        	// Loop body: converts doubles to 16-bit integers, 32 elements per pass
        	// (two groups of 16). Same data flow as the loop at LBB0_571, but the
        	// dword-to-word pack is vpackssdw (signed saturation) instead of vpackusdw.
        	//   vcvttpd2dq converts four doubles at [rdx + 8*rdi + off] to four 32-bit
        	//   integers with truncation; xmm0..xmm3 hold elements 0-3, 4-7, 8-11, 12-15.
        	//   vinsertf128 + vpackssdw pack the dwords down to words;
        	//   vpunpcklqdq + vpermq 216 put the 16 words back in element order.
        	//   The 32-byte result is stored at [rcx + 2*rdi + off/4].
        	// rdi advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_1081 is outside this section.
  4310  	LONG $0x04e6fdc5; BYTE $0xfa         // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
  4311  	LONG $0x4ce6fdc5; WORD $0x20fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  4312  	LONG $0x54e6fdc5; WORD $0x40fa       // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
  4313  	LONG $0x5ce6fdc5; WORD $0x60fa       // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
  4314  	LONG $0x186de3c4; WORD $0x01d3       // vinsertf128    ymm2, ymm2, xmm3, 1
  4315  	LONG $0xd06bedc5                     // vpackssdw    ymm2, ymm2, ymm0
  4316  	LONG $0x187de3c4; WORD $0x01c1       // vinsertf128    ymm0, ymm0, xmm1, 1
  4317  	LONG $0xc06bfdc5                     // vpackssdw    ymm0, ymm0, ymm0
  4318  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  4319  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  4320  	LONG $0x047ffec5; BYTE $0x79         // vmovdqu    yword [rcx + 2*rdi], ymm0
        	// Second group of 16 elements (input +128 bytes, output +32 bytes).
  4321  	QUAD $0x000080fa84e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm0, yword [rdx + 8*rdi + 128]
  4322  	QUAD $0x0000a0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 160]
  4323  	QUAD $0x0000c0fa94e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 192]
  4324  	QUAD $0x0000e0fa9ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 224]
  4325  	LONG $0x186de3c4; WORD $0x01d3       // vinsertf128    ymm2, ymm2, xmm3, 1
  4326  	LONG $0xd06bedc5                     // vpackssdw    ymm2, ymm2, ymm0
  4327  	LONG $0x187de3c4; WORD $0x01c1       // vinsertf128    ymm0, ymm0, xmm1, 1
  4328  	LONG $0xc06bfdc5                     // vpackssdw    ymm0, ymm0, ymm0
  4329  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  4330  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  4331  	LONG $0x447ffec5; WORD $0x2079       // vmovdqu    yword [rcx + 2*rdi + 32], ymm0
  4332  	LONG $0x20c78348                     // add    rdi, 32
  4333  	LONG $0x02c08348                     // add    rax, 2
  4334  	JNE  LBB0_574
  4335  	JMP  LBB0_1081
  4336  
  4337  LBB0_581:
        	// Prologue for the 64-bit-to-16-bit narrowing loop at LBB0_583 (falls through).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_950
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi  = 0, the running element index
        	//   xmm0 = 0, used by the loop as the zero source for vpblendw / vpackusdw
  4338  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4339  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4340  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4341  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4342  	LONG $0x04e8c149         // shr    r8, 4
  4343  	LONG $0x01c08349         // add    r8, 1
  4344  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4345  	JE   LBB0_950
  4346  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4347  	LONG $0xfee08348         // and    rax, -2
  4348  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4349  	WORD $0xff31             // xor    edi, edi
  4350  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
  4351  
  4352  LBB0_583:
        	// Loop body: narrows 64-bit elements to 16-bit by truncation, 32 elements per
        	// pass (two groups of 16).
        	//   Each vpblendw (imm 17 = 0b00010001) takes 16-bit words 0 and 4 from the
        	//   16-byte memory operand and every other word from xmm0 (zero), i.e. it keeps
        	//   only the low 16 bits of each of the two 64-bit elements loaded.
        	//   vinserti128 pairs the registers up, two rounds of vpackusdw squeeze the
        	//   zero-padded values down to words (no saturation can occur because each
        	//   dword is already <= 0xffff), and vpunpcklqdq + vpermq 216 restore element
        	//   order before the 32-byte store at [rcx + 2*rdi + ...].
        	// rdi advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_951 is outside this section.
  4353  	LONG $0x0e7963c4; WORD $0xfa04; BYTE $0x11         // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi], 17
  4354  	QUAD $0x1110fa540e79e3c4                           // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  4355  	QUAD $0x1120fa5c0e79e3c4                           // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  4356  	QUAD $0x1130fa640e79e3c4                           // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  4357  	QUAD $0x1140fa6c0e79e3c4                           // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  4358  	QUAD $0x1150fa740e79e3c4                           // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  4359  	QUAD $0x1160fa7c0e79e3c4                           // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  4360  	QUAD $0x1170fa4c0e79e3c4                           // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 112], 17
  4361  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4362  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4363  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4364  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4365  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4366  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4367  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4368  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4369  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4370  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4371  	LONG $0x0c7ffec5; BYTE $0x79                       // vmovdqu    yword [rcx + 2*rdi], ymm1
        	// Second group of 16 elements (input +128 bytes, output +32 bytes).
  4372  	QUAD $0x0080fa840e7963c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi + 128], 17
  4373  	QUAD $0x0090fa940e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 144], 17
  4374  	QUAD $0x00a0fa9c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 160], 17
  4375  	QUAD $0x00b0faa40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 176], 17
  4376  	QUAD $0x00c0faac0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 192], 17
  4377  	QUAD $0x00d0fab40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 208], 17
  4378  	QUAD $0x00e0fabc0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 224], 17
  4379  	QUAD $0x00f0fa8c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 240], 17
  4380  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4381  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4382  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4383  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4384  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4385  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4386  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4387  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4388  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4389  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4390  	LONG $0x4c7ffec5; WORD $0x2079                     // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  4391  	LONG $0x20c78348                                   // add    rdi, 32
  4392  	LONG $0x02c08348                                   // add    rax, 2
  4393  	JNE  LBB0_583
  4394  	JMP  LBB0_951
  4395  
  4396  LBB0_584:
        	// Prologue for the 64-bit-to-16-bit narrowing loop at LBB0_586 (falls through).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_955
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi  = 0, the running element index
        	//   xmm0 = 0, used by the loop as the zero source for vpblendw / vpackusdw
  4397  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4398  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4399  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4400  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4401  	LONG $0x04e8c149         // shr    r8, 4
  4402  	LONG $0x01c08349         // add    r8, 1
  4403  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4404  	JE   LBB0_955
  4405  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4406  	LONG $0xfee08348         // and    rax, -2
  4407  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4408  	WORD $0xff31             // xor    edi, edi
  4409  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
  4410  
  4411  LBB0_586:
        	// Loop body: narrows 64-bit elements to 16-bit by truncation, 32 elements per
        	// pass (two groups of 16). The instruction sequence matches the loop at LBB0_583
        	// (presumably another instantiation for a different type pair -- TODO confirm).
        	//   Each vpblendw (imm 17 = 0b00010001) takes words 0 and 4 from the 16-byte
        	//   memory operand and all other words from xmm0 (zero), keeping the low 16 bits
        	//   of each of the two 64-bit elements loaded.
        	//   vinserti128 + two rounds of vpackusdw compress the values to words;
        	//   vpunpcklqdq + vpermq 216 restore element order before the 32-byte store.
        	// rdi advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_956 is outside this section.
  4412  	LONG $0x0e7963c4; WORD $0xfa04; BYTE $0x11         // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi], 17
  4413  	QUAD $0x1110fa540e79e3c4                           // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  4414  	QUAD $0x1120fa5c0e79e3c4                           // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  4415  	QUAD $0x1130fa640e79e3c4                           // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  4416  	QUAD $0x1140fa6c0e79e3c4                           // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  4417  	QUAD $0x1150fa740e79e3c4                           // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  4418  	QUAD $0x1160fa7c0e79e3c4                           // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  4419  	QUAD $0x1170fa4c0e79e3c4                           // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 112], 17
  4420  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4421  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4422  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4423  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4424  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4425  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4426  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4427  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4428  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4429  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4430  	LONG $0x0c7ffec5; BYTE $0x79                       // vmovdqu    yword [rcx + 2*rdi], ymm1
        	// Second group of 16 elements (input +128 bytes, output +32 bytes).
  4431  	QUAD $0x0080fa840e7963c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi + 128], 17
  4432  	QUAD $0x0090fa940e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 144], 17
  4433  	QUAD $0x00a0fa9c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 160], 17
  4434  	QUAD $0x00b0faa40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 176], 17
  4435  	QUAD $0x00c0faac0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 192], 17
  4436  	QUAD $0x00d0fab40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 208], 17
  4437  	QUAD $0x00e0fabc0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 224], 17
  4438  	QUAD $0x00f0fa8c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 240], 17
  4439  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4440  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4441  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4442  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4443  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4444  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4445  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4446  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4447  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4448  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4449  	LONG $0x4c7ffec5; WORD $0x2079                     // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  4450  	LONG $0x20c78348                                   // add    rdi, 32
  4451  	LONG $0x02c08348                                   // add    rax, 2
  4452  	JNE  LBB0_586
  4453  	JMP  LBB0_956
  4454  
  4455  LBB0_599:
        	// Prologue for the 64-bit-to-16-bit narrowing loop at LBB0_601 (falls through).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_960
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter; the loop adds 2 per pass until it hits 0
        	//   rdi  = 0, the running element index
        	//   xmm0 = 0, used by the loop as the zero source for vpblendw / vpackusdw
  4456  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4457  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4458  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4459  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4460  	LONG $0x04e8c149         // shr    r8, 4
  4461  	LONG $0x01c08349         // add    r8, 1
  4462  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4463  	JE   LBB0_960
  4464  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4465  	LONG $0xfee08348         // and    rax, -2
  4466  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4467  	WORD $0xff31             // xor    edi, edi
  4468  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
  4469  
  4470  LBB0_601:
        	// Loop body: narrows 64-bit elements to 16-bit by truncation, 32 elements per
        	// pass (two groups of 16). The instruction sequence matches the loop at LBB0_583
        	// (presumably another instantiation for a different type pair -- TODO confirm).
        	//   Each vpblendw (imm 17 = 0b00010001) takes words 0 and 4 from the 16-byte
        	//   memory operand and all other words from xmm0 (zero), keeping the low 16 bits
        	//   of each of the two 64-bit elements loaded.
        	//   vinserti128 + two rounds of vpackusdw compress the values to words;
        	//   vpunpcklqdq + vpermq 216 restore element order before the 32-byte store.
        	// rdi advances by 32; rax counts up by 2 and the loop ends at 0.
        	// The exit target LBB0_961 is outside this section.
  4471  	LONG $0x0e7963c4; WORD $0xfa04; BYTE $0x11         // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi], 17
  4472  	QUAD $0x1110fa540e79e3c4                           // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  4473  	QUAD $0x1120fa5c0e79e3c4                           // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  4474  	QUAD $0x1130fa640e79e3c4                           // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  4475  	QUAD $0x1140fa6c0e79e3c4                           // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  4476  	QUAD $0x1150fa740e79e3c4                           // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  4477  	QUAD $0x1160fa7c0e79e3c4                           // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  4478  	QUAD $0x1170fa4c0e79e3c4                           // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 112], 17
  4479  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4480  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4481  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4482  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4483  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4484  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4485  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4486  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4487  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4488  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4489  	LONG $0x0c7ffec5; BYTE $0x79                       // vmovdqu    yword [rcx + 2*rdi], ymm1
        	// Second group of 16 elements (input +128 bytes, output +32 bytes).
  4490  	QUAD $0x0080fa840e7963c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi + 128], 17
  4491  	QUAD $0x0090fa940e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 144], 17
  4492  	QUAD $0x00a0fa9c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 160], 17
  4493  	QUAD $0x00b0faa40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 176], 17
  4494  	QUAD $0x00c0faac0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 192], 17
  4495  	QUAD $0x00d0fab40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 208], 17
  4496  	QUAD $0x00e0fabc0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 224], 17
  4497  	QUAD $0x00f0fa8c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 240], 17
  4498  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4499  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4500  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4501  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4502  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4503  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4504  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4505  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4506  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4507  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4508  	LONG $0x4c7ffec5; WORD $0x2079                     // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  4509  	LONG $0x20c78348                                   // add    rdi, 32
  4510  	LONG $0x02c08348                                   // add    rax, 2
  4511  	JNE  LBB0_601
  4512  	JMP  LBB0_961
  4513  
  4514  LBB0_602:
        	// Prologue for the loop at LBB0_604 (falls through into it).
        	//   rsi = r9d rounded down to a multiple of 16
        	//   r8  = (rsi - 16) / 16 + 1, the number of 16-element groups
        	// When rsi == 16 (a single group) the paired loop is skipped via LBB0_1085
        	// (outside this section). Otherwise:
        	//   rax  = -(r8 & ~1), a negative counter (presumably stepped by 2 per loop
        	//          pass, as in the sibling loops -- TODO confirm)
        	//   rdi  = 0, the running element index
        	//   xmm0 = 0, the zero source operand used by the loop's vpblendw instructions
  4515  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4516  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  4517  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4518  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4519  	LONG $0x04e8c149         // shr    r8, 4
  4520  	LONG $0x01c08349         // add    r8, 1
  4521  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4522  	JE   LBB0_1085
  4523  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4524  	LONG $0xfee08348         // and    rax, -2
  4525  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4526  	WORD $0xff31             // xor    edi, edi
  4527  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
  4528  
  4529  LBB0_604: // Unrolled loop: each pass narrows 32 64-bit elements read from [rdx + 8*rdi] into 32 16-bit elements written to [rcx + 2*rdi]. rdi = element index, rax = negative pass counter (stepped by 2 up to zero), xmm0 = all zeros (vpxor just before the loop).
  4530  	LONG $0x0e7963c4; WORD $0xfa04; BYTE $0x11         // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi], 17 -- mask 17 (bits 0 and 4) takes words 0 and 4 from memory, i.e. the low 16 bits of each qword; all other words come from the zeroed xmm0
  4531  	QUAD $0x1110fa540e79e3c4                           // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  4532  	QUAD $0x1120fa5c0e79e3c4                           // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  4533  	QUAD $0x1130fa640e79e3c4                           // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  4534  	QUAD $0x1140fa6c0e79e3c4                           // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  4535  	QUAD $0x1150fa740e79e3c4                           // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  4536  	QUAD $0x1160fa7c0e79e3c4                           // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  4537  	QUAD $0x1170fa4c0e79e3c4                           // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 112], 17
  4538  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1 -- pair up the 128-bit pieces into 256-bit registers
  4539  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4540  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1 -- the masked values already fit in 16 bits, so the packs only compact them
  4541  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4542  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4543  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4544  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4545  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4546  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1 -- merge the two packed halves
  4547  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216 -- reorder qwords (0,2,1,3) across the 128-bit lanes
  4548  	LONG $0x0c7ffec5; BYTE $0x79                       // vmovdqu    yword [rcx + 2*rdi], ymm1 -- store the first 16 results (32 bytes)
  4549  	QUAD $0x0080fa840e7963c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm8, xmm0, oword [rdx + 8*rdi + 128], 17 -- second unrolled copy: the next 16 inputs (byte offset 128)
  4550  	QUAD $0x0090fa940e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 144], 17
  4551  	QUAD $0x00a0fa9c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 160], 17
  4552  	QUAD $0x00b0faa40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 176], 17
  4553  	QUAD $0x00c0faac0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 192], 17
  4554  	QUAD $0x00d0fab40e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 208], 17
  4555  	QUAD $0x00e0fabc0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 224], 17
  4556  	QUAD $0x00f0fa8c0e79e3c4; WORD $0x0000; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi + 240], 17
  4557  	LONG $0x384de3c4; WORD $0x01c9                     // vinserti128    ymm1, ymm6, xmm1, 1
  4558  	LONG $0x3855e3c4; WORD $0x01ef                     // vinserti128    ymm5, ymm5, xmm7, 1
  4559  	LONG $0x2b55e2c4; BYTE $0xc9                       // vpackusdw    ymm1, ymm5, ymm1
  4560  	LONG $0x2b75e2c4; BYTE $0xc8                       // vpackusdw    ymm1, ymm1, ymm0
  4561  	LONG $0x386de3c4; WORD $0x01d4                     // vinserti128    ymm2, ymm2, xmm4, 1
  4562  	LONG $0x383de3c4; WORD $0x01db                     // vinserti128    ymm3, ymm8, xmm3, 1
  4563  	LONG $0x2b65e2c4; BYTE $0xd2                       // vpackusdw    ymm2, ymm3, ymm2
  4564  	LONG $0x2b6de2c4; BYTE $0xd0                       // vpackusdw    ymm2, ymm2, ymm0
  4565  	LONG $0xc96cedc5                                   // vpunpcklqdq    ymm1, ymm2, ymm1
  4566  	LONG $0x00fde3c4; WORD $0xd8c9                     // vpermq    ymm1, ymm1, 216
  4567  	LONG $0x4c7ffec5; WORD $0x2079                     // vmovdqu    yword [rcx + 2*rdi + 32], ymm1 -- store results 16..31
  4568  	LONG $0x20c78348                                   // add    rdi, 32 -- 32 elements consumed per pass
  4569  	LONG $0x02c08348                                   // add    rax, 2 -- two 16-element groups done; reaches zero when the paired groups are exhausted
  4570  	JNE  LBB0_604 // repeat while the counter is non-zero
  4571  	JMP  LBB0_1086 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4572  
  4573  LBB0_605: // Loop setup for LBB0_607 (float32 -> 16-bit, unsigned saturation): computes the number of 32-element groups and zeroes the element index.
  4574  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4575  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32 elements
  4576  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4577  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4578  	LONG $0x05e8c149         // shr    r8, 5
  4579  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  4580  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4581  	JE   LBB0_1090 // rsi == 32 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4582  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4583  	LONG $0xfee08348         // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4584  	WORD $0xf748; BYTE $0xd8 // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4585  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4586  
  4587  LBB0_607: // Unrolled loop: each pass converts 64 float32 values at [rdx + 4*rdi] to 16-bit values at [rcx + 2*rdi], truncating toward zero and packing with unsigned saturation. rdi = element index, rax = negative pass counter.
  4588  	LONG $0x045bfec5; BYTE $0xba         // vcvttps2dq    ymm0, yword [rdx + 4*rdi] -- 8 floats -> 8 int32 (truncating)
  4589  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1 -- upper four int32
  4590  	LONG $0x2b79e2c4; BYTE $0xc1         // vpackusdw    xmm0, xmm0, xmm1 -- 8 int32 -> 8 16-bit values, unsigned saturation
  4591  	LONG $0x4c5bfec5; WORD $0x20ba       // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
  4592  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  4593  	LONG $0x2b71e2c4; BYTE $0xca         // vpackusdw    xmm1, xmm1, xmm2
  4594  	LONG $0x545bfec5; WORD $0x40ba       // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
  4595  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  4596  	LONG $0x2b69e2c4; BYTE $0xd3         // vpackusdw    xmm2, xmm2, xmm3
  4597  	LONG $0x5c5bfec5; WORD $0x60ba       // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
  4598  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  4599  	LONG $0x2b61e2c4; BYTE $0xdc         // vpackusdw    xmm3, xmm3, xmm4
  4600  	LONG $0x047ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm0 -- store the first 32 results (4 x 16 bytes)
  4601  	LONG $0x4c7ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
  4602  	LONG $0x547ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
  4603  	LONG $0x5c7ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
  4604  	QUAD $0x000080ba845bfec5; BYTE $0x00 // vcvttps2dq    ymm0, yword [rdx + 4*rdi + 128] -- second unrolled copy: inputs 32..63
  4605  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  4606  	LONG $0x2b79e2c4; BYTE $0xc1         // vpackusdw    xmm0, xmm0, xmm1
  4607  	QUAD $0x0000a0ba8c5bfec5; BYTE $0x00 // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 160]
  4608  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  4609  	LONG $0x2b71e2c4; BYTE $0xca         // vpackusdw    xmm1, xmm1, xmm2
  4610  	QUAD $0x0000c0ba945bfec5; BYTE $0x00 // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 192]
  4611  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  4612  	LONG $0x2b69e2c4; BYTE $0xd3         // vpackusdw    xmm2, xmm2, xmm3
  4613  	QUAD $0x0000e0ba9c5bfec5; BYTE $0x00 // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 224]
  4614  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  4615  	LONG $0x2b61e2c4; BYTE $0xdc         // vpackusdw    xmm3, xmm3, xmm4
  4616  	LONG $0x447ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm0 -- store results 32..63
  4617  	LONG $0x4c7ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm1
  4618  	LONG $0x547ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm2
  4619  	LONG $0x5c7ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm3
  4620  	LONG $0x40c78348                     // add    rdi, 64 -- 64 elements consumed per pass
  4621  	LONG $0x02c08348                     // add    rax, 2 -- two 32-element groups done; zero means the paired groups are exhausted
  4622  	JNE  LBB0_607 // repeat while the counter is non-zero
  4623  	JMP  LBB0_1091 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4624  
  4625  LBB0_608: // Loop setup for LBB0_610 (float32 -> 16-bit, signed saturation): computes the number of 32-element groups and zeroes the element index.
  4626  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4627  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32 elements
  4628  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4629  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4630  	LONG $0x05e8c149         // shr    r8, 5
  4631  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  4632  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4633  	JE   LBB0_1095 // rsi == 32 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4634  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4635  	LONG $0xfee08348         // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4636  	WORD $0xf748; BYTE $0xd8 // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4637  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4638  
  4639  LBB0_610: // Unrolled loop: each pass converts 64 float32 values at [rdx + 4*rdi] to 16-bit values at [rcx + 2*rdi], truncating toward zero and packing with signed saturation. rdi = element index, rax = negative pass counter.
  4640  	LONG $0x045bfec5; BYTE $0xba         // vcvttps2dq    ymm0, yword [rdx + 4*rdi] -- 8 floats -> 8 int32 (truncating)
  4641  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1 -- upper four int32
  4642  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1 -- 8 int32 -> 8 16-bit values, signed saturation
  4643  	LONG $0x4c5bfec5; WORD $0x20ba       // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
  4644  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  4645  	LONG $0xca6bf1c5                     // vpackssdw    xmm1, xmm1, xmm2
  4646  	LONG $0x545bfec5; WORD $0x40ba       // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
  4647  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  4648  	LONG $0xd36be9c5                     // vpackssdw    xmm2, xmm2, xmm3
  4649  	LONG $0x5c5bfec5; WORD $0x60ba       // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
  4650  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  4651  	LONG $0xdc6be1c5                     // vpackssdw    xmm3, xmm3, xmm4
  4652  	LONG $0x047ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm0 -- store the first 32 results (4 x 16 bytes)
  4653  	LONG $0x4c7ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
  4654  	LONG $0x547ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
  4655  	LONG $0x5c7ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
  4656  	QUAD $0x000080ba845bfec5; BYTE $0x00 // vcvttps2dq    ymm0, yword [rdx + 4*rdi + 128] -- second unrolled copy: inputs 32..63
  4657  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  4658  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1
  4659  	QUAD $0x0000a0ba8c5bfec5; BYTE $0x00 // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 160]
  4660  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  4661  	LONG $0xca6bf1c5                     // vpackssdw    xmm1, xmm1, xmm2
  4662  	QUAD $0x0000c0ba945bfec5; BYTE $0x00 // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 192]
  4663  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  4664  	LONG $0xd36be9c5                     // vpackssdw    xmm2, xmm2, xmm3
  4665  	QUAD $0x0000e0ba9c5bfec5; BYTE $0x00 // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 224]
  4666  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  4667  	LONG $0xdc6be1c5                     // vpackssdw    xmm3, xmm3, xmm4
  4668  	LONG $0x447ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm0 -- store results 32..63
  4669  	LONG $0x4c7ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm1
  4670  	LONG $0x547ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm2
  4671  	LONG $0x5c7ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm3
  4672  	LONG $0x40c78348                     // add    rdi, 64 -- 64 elements consumed per pass
  4673  	LONG $0x02c08348                     // add    rax, 2 -- two 32-element groups done; zero means the paired groups are exhausted
  4674  	JNE  LBB0_610 // repeat while the counter is non-zero
  4675  	JMP  LBB0_1096 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4676  
  4677  LBB0_617: // Loop setup for LBB0_619 (32-bit -> 16-bit truncation): computes the number of 32-element groups, zeroes the element index and loads the byte-shuffle mask.
  4678  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4679  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32 elements
  4680  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4681  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4682  	LONG $0x05e8c149         // shr    r8, 5
  4683  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  4684  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4685  	JE   LBB0_965 // rsi == 32 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4686  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4687  	LONG $0xfee08348         // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4688  	WORD $0xf748; BYTE $0xd8 // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4689  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4690  	QUAD $0x00000080856ffdc5 // vmovdqa    ymm0, yword 128[rbp] /* [rip + .LCPI0_11] */ -- vpshufb mask from LCDATA1+0x80 (BP = &LCDATA1 from function entry); its leading bytes 00,01,04,05,08,09,0c,0d pick the low word of each dword
  4691  
  4692  LBB0_619: // Unrolled loop: each pass truncates 64 32-bit elements at [rdx + 4*rdi] to their low 16 bits, stored at [rcx + 2*rdi]. rdi = element index, rax = negative pass counter, ymm0 = shuffle mask loaded before the loop.
  4693  	LONG $0x0c6ffec5; BYTE $0xba         // vmovdqu    ymm1, yword [rdx + 4*rdi] -- load 4 x 8 input elements
  4694  	LONG $0x546ffec5; WORD $0x20ba       // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
  4695  	LONG $0x5c6ffec5; WORD $0x40ba       // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
  4696  	LONG $0x646ffec5; WORD $0x60ba       // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
  4697  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0 -- gather the low word of each dword into the low 8 bytes of each 128-bit lane
  4698  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232 -- move qwords 0 and 2 into the low 128 bits
  4699  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4700  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4701  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4702  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4703  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4704  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4705  	LONG $0x0c7ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm1 -- store the first 32 results (only the low 128 bits of each register)
  4706  	LONG $0x547ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
  4707  	LONG $0x5c7ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm3
  4708  	LONG $0x647ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm4
  4709  	QUAD $0x000080ba8c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdx + 4*rdi + 128] -- second unrolled copy: inputs 32..63
  4710  	QUAD $0x0000a0ba946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
  4711  	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
  4712  	QUAD $0x0000e0baa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
  4713  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4714  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4715  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4716  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4717  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4718  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4719  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4720  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4721  	LONG $0x4c7ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm1 -- store results 32..63
  4722  	LONG $0x547ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm2
  4723  	LONG $0x5c7ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm3
  4724  	LONG $0x647ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm4
  4725  	LONG $0x40c78348                     // add    rdi, 64 -- 64 elements consumed per pass
  4726  	LONG $0x02c08348                     // add    rax, 2 -- two 32-element groups done; zero means the paired groups are exhausted
  4727  	JNE  LBB0_619 // repeat while the counter is non-zero
  4728  	JMP  LBB0_966 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4729  
  4730  LBB0_620: // Loop setup for LBB0_622 (32-bit -> 16-bit truncation): computes the number of 32-element groups, zeroes the element index and loads the byte-shuffle mask. Same instruction sequence as LBB0_617, with different exit labels.
  4731  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4732  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32 elements
  4733  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  4734  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4735  	LONG $0x05e8c149         // shr    r8, 5
  4736  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  4737  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4738  	JE   LBB0_970 // rsi == 32 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4739  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4740  	LONG $0xfee08348         // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4741  	WORD $0xf748; BYTE $0xd8 // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4742  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4743  	QUAD $0x00000080856ffdc5 // vmovdqa    ymm0, yword 128[rbp] /* [rip + .LCPI0_11] */ -- vpshufb mask from LCDATA1+0x80 (BP = &LCDATA1 from function entry); its leading bytes 00,01,04,05,08,09,0c,0d pick the low word of each dword
  4744  
  4745  LBB0_622: // Unrolled loop: each pass truncates 64 32-bit elements at [rdx + 4*rdi] to their low 16 bits, stored at [rcx + 2*rdi]. rdi = element index, rax = negative pass counter, ymm0 = shuffle mask loaded before the loop. Body matches LBB0_619.
  4746  	LONG $0x0c6ffec5; BYTE $0xba         // vmovdqu    ymm1, yword [rdx + 4*rdi] -- load 4 x 8 input elements
  4747  	LONG $0x546ffec5; WORD $0x20ba       // vmovdqu    ymm2, yword [rdx + 4*rdi + 32]
  4748  	LONG $0x5c6ffec5; WORD $0x40ba       // vmovdqu    ymm3, yword [rdx + 4*rdi + 64]
  4749  	LONG $0x646ffec5; WORD $0x60ba       // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
  4750  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0 -- gather the low word of each dword into the low 8 bytes of each 128-bit lane
  4751  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232 -- move qwords 0 and 2 into the low 128 bits
  4752  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4753  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4754  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4755  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4756  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4757  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4758  	LONG $0x0c7ffac5; BYTE $0x79         // vmovdqu    oword [rcx + 2*rdi], xmm1 -- store the first 32 results (only the low 128 bits of each register)
  4759  	LONG $0x547ffac5; WORD $0x1079       // vmovdqu    oword [rcx + 2*rdi + 16], xmm2
  4760  	LONG $0x5c7ffac5; WORD $0x2079       // vmovdqu    oword [rcx + 2*rdi + 32], xmm3
  4761  	LONG $0x647ffac5; WORD $0x3079       // vmovdqu    oword [rcx + 2*rdi + 48], xmm4
  4762  	QUAD $0x000080ba8c6ffec5; BYTE $0x00 // vmovdqu    ymm1, yword [rdx + 4*rdi + 128] -- second unrolled copy: inputs 32..63
  4763  	QUAD $0x0000a0ba946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdx + 4*rdi + 160]
  4764  	QUAD $0x0000c0ba9c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdx + 4*rdi + 192]
  4765  	QUAD $0x0000e0baa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 4*rdi + 224]
  4766  	LONG $0x0075e2c4; BYTE $0xc8         // vpshufb    ymm1, ymm1, ymm0
  4767  	LONG $0x00fde3c4; WORD $0xe8c9       // vpermq    ymm1, ymm1, 232
  4768  	LONG $0x006de2c4; BYTE $0xd0         // vpshufb    ymm2, ymm2, ymm0
  4769  	LONG $0x00fde3c4; WORD $0xe8d2       // vpermq    ymm2, ymm2, 232
  4770  	LONG $0x0065e2c4; BYTE $0xd8         // vpshufb    ymm3, ymm3, ymm0
  4771  	LONG $0x00fde3c4; WORD $0xe8db       // vpermq    ymm3, ymm3, 232
  4772  	LONG $0x005de2c4; BYTE $0xe0         // vpshufb    ymm4, ymm4, ymm0
  4773  	LONG $0x00fde3c4; WORD $0xe8e4       // vpermq    ymm4, ymm4, 232
  4774  	LONG $0x4c7ffac5; WORD $0x4079       // vmovdqu    oword [rcx + 2*rdi + 64], xmm1 -- store results 32..63
  4775  	LONG $0x547ffac5; WORD $0x5079       // vmovdqu    oword [rcx + 2*rdi + 80], xmm2
  4776  	LONG $0x5c7ffac5; WORD $0x6079       // vmovdqu    oword [rcx + 2*rdi + 96], xmm3
  4777  	LONG $0x647ffac5; WORD $0x7079       // vmovdqu    oword [rcx + 2*rdi + 112], xmm4
  4778  	LONG $0x40c78348                     // add    rdi, 64 -- 64 elements consumed per pass
  4779  	LONG $0x02c08348                     // add    rax, 2 -- two 32-element groups done; zero means the paired groups are exhausted
  4780  	JNE  LBB0_622 // repeat while the counter is non-zero
  4781  	JMP  LBB0_971 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4782  
  4783  LBB0_623: // Loop setup for LBB0_625 (32-bit -> 64-bit zero extension): computes the number of 16-element groups and zeroes the element index.
  4784  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4785  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16 elements
  4786  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4787  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4788  	LONG $0x04e8c149         // shr    r8, 4
  4789  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  4790  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4791  	JE   LBB0_1100 // rsi == 16 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4792  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4793  	LONG $0xfee08348         // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4794  	WORD $0xf748; BYTE $0xd8 // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4795  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4796  
  4797  LBB0_625: // Unrolled loop: each pass zero-extends 32 32-bit elements at [rdx + 4*rdi] to 64-bit elements at [rcx + 8*rdi]. rdi = element index, rax = negative pass counter.
  4798  	LONG $0x357de2c4; WORD $0xba04             // vpmovzxdq    ymm0, oword [rdx + 4*rdi] -- 4 dwords -> 4 zero-extended qwords
  4799  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x10 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 16]
  4800  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x20 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 32]
  4801  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x30 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 48]
  4802  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0 -- store the first 16 results (4 x 32 bytes)
  4803  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  4804  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  4805  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  4806  	LONG $0x357de2c4; WORD $0xba44; BYTE $0x40 // vpmovzxdq    ymm0, oword [rdx + 4*rdi + 64] -- second unrolled copy: inputs 16..31
  4807  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x50 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 80]
  4808  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x60 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 96]
  4809  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x70 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 112]
  4810  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0 -- store results 16..31
  4811  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  4812  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  4813  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  4814  	LONG $0x20c78348                           // add    rdi, 32 -- 32 elements consumed per pass
  4815  	LONG $0x02c08348                           // add    rax, 2 -- two 16-element groups done; zero means the paired groups are exhausted
  4816  	JNE  LBB0_625 // repeat while the counter is non-zero
  4817  	JMP  LBB0_1101 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4818  
  4819  LBB0_626: // Loop setup for LBB0_628 (unsigned 32-bit -> float32): computes the number of 32-element groups, zeroes the element index and broadcasts the three conversion constants.
  4820  	WORD $0x8944; BYTE $0xce       // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4821  	WORD $0xe683; BYTE $0xe0       // and    esi, -32 -- round down to a multiple of 32 elements
  4822  	LONG $0xe0468d48               // lea    rax, [rsi - 32]
  4823  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
  4824  	LONG $0x05e8c149               // shr    r8, 5
  4825  	LONG $0x01c08349               // add    r8, 1 -- r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  4826  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
  4827  	JE   LBB0_1105 // rsi == 32 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4828  	WORD $0x894c; BYTE $0xc0       // mov    rax, r8
  4829  	LONG $0xfee08348               // and    rax, -2 -- keep an even number of groups for the 2x-unrolled loop
  4830  	WORD $0xf748; BYTE $0xd8       // neg    rax -- negative counter; the loop adds 2 until it reaches zero
  4831  	LONG $0x587de2c4; WORD $0x3445 // vpbroadcastd    ymm0, dword 52[rbp] /* [rip + .LCPI0_13] */ -- LCDATA1+0x34 = 0x4b000000, the float32 bit pattern of 2^23
  4832  	WORD $0xff31                   // xor    edi, edi -- element index = 0
  4833  	LONG $0x587de2c4; WORD $0x384d // vpbroadcastd    ymm1, dword 56[rbp] /* [rip + .LCPI0_14] */ -- LCDATA1+0x38 = 0x53000000, the float32 bit pattern of 2^39
  4834  	LONG $0x187de2c4; WORD $0x3c55 // vbroadcastss    ymm2, dword 60[rbp] /* [rip + .LCPI0_15] */ -- LCDATA1+0x3c = 0x53000080, the float32 value 2^39 + 2^23
  4835  
  4836  LBB0_628: // Unrolled loop: each pass converts 64 unsigned 32-bit integers at [rdx + 4*rdi] to float32 at [rcx + 4*rdi]. Each value is split into 16-bit halves that are spliced into float bit patterns (ymm0/ymm1), then recombined with a subtract (ymm2) and an add. rdi = element index, rax = negative pass counter.
  4837  	LONG $0x1c6ffec5; BYTE $0xba         // vmovdqu    ymm3, yword [rdx + 4*rdi] -- load 4 x 8 input elements
  4838  	LONG $0x646ffec5; WORD $0x20ba       // vmovdqu    ymm4, yword [rdx + 4*rdi + 32]
  4839  	LONG $0x6c6ffec5; WORD $0x40ba       // vmovdqu    ymm5, yword [rdx + 4*rdi + 64]
  4840  	LONG $0x746ffec5; WORD $0x60ba       // vmovdqu    ymm6, yword [rdx + 4*rdi + 96]
  4841  	LONG $0x0e65e3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm3, ymm0, 170 -- mask 0xAA swaps the high word of each dword for ymm0's: bits of the float 2^23 + low16
  4842  	LONG $0xd372e5c5; BYTE $0x10         // vpsrld    ymm3, ymm3, 16 -- bring the high 16 bits down
  4843  	LONG $0x0e65e3c4; WORD $0xaad9       // vpblendw    ymm3, ymm3, ymm1, 170 -- bits of the float 2^39 + high16 * 2^16
  4844  	LONG $0xda5ce4c5                     // vsubps    ymm3, ymm3, ymm2 -- subtract 2^39 + 2^23, leaving high16 * 2^16 - 2^23
  4845  	LONG $0xdb58c4c5                     // vaddps    ymm3, ymm7, ymm3 -- add the low part; the 2^23 biases cancel
  4846  	LONG $0x0e5de3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm4, ymm0, 170
  4847  	LONG $0xd472ddc5; BYTE $0x10         // vpsrld    ymm4, ymm4, 16
  4848  	LONG $0x0e5de3c4; WORD $0xaae1       // vpblendw    ymm4, ymm4, ymm1, 170
  4849  	LONG $0xe25cdcc5                     // vsubps    ymm4, ymm4, ymm2
  4850  	LONG $0xe458c4c5                     // vaddps    ymm4, ymm7, ymm4
  4851  	LONG $0x0e55e3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm5, ymm0, 170
  4852  	LONG $0xd572d5c5; BYTE $0x10         // vpsrld    ymm5, ymm5, 16
  4853  	LONG $0x0e55e3c4; WORD $0xaae9       // vpblendw    ymm5, ymm5, ymm1, 170
  4854  	LONG $0xea5cd4c5                     // vsubps    ymm5, ymm5, ymm2
  4855  	LONG $0xed58c4c5                     // vaddps    ymm5, ymm7, ymm5
  4856  	LONG $0x0e4de3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm6, ymm0, 170
  4857  	LONG $0xd672cdc5; BYTE $0x10         // vpsrld    ymm6, ymm6, 16
  4858  	LONG $0x0e4de3c4; WORD $0xaaf1       // vpblendw    ymm6, ymm6, ymm1, 170
  4859  	LONG $0xf25cccc5                     // vsubps    ymm6, ymm6, ymm2
  4860  	LONG $0xf658c4c5                     // vaddps    ymm6, ymm7, ymm6
  4861  	LONG $0x1c11fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm3 -- store the first 32 float results
  4862  	LONG $0x6411fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm4
  4863  	LONG $0x6c11fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm5
  4864  	LONG $0x7411fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm6
  4865  	QUAD $0x000080ba9c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdx + 4*rdi + 128] -- second unrolled copy: inputs 32..63
  4866  	QUAD $0x0000a0baa46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdx + 4*rdi + 160]
  4867  	QUAD $0x0000c0baac6ffec5; BYTE $0x00 // vmovdqu    ymm5, yword [rdx + 4*rdi + 192]
  4868  	QUAD $0x0000e0bab46ffec5; BYTE $0x00 // vmovdqu    ymm6, yword [rdx + 4*rdi + 224]
  4869  	LONG $0x0e65e3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm3, ymm0, 170
  4870  	LONG $0xd372e5c5; BYTE $0x10         // vpsrld    ymm3, ymm3, 16
  4871  	LONG $0x0e65e3c4; WORD $0xaad9       // vpblendw    ymm3, ymm3, ymm1, 170
  4872  	LONG $0xda5ce4c5                     // vsubps    ymm3, ymm3, ymm2
  4873  	LONG $0xdb58c4c5                     // vaddps    ymm3, ymm7, ymm3
  4874  	LONG $0x0e5de3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm4, ymm0, 170
  4875  	LONG $0xd472ddc5; BYTE $0x10         // vpsrld    ymm4, ymm4, 16
  4876  	LONG $0x0e5de3c4; WORD $0xaae1       // vpblendw    ymm4, ymm4, ymm1, 170
  4877  	LONG $0xe25cdcc5                     // vsubps    ymm4, ymm4, ymm2
  4878  	LONG $0xe458c4c5                     // vaddps    ymm4, ymm7, ymm4
  4879  	LONG $0x0e55e3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm5, ymm0, 170
  4880  	LONG $0xd572d5c5; BYTE $0x10         // vpsrld    ymm5, ymm5, 16
  4881  	LONG $0x0e55e3c4; WORD $0xaae9       // vpblendw    ymm5, ymm5, ymm1, 170
  4882  	LONG $0xea5cd4c5                     // vsubps    ymm5, ymm5, ymm2
  4883  	LONG $0xed58c4c5                     // vaddps    ymm5, ymm7, ymm5
  4884  	LONG $0x0e4de3c4; WORD $0xaaf8       // vpblendw    ymm7, ymm6, ymm0, 170
  4885  	LONG $0xd672cdc5; BYTE $0x10         // vpsrld    ymm6, ymm6, 16
  4886  	LONG $0x0e4de3c4; WORD $0xaaf1       // vpblendw    ymm6, ymm6, ymm1, 170
  4887  	LONG $0xf25cccc5                     // vsubps    ymm6, ymm6, ymm2
  4888  	LONG $0xf658c4c5                     // vaddps    ymm6, ymm7, ymm6
  4889  	QUAD $0x000080b99c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 128], ymm3 -- store results 32..63
  4890  	QUAD $0x0000a0b9a411fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 160], ymm4
  4891  	QUAD $0x0000c0b9ac11fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 192], ymm5
  4892  	QUAD $0x0000e0b9b411fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 224], ymm6
  4893  	LONG $0x40c78348                     // add    rdi, 64 -- 64 elements consumed per pass
  4894  	LONG $0x02c08348                     // add    rax, 2 -- two 32-element groups done; zero means the paired groups are exhausted
  4895  	JNE  LBB0_628 // repeat while the counter is non-zero
  4896  	JMP  LBB0_1106 // continuation is outside this chunk; presumably handles the remaining elements (confirm)
  4897  
  4898  LBB0_629: // Loop setup for LBB0_631 (scalar vcvttsd2si conversions of 64-bit floats): computes the number of 16-element groups and zeroes the element index. The counter lives in r10 because the loop uses rax as a conversion scratch register.
  4899  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- r9d is set outside this chunk; presumably the element count (confirm)
  4900  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16 elements
  4901  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4902  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4903  	LONG $0x04e8c149         // shr    r8, 4
  4904  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  4905  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4906  	JE   LBB0_1110 // rsi == 16 (exactly one group): bypass the two-groups-per-pass loop; target is outside this chunk
  4907  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
  4908  	LONG $0xfee28349         // and    r10, -2 -- keep an even number of groups for the 2x-unrolled loop
  4909  	WORD $0xf749; BYTE $0xda // neg    r10 -- negative counter for the loop
  4910  	WORD $0xff31             // xor    edi, edi -- element index = 0
  4910  	WORD $0xff31             // xor    edi, edi
  4911  
  4912  LBB0_631:
  4913  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x08 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 8]
  4914  	LONG $0x6ef9e1c4; BYTE $0xc3               // vmovq    xmm0, rbx
  4915  	LONG $0x2cfbe1c4; WORD $0xfa1c             // vcvttsd2si    rbx, qword [rdx + 8*rdi]
  4916  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  4917  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x18 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 24]
  4918  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
  4919  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x10 // vcvttsd2si    rax, qword [rdx + 8*rdi + 16]
  4920  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  4921  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x38 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 56]
  4922  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  4923  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x30 // vcvttsd2si    rax, qword [rdx + 8*rdi + 48]
  4924  	LONG $0xc96ce9c5                           // vpunpcklqdq    xmm1, xmm2, xmm1
  4925  	LONG $0x6ef9e1c4; BYTE $0xd3               // vmovq    xmm2, rbx
  4926  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  4927  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
  4928  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x28 // vcvttsd2si    rax, qword [rdx + 8*rdi + 40]
  4929  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  4930  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x20 // vcvttsd2si    rax, qword [rdx + 8*rdi + 32]
  4931  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  4932  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
  4933  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x58 // vcvttsd2si    rax, qword [rdx + 8*rdi + 88]
  4934  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  4935  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x50 // vcvttsd2si    rax, qword [rdx + 8*rdi + 80]
  4936  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  4937  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x48 // vcvttsd2si    rax, qword [rdx + 8*rdi + 72]
  4938  	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
  4939  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x40 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 64]
  4940  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  4941  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x78 // vcvttsd2si    rax, qword [rdx + 8*rdi + 120]
  4942  	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
  4943  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x70 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 112]
  4944  	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
  4945  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  4946  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
  4947  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
  4948  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x68 // vcvttsd2si    rax, qword [rdx + 8*rdi + 104]
  4949  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  4950  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x60 // vcvttsd2si    rax, qword [rdx + 8*rdi + 96]
  4951  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  4952  	LONG $0xc76cf9c5                           // vpunpcklqdq    xmm0, xmm0, xmm7
  4953  	LONG $0x4c7ffac5; WORD $0x10f9             // vmovdqu    oword [rcx + 8*rdi + 16], xmm1
  4954  	LONG $0x047f7ac5; BYTE $0xf9               // vmovdqu    oword [rcx + 8*rdi], xmm8
  4955  	LONG $0x5c7ffac5; WORD $0x20f9             // vmovdqu    oword [rcx + 8*rdi + 32], xmm3
  4956  	LONG $0x547ffac5; WORD $0x30f9             // vmovdqu    oword [rcx + 8*rdi + 48], xmm2
  4957  	LONG $0x6c7ffac5; WORD $0x40f9             // vmovdqu    oword [rcx + 8*rdi + 64], xmm5
  4958  	LONG $0x647ffac5; WORD $0x50f9             // vmovdqu    oword [rcx + 8*rdi + 80], xmm4
  4959  	LONG $0x447ffac5; WORD $0x60f9             // vmovdqu    oword [rcx + 8*rdi + 96], xmm0
  4960  	LONG $0x747ffac5; WORD $0x70f9             // vmovdqu    oword [rcx + 8*rdi + 112], xmm6
  4961  	QUAD $0x0088fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 136]
  4962  	QUAD $0x0080fa9c2cfbe1c4; WORD $0x0000     // vcvttsd2si    rbx, qword [rdx + 8*rdi + 128]
  4963  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  4964  	QUAD $0x0098fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 152]
  4965  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  4966  	QUAD $0x0090fa9c2cfbe1c4; WORD $0x0000     // vcvttsd2si    rbx, qword [rdx + 8*rdi + 144]
  4967  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  4968  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
  4969  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  4970  	LONG $0xca6cf1c5                           // vpunpcklqdq    xmm1, xmm1, xmm2
  4971  	QUAD $0x00b8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 184]
  4972  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  4973  	QUAD $0x00b0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 176]
  4974  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  4975  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
  4976  	QUAD $0x00a8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 168]
  4977  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  4978  	QUAD $0x00a0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 160]
  4979  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  4980  	QUAD $0x00d8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 216]
  4981  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  4982  	QUAD $0x00d0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 208]
  4983  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  4984  	QUAD $0x00c8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 200]
  4985  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  4986  	QUAD $0x00c0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 192]
  4987  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
  4988  	LONG $0xe56cc9c5                           // vpunpcklqdq    xmm4, xmm6, xmm5
  4989  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  4990  	LONG $0xef6cd1c5                           // vpunpcklqdq    xmm5, xmm5, xmm7
  4991  	QUAD $0x00f8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 248]
  4992  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  4993  	QUAD $0x00f0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 240]
  4994  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  4995  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
  4996  	QUAD $0x00e8fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 232]
  4997  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  4998  	QUAD $0x00e0fa842cfbe1c4; WORD $0x0000     // vcvttsd2si    rax, qword [rdx + 8*rdi + 224]
  4999  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  5000  	LONG $0xc76cf9c5                           // vpunpcklqdq    xmm0, xmm0, xmm7
  5001  	QUAD $0x000090f98c7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 144], xmm1
  5002  	QUAD $0x000080f9847f7ac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 128], xmm8
  5003  	QUAD $0x0000a0f99c7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 160], xmm3
  5004  	QUAD $0x0000b0f9947ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 176], xmm2
  5005  	QUAD $0x0000c0f9ac7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 192], xmm5
  5006  	QUAD $0x0000d0f9a47ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 208], xmm4
  5007  	QUAD $0x0000e0f9847ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 224], xmm0
  5008  	QUAD $0x0000f0f9b47ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 240], xmm6
  5009  	LONG $0x20c78348                           // add    rdi, 32
  5010  	LONG $0x02c28349                           // add    r10, 2
  5011  	JNE  LBB0_631
  5012  	JMP  LBB0_1111
  5013  
  5014  LBB0_632: // Setup for the float64 -> float32 loop at LBB0_634: derive the 16-element group count and a negative loop counter.
  5015  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5016  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16
  5017  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5018  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5019  	LONG $0x04e8c149         // shr    r8, 4
  5020  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element groups
  5021  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5022  	JE   LBB0_1115 // rsi == 16 (a single group): branch to LBB0_1115, which is outside this chunk
  5023  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5024  	LONG $0xfee08348         // and    rax, -2
  5025  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5026  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5027  
  5028  LBB0_634: // Each pass converts 32 float64 values at [rdx + 8*rdi] into 32 float32 values at [rcx + 4*rdi] (rdx = in, rcx = out).
  5029  	LONG $0x045afdc5; BYTE $0xfa         // vcvtpd2ps    xmm0, yword [rdx + 8*rdi] -- 4 doubles in, 4 floats out
  5030  	LONG $0x4c5afdc5; WORD $0x20fa       // vcvtpd2ps    xmm1, yword [rdx + 8*rdi + 32]
  5031  	LONG $0x545afdc5; WORD $0x40fa       // vcvtpd2ps    xmm2, yword [rdx + 8*rdi + 64]
  5032  	LONG $0x5c5afdc5; WORD $0x60fa       // vcvtpd2ps    xmm3, yword [rdx + 8*rdi + 96]
  5033  	LONG $0x0411f9c5; BYTE $0xb9         // vmovupd    oword [rcx + 4*rdi], xmm0 -- unaligned 16-byte store of 4 floats
  5034  	LONG $0x4c11f9c5; WORD $0x10b9       // vmovupd    oword [rcx + 4*rdi + 16], xmm1
  5035  	LONG $0x5411f9c5; WORD $0x20b9       // vmovupd    oword [rcx + 4*rdi + 32], xmm2
  5036  	LONG $0x5c11f9c5; WORD $0x30b9       // vmovupd    oword [rcx + 4*rdi + 48], xmm3
  5037  	QUAD $0x000080fa845afdc5; BYTE $0x00 // vcvtpd2ps    xmm0, yword [rdx + 8*rdi + 128] -- second 16-element group of this pass
  5038  	QUAD $0x0000a0fa8c5afdc5; BYTE $0x00 // vcvtpd2ps    xmm1, yword [rdx + 8*rdi + 160]
  5039  	QUAD $0x0000c0fa945afdc5; BYTE $0x00 // vcvtpd2ps    xmm2, yword [rdx + 8*rdi + 192]
  5040  	QUAD $0x0000e0fa9c5afdc5; BYTE $0x00 // vcvtpd2ps    xmm3, yword [rdx + 8*rdi + 224]
  5041  	LONG $0x4411f9c5; WORD $0x40b9       // vmovupd    oword [rcx + 4*rdi + 64], xmm0
  5042  	LONG $0x4c11f9c5; WORD $0x50b9       // vmovupd    oword [rcx + 4*rdi + 80], xmm1
  5043  	LONG $0x5411f9c5; WORD $0x60b9       // vmovupd    oword [rcx + 4*rdi + 96], xmm2
  5044  	LONG $0x5c11f9c5; WORD $0x70b9       // vmovupd    oword [rcx + 4*rdi + 112], xmm3
  5045  	LONG $0x20c78348                     // add    rdi, 32 -- advance the element index by the 32 values just converted
  5046  	LONG $0x02c08348                     // add    rax, 2 -- two 16-element groups consumed, counter runs up towards zero
  5047  	JNE  LBB0_634 // repeat while the counter is non-zero
  5048  	JMP  LBB0_1116 // continuation is outside this chunk
  5049  
  5050  LBB0_644: // Setup that counts 4-element groups, both branch targets (LBB0_850, LBB0_852) are outside this chunk.
  5051  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5052  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- round down to a multiple of 4
  5053  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5054  	WORD $0x8949; BYTE $0xc2 // mov    r10, rax
  5055  	LONG $0x02eac149         // shr    r10, 2
  5056  	LONG $0x01c28349         // add    r10, 1 -- r10 = (rsi - 4)/4 + 1, the number of 4-element groups
  5057  	WORD $0x8945; BYTE $0xd0 // mov    r8d, r10d
  5058  	LONG $0x03e08341         // and    r8d, 3 -- r8d = group count modulo 4
  5059  	LONG $0x0cf88348         // cmp    rax, 12
  5060  	JAE  LBB0_850 // taken when rsi - 4 >= 12 under an unsigned compare
  5061  	WORD $0xc031             // xor    eax, eax -- otherwise zero rax before continuing at LBB0_852
  5062  	JMP  LBB0_852
  5063  
  5064  LBB0_646: // Setup for the 16-bit -> 64-bit zero-extension loop at LBB0_648: derive the 16-element group count and a negative loop counter.
  5065  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5066  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16
  5067  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5068  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5069  	LONG $0x04e8c149         // shr    r8, 4
  5070  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element groups
  5071  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5072  	JE   LBB0_975 // rsi == 16 (a single group): branch to LBB0_975, which is outside this chunk
  5073  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5074  	LONG $0xfee08348         // and    rax, -2
  5075  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5076  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5077  
  5078  LBB0_648: // Each pass zero-extends 32 16-bit values at [rdx + 2*rdi] to 64-bit values at [rcx + 8*rdi] (rdx = in, rcx = out).
  5079  	LONG $0x347de2c4; WORD $0x7a04             // vpmovzxwq    ymm0, qword [rdx + 2*rdi] -- 4 words in, 4 qwords out
  5080  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 8]
  5081  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 16]
  5082  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 24]
  5083  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0 -- unaligned 32-byte store
  5084  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  5085  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  5086  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  5087  	LONG $0x347de2c4; WORD $0x7a44; BYTE $0x20 // vpmovzxwq    ymm0, qword [rdx + 2*rdi + 32] -- second 16-element group of this pass
  5088  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x28 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 40]
  5089  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x30 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 48]
  5090  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x38 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 56]
  5091  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  5092  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  5093  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  5094  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  5095  	LONG $0x20c78348                           // add    rdi, 32 -- advance the element index by the 32 values just converted
  5096  	LONG $0x02c08348                           // add    rax, 2 -- two 16-element groups consumed, counter runs up towards zero
  5097  	JNE  LBB0_648 // repeat while the counter is non-zero
  5098  	JMP  LBB0_976 // continuation is outside this chunk
  5099  
  5100  LBB0_649: // Setup for the zero-extended 16-bit -> float32 loop at LBB0_651: derive the 32-element group count and a negative loop counter.
  5101  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5102  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32
  5103  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5104  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5105  	LONG $0x05e8c149         // shr    r8, 5
  5106  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1, the number of 32-element groups
  5107  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5108  	JE   LBB0_1120 // rsi == 32 (a single group): branch to LBB0_1120, which is outside this chunk
  5109  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5110  	LONG $0xfee08348         // and    rax, -2
  5111  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5112  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5113  
  5114  LBB0_651: // Each pass turns 64 16-bit values at [rdx + 2*rdi] into float32 at [rcx + 4*rdi]: zero-extend to 32 bits, then vcvtdq2ps.
  5115  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi] -- 8 words in, 8 dwords out
  5116  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  5117  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  5118  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  5119  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0 -- 8 int32 lanes to 8 float32 lanes
  5120  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  5121  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  5122  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  5123  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0 -- unaligned 32-byte store of 8 floats
  5124  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5125  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5126  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
  5127  	LONG $0x337de2c4; WORD $0x7a44; BYTE $0x40 // vpmovzxwd    ymm0, oword [rdx + 2*rdi + 64] -- second 32-element group of this pass
  5128  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 80]
  5129  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x60 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 96]
  5130  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 112]
  5131  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
  5132  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  5133  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  5134  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  5135  	QUAD $0x000080b98411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 128], ymm0
  5136  	QUAD $0x0000a0b98c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 160], ymm1
  5137  	QUAD $0x0000c0b99411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 192], ymm2
  5138  	QUAD $0x0000e0b99c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 224], ymm3
  5139  	LONG $0x40c78348                           // add    rdi, 64 -- advance the element index by the 64 values just converted
  5140  	LONG $0x02c08348                           // add    rax, 2 -- two 32-element groups consumed, counter runs up towards zero
  5141  	JNE  LBB0_651 // repeat while the counter is non-zero
  5142  	JMP  LBB0_1121 // continuation is outside this chunk
  5143  
  5144  LBB0_652: // Setup for the 16-bit -> 64-bit sign-extension loop at LBB0_654: derive the 16-element group count and a negative loop counter.
  5145  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5146  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16
  5147  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5148  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5149  	LONG $0x04e8c149         // shr    r8, 4
  5150  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element groups
  5151  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5152  	JE   LBB0_1125 // rsi == 16 (a single group): branch to LBB0_1125, which is outside this chunk
  5153  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5154  	LONG $0xfee08348         // and    rax, -2
  5155  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5156  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5157  
  5158  LBB0_654: // Each pass sign-extends 32 16-bit values at [rdx + 2*rdi] to 64-bit values at [rcx + 8*rdi] (rdx = in, rcx = out).
  5159  	LONG $0x247de2c4; WORD $0x7a04             // vpmovsxwq    ymm0, qword [rdx + 2*rdi] -- 4 words in, 4 qwords out
  5160  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 8]
  5161  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 16]
  5162  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 24]
  5163  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0 -- unaligned 32-byte store
  5164  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  5165  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  5166  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  5167  	LONG $0x247de2c4; WORD $0x7a44; BYTE $0x20 // vpmovsxwq    ymm0, qword [rdx + 2*rdi + 32] -- second 16-element group of this pass
  5168  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x28 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 40]
  5169  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x30 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 48]
  5170  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x38 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 56]
  5171  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  5172  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  5173  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  5174  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  5175  	LONG $0x20c78348                           // add    rdi, 32 -- advance the element index by the 32 values just converted
  5176  	LONG $0x02c08348                           // add    rax, 2 -- two 16-element groups consumed, counter runs up towards zero
  5177  	JNE  LBB0_654 // repeat while the counter is non-zero
  5178  	JMP  LBB0_1126 // continuation is outside this chunk
  5179  
  5180  LBB0_655: // Setup for the sign-extended 16-bit -> float32 loop at LBB0_657: derive the 32-element group count and a negative loop counter.
  5181  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5182  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- round down to a multiple of 32
  5183  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5184  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5185  	LONG $0x05e8c149         // shr    r8, 5
  5186  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 32)/32 + 1, the number of 32-element groups
  5187  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5188  	JE   LBB0_1130 // rsi == 32 (a single group): branch to LBB0_1130, which is outside this chunk
  5189  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5190  	LONG $0xfee08348         // and    rax, -2
  5191  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5192  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5193  
  5194  LBB0_657: // Each pass turns 64 16-bit values at [rdx + 2*rdi] into float32 at [rcx + 4*rdi]: sign-extend to 32 bits, then vcvtdq2ps.
  5195  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi] -- 8 words in, 8 dwords out
  5196  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
  5197  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
  5198  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
  5199  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0 -- 8 int32 lanes to 8 float32 lanes
  5200  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  5201  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  5202  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  5203  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0 -- unaligned 32-byte store of 8 floats
  5204  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5205  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5206  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
  5207  	LONG $0x237de2c4; WORD $0x7a44; BYTE $0x40 // vpmovsxwd    ymm0, oword [rdx + 2*rdi + 64] -- second 32-element group of this pass
  5208  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 80]
  5209  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x60 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 96]
  5210  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 112]
  5211  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
  5212  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  5213  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  5214  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  5215  	QUAD $0x000080b98411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 128], ymm0
  5216  	QUAD $0x0000a0b98c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 160], ymm1
  5217  	QUAD $0x0000c0b99411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 192], ymm2
  5218  	QUAD $0x0000e0b99c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 224], ymm3
  5219  	LONG $0x40c78348                           // add    rdi, 64 -- advance the element index by the 64 values just converted
  5220  	LONG $0x02c08348                           // add    rax, 2 -- two 32-element groups consumed, counter runs up towards zero
  5221  	JNE  LBB0_657 // repeat while the counter is non-zero
  5222  	JMP  LBB0_1131 // continuation is outside this chunk
  5223  
  5224  LBB0_661: // Setup for the scalar 64-bit integer -> float32 loop at LBB0_663: derive the 16-element group count and a negative loop counter in r10.
  5225  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5226  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16
  5227  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5228  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5229  	LONG $0x04e8c149         // shr    r8, 4
  5230  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element groups
  5231  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5232  	JE   LBB0_1135 // rsi == 16 (a single group): branch to LBB0_1135, which is outside this chunk
  5233  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8 -- counter lives in r10 here because the loop at LBB0_663 uses rax as scratch
  5234  	LONG $0xfee28349         // and    r10, -2
  5235  	WORD $0xf749; BYTE $0xda // neg    r10 -- r10 = -(group count rounded down to even), the loop adds 2 until it reaches zero
  5236  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5237  
  5238  LBB0_663: // Each pass converts 32 64-bit integers at [rdx + 8*rdi] to float32 at [rcx + 4*rdi], one scalar vcvtsi2ss (signed convert) per element, packed four at a time with vinsertps.
  5239  	LONG $0x046ffac5; BYTE $0xfa         // vmovdqu    xmm0, oword [rdx + 8*rdi] -- load two 64-bit inputs
  5240  	LONG $0x16f9e3c4; WORD $0x01c0       // vpextrq    rax, xmm0, 1 -- upper qword into rax
  5241  	LONG $0x4c6ffac5; WORD $0x10fa       // vmovdqu    xmm1, oword [rdx + 8*rdi + 16]
  5242  	LONG $0x2abae1c4; BYTE $0xd0         // vcvtsi2ss    xmm2, xmm8, rax -- low lane of xmm2 = (float)rax
  5243  	LONG $0x7ef9e1c4; BYTE $0xc0         // vmovq    rax, xmm0 -- lower qword into rax
  5244  	LONG $0x2abae1c4; BYTE $0xc0         // vcvtsi2ss    xmm0, xmm8, rax
  5245  	LONG $0x7ef9e1c4; BYTE $0xc8         // vmovq    rax, xmm1
  5246  	LONG $0x2abae1c4; BYTE $0xd8         // vcvtsi2ss    xmm3, xmm8, rax
  5247  	LONG $0x16f9e3c4; WORD $0x01c8       // vpextrq    rax, xmm1, 1
  5248  	LONG $0x2abae1c4; BYTE $0xc8         // vcvtsi2ss    xmm1, xmm8, rax
  5249  	LONG $0x646ffac5; WORD $0x20fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 32]
  5250  	LONG $0x16f9e3c4; WORD $0x01e0       // vpextrq    rax, xmm4, 1
  5251  	LONG $0x6c6ffac5; WORD $0x30fa       // vmovdqu    xmm5, oword [rdx + 8*rdi + 48]
  5252  	LONG $0x2abae1c4; BYTE $0xf0         // vcvtsi2ss    xmm6, xmm8, rax
  5253  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  5254  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5255  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  5256  	LONG $0x2abae1c4; BYTE $0xf8         // vcvtsi2ss    xmm7, xmm8, rax
  5257  	LONG $0x2179e3c4; WORD $0x10c2       // vinsertps    xmm0, xmm0, xmm2, 16 -- immediates 16/32/48 place a float in lane 1/2/3
  5258  	LONG $0x2179e3c4; WORD $0x20c3       // vinsertps    xmm0, xmm0, xmm3, 32
  5259  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  5260  	LONG $0x2179e3c4; WORD $0x30c1       // vinsertps    xmm0, xmm0, xmm1, 48
  5261  	LONG $0x2abae1c4; BYTE $0xc8         // vcvtsi2ss    xmm1, xmm8, rax
  5262  	LONG $0x2159e3c4; WORD $0x10d6       // vinsertps    xmm2, xmm4, xmm6, 16
  5263  	LONG $0x5c6ffac5; WORD $0x40fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
  5264  	LONG $0x16f9e3c4; WORD $0x01d8       // vpextrq    rax, xmm3, 1
  5265  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5266  	LONG $0x7ef9e1c4; BYTE $0xd8         // vmovq    rax, xmm3
  5267  	LONG $0x2abae1c4; BYTE $0xd8         // vcvtsi2ss    xmm3, xmm8, rax
  5268  	LONG $0x6c6ffac5; WORD $0x50fa       // vmovdqu    xmm5, oword [rdx + 8*rdi + 80]
  5269  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  5270  	LONG $0x2abae1c4; BYTE $0xf0         // vcvtsi2ss    xmm6, xmm8, rax
  5271  	LONG $0x2169e3c4; WORD $0x20d7       // vinsertps    xmm2, xmm2, xmm7, 32
  5272  	LONG $0x2169e3c4; WORD $0x30c9       // vinsertps    xmm1, xmm2, xmm1, 48
  5273  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  5274  	LONG $0x2161e3c4; WORD $0x10d4       // vinsertps    xmm2, xmm3, xmm4, 16
  5275  	LONG $0x2abae1c4; BYTE $0xd8         // vcvtsi2ss    xmm3, xmm8, rax
  5276  	LONG $0x2169e3c4; WORD $0x20d6       // vinsertps    xmm2, xmm2, xmm6, 32
  5277  	LONG $0x646ffac5; WORD $0x60fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
  5278  	LONG $0x16f9e3c4; WORD $0x01e0       // vpextrq    rax, xmm4, 1
  5279  	LONG $0x2abae1c4; BYTE $0xe8         // vcvtsi2ss    xmm5, xmm8, rax
  5280  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  5281  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5282  	LONG $0x746ffac5; WORD $0x70fa       // vmovdqu    xmm6, oword [rdx + 8*rdi + 112]
  5283  	LONG $0x7ef9e1c4; BYTE $0xf0         // vmovq    rax, xmm6
  5284  	LONG $0x2abae1c4; BYTE $0xf8         // vcvtsi2ss    xmm7, xmm8, rax
  5285  	LONG $0x2169e3c4; WORD $0x30d3       // vinsertps    xmm2, xmm2, xmm3, 48
  5286  	LONG $0x2159e3c4; WORD $0x10dd       // vinsertps    xmm3, xmm4, xmm5, 16
  5287  	LONG $0x16f9e3c4; WORD $0x01f0       // vpextrq    rax, xmm6, 1
  5288  	LONG $0x2161e3c4; WORD $0x20df       // vinsertps    xmm3, xmm3, xmm7, 32
  5289  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5290  	LONG $0x2161e3c4; WORD $0x30dc       // vinsertps    xmm3, xmm3, xmm4, 48
  5291  	LONG $0x0411f8c5; BYTE $0xb9         // vmovups    oword [rcx + 4*rdi], xmm0 -- store the first 16 converted floats (4 per register)
  5292  	LONG $0x4c11f8c5; WORD $0x10b9       // vmovups    oword [rcx + 4*rdi + 16], xmm1
  5293  	LONG $0x5411f8c5; WORD $0x20b9       // vmovups    oword [rcx + 4*rdi + 32], xmm2
  5294  	LONG $0x5c11f8c5; WORD $0x30b9       // vmovups    oword [rcx + 4*rdi + 48], xmm3
  5295  	QUAD $0x000080fa846ffac5; BYTE $0x00 // vmovdqu    xmm0, oword [rdx + 8*rdi + 128] -- second 16-element group of this pass
  5296  	LONG $0x16f9e3c4; WORD $0x01c0       // vpextrq    rax, xmm0, 1
  5297  	QUAD $0x000090fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 144]
  5298  	LONG $0x2abae1c4; BYTE $0xd0         // vcvtsi2ss    xmm2, xmm8, rax
  5299  	LONG $0x7ef9e1c4; BYTE $0xc0         // vmovq    rax, xmm0
  5300  	LONG $0x2abae1c4; BYTE $0xc0         // vcvtsi2ss    xmm0, xmm8, rax
  5301  	LONG $0x7ef9e1c4; BYTE $0xc8         // vmovq    rax, xmm1
  5302  	LONG $0x2abae1c4; BYTE $0xd8         // vcvtsi2ss    xmm3, xmm8, rax
  5303  	LONG $0x16f9e3c4; WORD $0x01c8       // vpextrq    rax, xmm1, 1
  5304  	LONG $0x2abae1c4; BYTE $0xc8         // vcvtsi2ss    xmm1, xmm8, rax
  5305  	QUAD $0x0000a0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 160]
  5306  	LONG $0x16f9e3c4; WORD $0x01e0       // vpextrq    rax, xmm4, 1
  5307  	LONG $0x2abae1c4; BYTE $0xe8         // vcvtsi2ss    xmm5, xmm8, rax
  5308  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  5309  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5310  	LONG $0x2179e3c4; WORD $0x10c2       // vinsertps    xmm0, xmm0, xmm2, 16
  5311  	QUAD $0x0000b0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 176]
  5312  	LONG $0x16f9c3c4; WORD $0x01d3       // vpextrq    r11, xmm2, 1 -- r11 is a second scratch register alongside rax
  5313  	LONG $0x7ef9e1c4; BYTE $0xd0         // vmovq    rax, xmm2
  5314  	LONG $0x2abae1c4; BYTE $0xd0         // vcvtsi2ss    xmm2, xmm8, rax
  5315  	LONG $0x2179e3c4; WORD $0x20c3       // vinsertps    xmm0, xmm0, xmm3, 32
  5316  	LONG $0x2abac1c4; BYTE $0xdb         // vcvtsi2ss    xmm3, xmm8, r11
  5317  	LONG $0x2179e3c4; WORD $0x30c1       // vinsertps    xmm0, xmm0, xmm1, 48
  5318  	QUAD $0x0000c0fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 192]
  5319  	LONG $0x16f9e3c4; WORD $0x01c8       // vpextrq    rax, xmm1, 1
  5320  	LONG $0x2159e3c4; WORD $0x10e5       // vinsertps    xmm4, xmm4, xmm5, 16
  5321  	LONG $0x2abae1c4; BYTE $0xe8         // vcvtsi2ss    xmm5, xmm8, rax
  5322  	LONG $0x7ef9e1c4; BYTE $0xc8         // vmovq    rax, xmm1
  5323  	LONG $0x2abae1c4; BYTE $0xc8         // vcvtsi2ss    xmm1, xmm8, rax
  5324  	LONG $0x2159e3c4; WORD $0x20d2       // vinsertps    xmm2, xmm4, xmm2, 32
  5325  	QUAD $0x0000d0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 208]
  5326  	LONG $0x16f9c3c4; WORD $0x01e3       // vpextrq    r11, xmm4, 1
  5327  	LONG $0x7ef9e1c4; BYTE $0xe0         // vmovq    rax, xmm4
  5328  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5329  	LONG $0x2169e3c4; WORD $0x30d3       // vinsertps    xmm2, xmm2, xmm3, 48
  5330  	LONG $0x2abac1c4; BYTE $0xdb         // vcvtsi2ss    xmm3, xmm8, r11
  5331  	LONG $0x2171e3c4; WORD $0x10cd       // vinsertps    xmm1, xmm1, xmm5, 16
  5332  	QUAD $0x0000e0faac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 8*rdi + 224]
  5333  	LONG $0x16f9e3c4; WORD $0x01e8       // vpextrq    rax, xmm5, 1
  5334  	LONG $0x2171e3c4; WORD $0x20cc       // vinsertps    xmm1, xmm1, xmm4, 32
  5335  	LONG $0x2abae1c4; BYTE $0xe0         // vcvtsi2ss    xmm4, xmm8, rax
  5336  	LONG $0x7ef9e1c4; BYTE $0xe8         // vmovq    rax, xmm5
  5337  	LONG $0x2abae1c4; BYTE $0xe8         // vcvtsi2ss    xmm5, xmm8, rax
  5338  	LONG $0x2171e3c4; WORD $0x30cb       // vinsertps    xmm1, xmm1, xmm3, 48
  5339  	QUAD $0x0000f0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 240]
  5340  	LONG $0x16f9c3c4; WORD $0x01db       // vpextrq    r11, xmm3, 1
  5341  	LONG $0x7ef9e1c4; BYTE $0xd8         // vmovq    rax, xmm3
  5342  	LONG $0x2abae1c4; BYTE $0xd8         // vcvtsi2ss    xmm3, xmm8, rax
  5343  	LONG $0x2151e3c4; WORD $0x10e4       // vinsertps    xmm4, xmm5, xmm4, 16
  5344  	LONG $0x2abac1c4; BYTE $0xeb         // vcvtsi2ss    xmm5, xmm8, r11
  5345  	LONG $0x2159e3c4; WORD $0x20db       // vinsertps    xmm3, xmm4, xmm3, 32
  5346  	LONG $0x2161e3c4; WORD $0x30dd       // vinsertps    xmm3, xmm3, xmm5, 48
  5347  	LONG $0x4411f8c5; WORD $0x40b9       // vmovups    oword [rcx + 4*rdi + 64], xmm0 -- store the second 16 floats, note the register order xmm0, xmm2, xmm1, xmm3
  5348  	LONG $0x5411f8c5; WORD $0x50b9       // vmovups    oword [rcx + 4*rdi + 80], xmm2
  5349  	LONG $0x4c11f8c5; WORD $0x60b9       // vmovups    oword [rcx + 4*rdi + 96], xmm1
  5350  	LONG $0x5c11f8c5; WORD $0x70b9       // vmovups    oword [rcx + 4*rdi + 112], xmm3
  5351  	LONG $0x20c78348                     // add    rdi, 32 -- advance the element index by the 32 values just converted
  5352  	LONG $0x02c28349                     // add    r10, 2 -- two 16-element groups consumed, counter runs up towards zero
  5353  	JNE  LBB0_663 // repeat while the counter is non-zero
  5354  	JMP  LBB0_1136 // continuation is outside this chunk
  5355  
  5356  LBB0_664: // Setup for the loop at LBB0_666 (scalar vcvttss2si float32 -> 64-bit integer conversions): derive the 16-element group count and a negative loop counter in r10.
  5357  	WORD $0x8944; BYTE $0xce // mov    esi, r9d -- NOTE(review): r9d is set outside this chunk, presumably the element count, confirm
  5358  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- round down to a multiple of 16
  5359  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5360  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5361  	LONG $0x04e8c149         // shr    r8, 4
  5362  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element groups
  5363  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5364  	JE   LBB0_1140 // rsi == 16 (a single group): branch to LBB0_1140, which is outside this chunk
  5365  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8 -- counter lives in r10 here because the loop at LBB0_666 uses rax and rbx as scratch
  5366  	LONG $0xfee28349         // and    r10, -2
  5367  	WORD $0xf749; BYTE $0xda // neg    r10 -- r10 = -(group count rounded down to even)
  5368  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starting at 0
  5369  
  5370  LBB0_666:
  5371  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x04 // vcvttss2si    rbx, dword [rdx + 4*rdi + 4]
  5372  	LONG $0x6ef9e1c4; BYTE $0xc3               // vmovq    xmm0, rbx
  5373  	LONG $0x2cfae1c4; WORD $0xba1c             // vcvttss2si    rbx, dword [rdx + 4*rdi]
  5374  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  5375  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x0c // vcvttss2si    rbx, dword [rdx + 4*rdi + 12]
  5376  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
  5377  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x08 // vcvttss2si    rax, dword [rdx + 4*rdi + 8]
  5378  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  5379  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x1c // vcvttss2si    rbx, dword [rdx + 4*rdi + 28]
  5380  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  5381  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x18 // vcvttss2si    rax, dword [rdx + 4*rdi + 24]
  5382  	LONG $0xc96ce9c5                           // vpunpcklqdq    xmm1, xmm2, xmm1
  5383  	LONG $0x6ef9e1c4; BYTE $0xd3               // vmovq    xmm2, rbx
  5384  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  5385  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
  5386  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x14 // vcvttss2si    rax, dword [rdx + 4*rdi + 20]
  5387  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  5388  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x10 // vcvttss2si    rax, dword [rdx + 4*rdi + 16]
  5389  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  5390  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
  5391  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x2c // vcvttss2si    rax, dword [rdx + 4*rdi + 44]
  5392  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  5393  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x28 // vcvttss2si    rax, dword [rdx + 4*rdi + 40]
  5394  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  5395  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x24 // vcvttss2si    rax, dword [rdx + 4*rdi + 36]
  5396  	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
  5397  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x20 // vcvttss2si    rbx, dword [rdx + 4*rdi + 32]
  5398  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  5399  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x3c // vcvttss2si    rax, dword [rdx + 4*rdi + 60]
  5400  	LONG $0x6ef9e1c4; BYTE $0xf3               // vmovq    xmm6, rbx
  5401  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x38 // vcvttss2si    rbx, dword [rdx + 4*rdi + 56]
  5402  	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
  5403  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  5404  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
  5405  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
  5406  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x34 // vcvttss2si    rax, dword [rdx + 4*rdi + 52]
  5407  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  5408  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x30 // vcvttss2si    rax, dword [rdx + 4*rdi + 48]
  5409  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  5410  	LONG $0xc76cf9c5                           // vpunpcklqdq    xmm0, xmm0, xmm7
  5411  	LONG $0x4c7ffac5; WORD $0x10f9             // vmovdqu    oword [rcx + 8*rdi + 16], xmm1
  5412  	LONG $0x047f7ac5; BYTE $0xf9               // vmovdqu    oword [rcx + 8*rdi], xmm8
  5413  	LONG $0x5c7ffac5; WORD $0x20f9             // vmovdqu    oword [rcx + 8*rdi + 32], xmm3
  5414  	LONG $0x547ffac5; WORD $0x30f9             // vmovdqu    oword [rcx + 8*rdi + 48], xmm2
  5415  	LONG $0x6c7ffac5; WORD $0x40f9             // vmovdqu    oword [rcx + 8*rdi + 64], xmm5
  5416  	LONG $0x647ffac5; WORD $0x50f9             // vmovdqu    oword [rcx + 8*rdi + 80], xmm4
  5417  	LONG $0x447ffac5; WORD $0x60f9             // vmovdqu    oword [rcx + 8*rdi + 96], xmm0
  5418  	LONG $0x747ffac5; WORD $0x70f9             // vmovdqu    oword [rcx + 8*rdi + 112], xmm6
  5419  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x44 // vcvttss2si    rax, dword [rdx + 4*rdi + 68]
  5420  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x40 // vcvttss2si    rbx, dword [rdx + 4*rdi + 64]
  5421  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  5422  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x4c // vcvttss2si    rax, dword [rdx + 4*rdi + 76]
  5423  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  5424  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x48 // vcvttss2si    rbx, dword [rdx + 4*rdi + 72]
  5425  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  5426  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
  5427  	LONG $0x6ef9e1c4; BYTE $0xcb               // vmovq    xmm1, rbx
  5428  	LONG $0xca6cf1c5                           // vpunpcklqdq    xmm1, xmm1, xmm2
  5429  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x5c // vcvttss2si    rax, dword [rdx + 4*rdi + 92]
  5430  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
  5431  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x58 // vcvttss2si    rax, dword [rdx + 4*rdi + 88]
  5432  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  5433  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
  5434  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x54 // vcvttss2si    rax, dword [rdx + 4*rdi + 84]
  5435  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
  5436  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x50 // vcvttss2si    rax, dword [rdx + 4*rdi + 80]
  5437  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
  5438  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x6c // vcvttss2si    rax, dword [rdx + 4*rdi + 108]
  5439  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  5440  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x68 // vcvttss2si    rax, dword [rdx + 4*rdi + 104]
  5441  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  5442  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x64 // vcvttss2si    rax, dword [rdx + 4*rdi + 100]
  5443  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  5444  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x60 // vcvttss2si    rax, dword [rdx + 4*rdi + 96]
  5445  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
  5446  	LONG $0xe56cc9c5                           // vpunpcklqdq    xmm4, xmm6, xmm5
  5447  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
  5448  	LONG $0xef6cd1c5                           // vpunpcklqdq    xmm5, xmm5, xmm7
  5449  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x7c // vcvttss2si    rax, dword [rdx + 4*rdi + 124]
  5450  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
  5451  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x78 // vcvttss2si    rax, dword [rdx + 4*rdi + 120]
  5452  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  5453  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
  5454  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x74 // vcvttss2si    rax, dword [rdx + 4*rdi + 116]
  5455  	LONG $0x6ef9e1c4; BYTE $0xf8               // vmovq    xmm7, rax
  5456  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x70 // vcvttss2si    rax, dword [rdx + 4*rdi + 112]
  5457  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
  5458  	LONG $0xc76cf9c5                           // vpunpcklqdq    xmm0, xmm0, xmm7
  5459  	QUAD $0x000090f98c7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 144], xmm1
  5460  	QUAD $0x000080f9847f7ac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 128], xmm8
  5461  	QUAD $0x0000a0f99c7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 160], xmm3
  5462  	QUAD $0x0000b0f9947ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 176], xmm2
  5463  	QUAD $0x0000c0f9ac7ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 192], xmm5
  5464  	QUAD $0x0000d0f9a47ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 208], xmm4
  5465  	QUAD $0x0000e0f9847ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 224], xmm0
  5466  	QUAD $0x0000f0f9b47ffac5; BYTE $0x00       // vmovdqu    oword [rcx + 8*rdi + 240], xmm6
  5467  	LONG $0x20c78348                           // add    rdi, 32
  5468  	LONG $0x02c28349                           // add    r10, 2
  5469  	JNE  LBB0_666
  5470  	JMP  LBB0_1141
  5471  
  5472  LBB0_676:
        	// Vector path: sign-extend packed 32-bit integers read through rdx into
        	// 64-bit integers written through rcx (vpmovsxdq), 16 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 16.
  5473  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5474  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
        	// r8 = ((esi - 16) >> 4) + 1, i.e. the number of 16-element groups when esi >= 16.
  5475  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5476  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5477  	LONG $0x04e8c149         // shr    r8, 4
  5478  	LONG $0x01c08349         // add    r8, 1
        	// esi == 16 means exactly one group: bypass the two-groups-per-pass loop.
  5479  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5480  	JE   LBB0_980
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5481  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5482  	LONG $0xfee08348         // and    rax, -2
  5483  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5484  	WORD $0xff31             // xor    edi, edi
  5485  
  5486  LBB0_678:
        	// First group: four loads of 4 dwords each, widened to 4 qwords per ymm,
        	// then four 32-byte stores (source index scaled by 4, destination by 8).
  5487  	LONG $0x257de2c4; WORD $0xba04             // vpmovsxdq    ymm0, oword [rdx + 4*rdi]
  5488  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x10 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 16]
  5489  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x20 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 32]
  5490  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x30 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 48]
  5491  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  5492  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  5493  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  5494  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
        	// Second group: same pattern, 64 source bytes / 128 destination bytes further on.
  5495  	LONG $0x257de2c4; WORD $0xba44; BYTE $0x40 // vpmovsxdq    ymm0, oword [rdx + 4*rdi + 64]
  5496  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x50 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 80]
  5497  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x60 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 96]
  5498  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x70 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 112]
  5499  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  5500  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  5501  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  5502  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
        	// Advance by the 32 elements just handled; repeat while the group counter is non-zero.
  5503  	LONG $0x20c78348                           // add    rdi, 32
  5504  	LONG $0x02c08348                           // add    rax, 2
  5505  	JNE  LBB0_678
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5506  	JMP  LBB0_981
  5507  
  5508  LBB0_679:
        	// Vector path: convert packed signed 32-bit integers read through rdx to
        	// single-precision floats written through rcx (vcvtdq2ps), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5509  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5510  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5511  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5512  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5513  	LONG $0x05e8c149         // shr    r8, 5
  5514  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5515  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5516  	JE   LBB0_985
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5517  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5518  	LONG $0xfee08348         // and    rax, -2
  5519  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5520  	WORD $0xff31             // xor    edi, edi
  5521  
  5522  LBB0_681:
        	// First group: four 8-lane int32 -> float32 conversions straight from memory,
        	// then four 32-byte stores (both buffers indexed with a scale of 4).
  5523  	LONG $0x045bfcc5; BYTE $0xba         // vcvtdq2ps    ymm0, yword [rdx + 4*rdi]
  5524  	LONG $0x4c5bfcc5; WORD $0x20ba       // vcvtdq2ps    ymm1, yword [rdx + 4*rdi + 32]
  5525  	LONG $0x545bfcc5; WORD $0x40ba       // vcvtdq2ps    ymm2, yword [rdx + 4*rdi + 64]
  5526  	LONG $0x5c5bfcc5; WORD $0x60ba       // vcvtdq2ps    ymm3, yword [rdx + 4*rdi + 96]
  5527  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  5528  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5529  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5530  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 128 bytes further on in both buffers.
  5531  	QUAD $0x000080ba845bfcc5; BYTE $0x00 // vcvtdq2ps    ymm0, yword [rdx + 4*rdi + 128]
  5532  	QUAD $0x0000a0ba8c5bfcc5; BYTE $0x00 // vcvtdq2ps    ymm1, yword [rdx + 4*rdi + 160]
  5533  	QUAD $0x0000c0ba945bfcc5; BYTE $0x00 // vcvtdq2ps    ymm2, yword [rdx + 4*rdi + 192]
  5534  	QUAD $0x0000e0ba9c5bfcc5; BYTE $0x00 // vcvtdq2ps    ymm3, yword [rdx + 4*rdi + 224]
  5535  	QUAD $0x000080b98411fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 128], ymm0
  5536  	QUAD $0x0000a0b98c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 160], ymm1
  5537  	QUAD $0x0000c0b99411fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 192], ymm2
  5538  	QUAD $0x0000e0b99c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5539  	LONG $0x40c78348                     // add    rdi, 64
  5540  	LONG $0x02c08348                     // add    rax, 2
  5541  	JNE  LBB0_681
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5542  	JMP  LBB0_986
  5543  
  5544  LBB0_715:
        	// Vector path: convert packed double-precision floats read through rdx to
        	// signed 32-bit integers written through rcx, truncating toward zero
        	// (vcvttpd2dq), 16 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 16.
  5545  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5546  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
        	// r8 = ((esi - 16) >> 4) + 1, i.e. the number of 16-element groups when esi >= 16.
  5547  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5548  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5549  	LONG $0x04e8c149         // shr    r8, 4
  5550  	LONG $0x01c08349         // add    r8, 1
        	// esi == 16 means exactly one group: bypass the two-groups-per-pass loop.
  5551  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5552  	JE   LBB0_990
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5553  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5554  	LONG $0xfee08348         // and    rax, -2
  5555  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5556  	WORD $0xff31             // xor    edi, edi
  5557  
  5558  LBB0_717:
        	// First group: each conversion reads 4 doubles (32 bytes) and yields 4 dwords
        	// in an xmm register; four 16-byte stores follow (source scale 8, destination scale 4).
  5559  	LONG $0x04e6fdc5; BYTE $0xfa         // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
  5560  	LONG $0x4ce6fdc5; WORD $0x20fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  5561  	LONG $0x54e6fdc5; WORD $0x40fa       // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
  5562  	LONG $0x5ce6fdc5; WORD $0x60fa       // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
  5563  	LONG $0x0411f9c5; BYTE $0xb9         // vmovupd    oword [rcx + 4*rdi], xmm0
  5564  	LONG $0x4c11f9c5; WORD $0x10b9       // vmovupd    oword [rcx + 4*rdi + 16], xmm1
  5565  	LONG $0x5411f9c5; WORD $0x20b9       // vmovupd    oword [rcx + 4*rdi + 32], xmm2
  5566  	LONG $0x5c11f9c5; WORD $0x30b9       // vmovupd    oword [rcx + 4*rdi + 48], xmm3
        	// Second group: same pattern, 128 source bytes / 64 destination bytes further on.
  5567  	QUAD $0x000080fa84e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm0, yword [rdx + 8*rdi + 128]
  5568  	QUAD $0x0000a0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 160]
  5569  	QUAD $0x0000c0fa94e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 192]
  5570  	QUAD $0x0000e0fa9ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 224]
  5571  	LONG $0x4411f9c5; WORD $0x40b9       // vmovupd    oword [rcx + 4*rdi + 64], xmm0
  5572  	LONG $0x4c11f9c5; WORD $0x50b9       // vmovupd    oword [rcx + 4*rdi + 80], xmm1
  5573  	LONG $0x5411f9c5; WORD $0x60b9       // vmovupd    oword [rcx + 4*rdi + 96], xmm2
  5574  	LONG $0x5c11f9c5; WORD $0x70b9       // vmovupd    oword [rcx + 4*rdi + 112], xmm3
        	// Advance by the 32 elements just handled; repeat while the group counter is non-zero.
  5575  	LONG $0x20c78348                     // add    rdi, 32
  5576  	LONG $0x02c08348                     // add    rax, 2
  5577  	JNE  LBB0_717
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5578  	JMP  LBB0_991
  5579  
  5580  LBB0_721:
        	// Vector path: narrow 64-bit elements read through rdx to 32-bit elements
        	// written through rcx by keeping only the low dword of each qword
        	// (vshufps with selector 0x88), 16 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 16.
  5581  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5582  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
        	// r8 = ((esi - 16) >> 4) + 1, i.e. the number of 16-element groups when esi >= 16.
  5583  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5584  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5585  	LONG $0x04e8c149         // shr    r8, 4
  5586  	LONG $0x01c08349         // add    r8, 1
        	// esi == 16 means exactly one group: bypass the two-groups-per-pass loop.
  5587  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5588  	JE   LBB0_995
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5589  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5590  	LONG $0xfee08348         // and    rax, -2
  5591  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5592  	WORD $0xff31             // xor    edi, edi
  5593  
  5594  LBB0_723:
        	// First group. Each xmm is loaded with two qwords, then vshufps (imm 136 = 0x88)
        	// keeps dwords 0 and 2 of the register and dwords 0 and 2 of the next 16 source
        	// bytes, producing the low halves of four consecutive qwords.
  5595  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  5596  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  5597  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  5598  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  5599  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  5600  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  5601  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  5602  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
        	// Store the 16 narrowed elements (destination index scaled by 4).
  5603  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  5604  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  5605  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  5606  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
        	// Second group: same pattern, 128 source bytes / 64 destination bytes further on.
  5607  	QUAD $0x000080fa8410f8c5; BYTE $0x00       // vmovups    xmm0, oword [rdx + 8*rdi + 128]
  5608  	QUAD $0x0000a0fa8c10f8c5; BYTE $0x00       // vmovups    xmm1, oword [rdx + 8*rdi + 160]
  5609  	QUAD $0x0000c0fa9410f8c5; BYTE $0x00       // vmovups    xmm2, oword [rdx + 8*rdi + 192]
  5610  	QUAD $0x0000e0fa9c10f8c5; BYTE $0x00       // vmovups    xmm3, oword [rdx + 8*rdi + 224]
  5611  	QUAD $0x000090fa84c6f8c5; WORD $0x8800     // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 144], 136
  5612  	QUAD $0x0000b0fa8cc6f0c5; WORD $0x8800     // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 176], 136
  5613  	QUAD $0x0000d0fa94c6e8c5; WORD $0x8800     // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 208], 136
  5614  	QUAD $0x0000f0fa9cc6e0c5; WORD $0x8800     // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 240], 136
  5615  	LONG $0x4411f8c5; WORD $0x40b9             // vmovups    oword [rcx + 4*rdi + 64], xmm0
  5616  	LONG $0x4c11f8c5; WORD $0x50b9             // vmovups    oword [rcx + 4*rdi + 80], xmm1
  5617  	LONG $0x5411f8c5; WORD $0x60b9             // vmovups    oword [rcx + 4*rdi + 96], xmm2
  5618  	LONG $0x5c11f8c5; WORD $0x70b9             // vmovups    oword [rcx + 4*rdi + 112], xmm3
        	// Advance by the 32 elements just handled; repeat while the group counter is non-zero.
  5619  	LONG $0x20c78348                           // add    rdi, 32
  5620  	LONG $0x02c08348                           // add    rax, 2
  5621  	JNE  LBB0_723
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5622  	JMP  LBB0_996
  5623  
  5624  LBB0_724:
        	// Vector path: zero-extend packed 16-bit elements read through rdx to 32-bit
        	// elements written through rcx (vpmovzxwd), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5625  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5626  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5627  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5628  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5629  	LONG $0x05e8c149         // shr    r8, 5
  5630  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5631  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5632  	JE   LBB0_1000
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5633  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5634  	LONG $0xfee08348         // and    rax, -2
  5635  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5636  	WORD $0xff31             // xor    edi, edi
  5637  
  5638  LBB0_726:
        	// First group: each load reads 8 words (16 bytes) and widens them to 8 dwords;
        	// four 32-byte stores follow (source index scaled by 2, destination by 4).
  5639  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi]
  5640  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  5641  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  5642  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  5643  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  5644  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  5645  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  5646  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 64 source bytes / 128 destination bytes further on.
  5647  	LONG $0x337de2c4; WORD $0x7a44; BYTE $0x40 // vpmovzxwd    ymm0, oword [rdx + 2*rdi + 64]
  5648  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 80]
  5649  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x60 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 96]
  5650  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 112]
  5651  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  5652  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  5653  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  5654  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5655  	LONG $0x40c78348                           // add    rdi, 64
  5656  	LONG $0x02c08348                           // add    rax, 2
  5657  	JNE  LBB0_726
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5658  	JMP  LBB0_1001
  5659  
  5660  LBB0_727:
        	// Vector path: sign-extend packed 16-bit integers read through rdx to 32-bit
        	// integers written through rcx (vpmovsxwd), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5661  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5662  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5663  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5664  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5665  	LONG $0x05e8c149         // shr    r8, 5
  5666  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5667  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5668  	JE   LBB0_1005
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5669  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5670  	LONG $0xfee08348         // and    rax, -2
  5671  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5672  	WORD $0xff31             // xor    edi, edi
  5673  
  5674  LBB0_729:
        	// First group: each load reads 8 words (16 bytes) and sign-extends them to
        	// 8 dwords; four 32-byte stores follow (source index scaled by 2, destination by 4).
  5675  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
  5676  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
  5677  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
  5678  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
  5679  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  5680  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  5681  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  5682  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 64 source bytes / 128 destination bytes further on.
  5683  	LONG $0x237de2c4; WORD $0x7a44; BYTE $0x40 // vpmovsxwd    ymm0, oword [rdx + 2*rdi + 64]
  5684  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x50 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 80]
  5685  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x60 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 96]
  5686  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x70 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 112]
  5687  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  5688  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  5689  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  5690  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5691  	LONG $0x40c78348                           // add    rdi, 64
  5692  	LONG $0x02c08348                           // add    rax, 2
  5693  	JNE  LBB0_729
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5694  	JMP  LBB0_1006
  5695  
  5696  LBB0_730:
        	// Vector path: narrow 64-bit elements read through rdx to 32-bit elements
        	// written through rcx by keeping only the low dword of each qword
        	// (vshufps with selector 0x88), 16 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 16.
  5697  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5698  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
        	// r8 = ((esi - 16) >> 4) + 1, i.e. the number of 16-element groups when esi >= 16.
  5699  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5700  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5701  	LONG $0x04e8c149         // shr    r8, 4
  5702  	LONG $0x01c08349         // add    r8, 1
        	// esi == 16 means exactly one group: bypass the two-groups-per-pass loop.
  5703  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5704  	JE   LBB0_1010
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5705  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5706  	LONG $0xfee08348         // and    rax, -2
  5707  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5708  	WORD $0xff31             // xor    edi, edi
  5709  
  5710  LBB0_732:
        	// First group. Each xmm is loaded with two qwords, then vshufps (imm 136 = 0x88)
        	// keeps dwords 0 and 2 of the register and dwords 0 and 2 of the next 16 source
        	// bytes, producing the low halves of four consecutive qwords.
  5711  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  5712  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  5713  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  5714  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  5715  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  5716  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  5717  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  5718  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
        	// Store the 16 narrowed elements (destination index scaled by 4).
  5719  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  5720  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  5721  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  5722  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
        	// Second group: same pattern, 128 source bytes / 64 destination bytes further on.
  5723  	QUAD $0x000080fa8410f8c5; BYTE $0x00       // vmovups    xmm0, oword [rdx + 8*rdi + 128]
  5724  	QUAD $0x0000a0fa8c10f8c5; BYTE $0x00       // vmovups    xmm1, oword [rdx + 8*rdi + 160]
  5725  	QUAD $0x0000c0fa9410f8c5; BYTE $0x00       // vmovups    xmm2, oword [rdx + 8*rdi + 192]
  5726  	QUAD $0x0000e0fa9c10f8c5; BYTE $0x00       // vmovups    xmm3, oword [rdx + 8*rdi + 224]
  5727  	QUAD $0x000090fa84c6f8c5; WORD $0x8800     // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 144], 136
  5728  	QUAD $0x0000b0fa8cc6f0c5; WORD $0x8800     // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 176], 136
  5729  	QUAD $0x0000d0fa94c6e8c5; WORD $0x8800     // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 208], 136
  5730  	QUAD $0x0000f0fa9cc6e0c5; WORD $0x8800     // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 240], 136
  5731  	LONG $0x4411f8c5; WORD $0x40b9             // vmovups    oword [rcx + 4*rdi + 64], xmm0
  5732  	LONG $0x4c11f8c5; WORD $0x50b9             // vmovups    oword [rcx + 4*rdi + 80], xmm1
  5733  	LONG $0x5411f8c5; WORD $0x60b9             // vmovups    oword [rcx + 4*rdi + 96], xmm2
  5734  	LONG $0x5c11f8c5; WORD $0x70b9             // vmovups    oword [rcx + 4*rdi + 112], xmm3
        	// Advance by the 32 elements just handled; repeat while the group counter is non-zero.
  5735  	LONG $0x20c78348                           // add    rdi, 32
  5736  	LONG $0x02c08348                           // add    rax, 2
  5737  	JNE  LBB0_732
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5738  	JMP  LBB0_1011
  5739  
  5740  LBB0_733:
        	// Vector path: convert packed single-precision floats read through rdx to
        	// signed 32-bit integers written through rcx, truncating toward zero
        	// (vcvttps2dq), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5741  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5742  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5743  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5744  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5745  	LONG $0x05e8c149         // shr    r8, 5
  5746  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5747  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5748  	JE   LBB0_1015
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5749  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5750  	LONG $0xfee08348         // and    rax, -2
  5751  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5752  	WORD $0xff31             // xor    edi, edi
  5753  
  5754  LBB0_735:
        	// First group: four 8-lane float32 -> int32 truncating conversions straight
        	// from memory, then four 32-byte stores (both buffers indexed with a scale of 4).
  5755  	LONG $0x045bfec5; BYTE $0xba         // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
  5756  	LONG $0x4c5bfec5; WORD $0x20ba       // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
  5757  	LONG $0x545bfec5; WORD $0x40ba       // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
  5758  	LONG $0x5c5bfec5; WORD $0x60ba       // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
  5759  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  5760  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5761  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5762  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 128 bytes further on in both buffers. The stores
        	// use the vmovupd encoding instead of vmovups; both are unaligned 32-byte stores.
  5763  	QUAD $0x000080ba845bfec5; BYTE $0x00 // vcvttps2dq    ymm0, yword [rdx + 4*rdi + 128]
  5764  	QUAD $0x0000a0ba8c5bfec5; BYTE $0x00 // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 160]
  5765  	QUAD $0x0000c0ba945bfec5; BYTE $0x00 // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 192]
  5766  	QUAD $0x0000e0ba9c5bfec5; BYTE $0x00 // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 224]
  5767  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  5768  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  5769  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  5770  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5771  	LONG $0x40c78348                     // add    rdi, 64
  5772  	LONG $0x02c08348                     // add    rax, 2
  5773  	JNE  LBB0_735
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5774  	JMP  LBB0_1016
  5775  
  5776  LBB0_742:
        	// Vector path: copy 4-byte elements unchanged from the buffer at rdx to the
        	// buffer at rcx using 32-byte loads and stores, 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5777  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5778  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5779  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5780  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5781  	LONG $0x05e8c149         // shr    r8, 5
  5782  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5783  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5784  	JE   LBB0_1185
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5785  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5786  	LONG $0xfee08348         // and    rax, -2
  5787  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5788  	WORD $0xff31             // xor    edi, edi
  5789  
  5790  LBB0_744:
        	// First group: load 128 bytes into ymm0-ymm3, then store them at the same
        	// element offset in the destination (both buffers indexed with a scale of 4).
  5791  	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
  5792  	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
  5793  	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
  5794  	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
  5795  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  5796  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5797  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5798  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: the next 128 bytes, moved with the vmovupd encoding; like
        	// vmovups it is an unaligned 32-byte move.
  5799  	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
  5800  	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
  5801  	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
  5802  	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
  5803  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  5804  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  5805  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  5806  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5807  	LONG $0x40c78348                     // add    rdi, 64
  5808  	LONG $0x02c08348                     // add    rax, 2
  5809  	JNE  LBB0_744
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5810  	JMP  LBB0_1186
  5811  
  5812  LBB0_745:
        	// Vector path: sign-extend packed 8-bit integers read through rdx to 32-bit
        	// integers written through rcx (vpmovsxbd), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5813  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5814  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5815  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5816  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5817  	LONG $0x05e8c149         // shr    r8, 5
  5818  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5819  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5820  	JE   LBB0_1193
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5821  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5822  	LONG $0xfee08348         // and    rax, -2
  5823  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5824  	WORD $0xff31             // xor    edi, edi
  5825  
  5826  LBB0_747:
        	// First group: each load reads 8 bytes and sign-extends them to 8 dwords;
        	// four 32-byte stores follow (source index unscaled, destination scaled by 4).
  5827  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi]
  5828  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
  5829  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
  5830  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
  5831  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  5832  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  5833  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  5834  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 32 source bytes / 128 destination bytes further on.
  5835  	LONG $0x217de2c4; WORD $0x3a44; BYTE $0x20 // vpmovsxbd    ymm0, qword [rdx + rdi + 32]
  5836  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovsxbd    ymm1, qword [rdx + rdi + 40]
  5837  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x30 // vpmovsxbd    ymm2, qword [rdx + rdi + 48]
  5838  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovsxbd    ymm3, qword [rdx + rdi + 56]
  5839  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  5840  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  5841  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  5842  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5843  	LONG $0x40c78348                           // add    rdi, 64
  5844  	LONG $0x02c08348                           // add    rax, 2
  5845  	JNE  LBB0_747
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5846  	JMP  LBB0_1194
  5847  
  5848  LBB0_748:
        	// Vector path: zero-extend packed 8-bit elements read through rdx to 32-bit
        	// elements written through rcx (vpmovzxbd), 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5849  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5850  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5851  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5852  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5853  	LONG $0x05e8c149         // shr    r8, 5
  5854  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5855  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5856  	JE   LBB0_1201
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5857  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5858  	LONG $0xfee08348         // and    rax, -2
  5859  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5860  	WORD $0xff31             // xor    edi, edi
  5861  
  5862  LBB0_750:
        	// First group: each load reads 8 bytes and zero-extends them to 8 dwords;
        	// four 32-byte stores follow (source index unscaled, destination scaled by 4).
  5863  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi]
  5864  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
  5865  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
  5866  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
  5867  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  5868  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  5869  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  5870  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: same pattern, 32 source bytes / 128 destination bytes further on.
  5871  	LONG $0x317de2c4; WORD $0x3a44; BYTE $0x20 // vpmovzxbd    ymm0, qword [rdx + rdi + 32]
  5872  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovzxbd    ymm1, qword [rdx + rdi + 40]
  5873  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x30 // vpmovzxbd    ymm2, qword [rdx + rdi + 48]
  5874  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovzxbd    ymm3, qword [rdx + rdi + 56]
  5875  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  5876  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  5877  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  5878  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5879  	LONG $0x40c78348                           // add    rdi, 64
  5880  	LONG $0x02c08348                           // add    rax, 2
  5881  	JNE  LBB0_750
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5882  	JMP  LBB0_1202
  5883  
  5884  LBB0_751:
        	// Vector path: copy 4-byte elements unchanged from the buffer at rdx to the
        	// buffer at rcx using 32-byte loads and stores, 32 elements per group.
        	// NOTE(review): rdx/rcx are loaded from `in`/`out` at function entry and
        	// r9d is presumably the element count; both are set outside this chunk.
        	// esi = r9d rounded down to a multiple of 32.
  5885  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5886  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
        	// r8 = ((esi - 32) >> 5) + 1, i.e. the number of 32-element groups when esi >= 32.
  5887  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5888  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5889  	LONG $0x05e8c149         // shr    r8, 5
  5890  	LONG $0x01c08349         // add    r8, 1
        	// esi == 32 means exactly one group: bypass the two-groups-per-pass loop.
  5891  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5892  	JE   LBB0_1209
        	// rax = -(r8 rounded down to even); the loop adds 2 per pass until it hits zero.
        	// rdi = element index, starting at 0.
  5893  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5894  	LONG $0xfee08348         // and    rax, -2
  5895  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5896  	WORD $0xff31             // xor    edi, edi
  5897  
  5898  LBB0_753:
        	// First group: load 128 bytes into ymm0-ymm3, then store them at the same
        	// element offset in the destination (both buffers indexed with a scale of 4).
  5899  	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
  5900  	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
  5901  	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
  5902  	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
  5903  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  5904  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  5905  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  5906  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: the next 128 bytes, moved with the vmovupd encoding; like
        	// vmovups it is an unaligned 32-byte move.
  5907  	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
  5908  	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
  5909  	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
  5910  	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
  5911  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  5912  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  5913  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  5914  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
        	// Advance by the 64 elements just handled; repeat while the group counter is non-zero.
  5915  	LONG $0x40c78348                     // add    rdi, 64
  5916  	LONG $0x02c08348                     // add    rax, 2
  5917  	JNE  LBB0_753
        	// Continuation is outside this chunk; presumably it handles an odd trailing
        	// group using r8, which is not read again here — TODO confirm.
  5918  	JMP  LBB0_1210
  5919  
  5920  LBB0_754: // Setup for the unrolled AVX2 loop LBB0_756, which copies 8-byte elements unchanged from [rdx] to [rcx].
  5921  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  5922  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16 elements
  5923  	LONG $0xf0468d48         // lea    rax, [rsi - 16] ; NOTE(review): wraps if rsi < 16; the guard for that is not visible here
  5924  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5925  	LONG $0x04e8c149         // shr    r8, 4
  5926  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  5927  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 16, i.e. exactly one group
  5928  	JE   LBB0_1217 // single group: bypass the paired loop (target is outside this chunk)
  5929  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5930  	LONG $0xfee08348         // and    rax, -2
  5931  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  5932  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  5933  
  5934  LBB0_756: // Loop body: two 16-element groups (32 elements, 256 bytes) per iteration.
  5935  	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi] ; load first group: 4 x 32 bytes = 16 elements
  5936  	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
  5937  	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
  5938  	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
  5939  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0 ; store first group unmodified
  5940  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  5941  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  5942  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
  5943  	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128] ; second group starts at byte offset +128
  5944  	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
  5945  	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
  5946  	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
  5947  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0 ; store second group unmodified
  5948  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  5949  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  5950  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  5951  	LONG $0x20c78348                     // add    rdi, 32 ; advance the index by the 32 elements just copied
  5952  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  5953  	JNE  LBB0_756 // more group pairs remain
  5954  	JMP  LBB0_1218 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  5955  
  5956  LBB0_757: // Setup for the unrolled AVX2 loop LBB0_759, which widens signed bytes at [rdx] to float64 at [rcx].
  5957  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  5958  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16 elements
  5959  	LONG $0xf0468d48         // lea    rax, [rsi - 16] ; NOTE(review): wraps if rsi < 16; the guard for that is not visible here
  5960  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5961  	LONG $0x04e8c149         // shr    r8, 4
  5962  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  5963  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 16, i.e. exactly one group
  5964  	JE   LBB0_1225 // single group: bypass the paired loop (target is outside this chunk)
  5965  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5966  	LONG $0xfee08348         // and    rax, -2
  5967  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  5968  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  5969  
  5970  LBB0_759: // Loop body: two 16-element groups per iteration (32 source bytes in, 256 bytes of float64 out).
  5971  	LONG $0x2179e2c4; WORD $0x3a04             // vpmovsxbd    xmm0, dword [rdx + rdi] ; sign-extend 4 source bytes to 4 int32 lanes
  5972  	LONG $0x2179e2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbd    xmm1, dword [rdx + rdi + 4]
  5973  	LONG $0x2179e2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbd    xmm2, dword [rdx + rdi + 8]
  5974  	LONG $0x2179e2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbd    xmm3, dword [rdx + rdi + 12]
  5975  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0 ; 4 int32 -> 4 float64
  5976  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  5977  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  5978  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  5979  	LONG $0x0411fcc5; BYTE $0xf9               // vmovups    yword [rcx + 8*rdi], ymm0 ; store first group: 16 float64 = 128 bytes
  5980  	LONG $0x4c11fcc5; WORD $0x20f9             // vmovups    yword [rcx + 8*rdi + 32], ymm1
  5981  	LONG $0x5411fcc5; WORD $0x40f9             // vmovups    yword [rcx + 8*rdi + 64], ymm2
  5982  	LONG $0x5c11fcc5; WORD $0x60f9             // vmovups    yword [rcx + 8*rdi + 96], ymm3
  5983  	LONG $0x2179e2c4; WORD $0x3a44; BYTE $0x10 // vpmovsxbd    xmm0, dword [rdx + rdi + 16] ; second group: source bytes 16..31
  5984  	LONG $0x2179e2c4; WORD $0x3a4c; BYTE $0x14 // vpmovsxbd    xmm1, dword [rdx + rdi + 20]
  5985  	LONG $0x2179e2c4; WORD $0x3a54; BYTE $0x18 // vpmovsxbd    xmm2, dword [rdx + rdi + 24]
  5986  	LONG $0x2179e2c4; WORD $0x3a5c; BYTE $0x1c // vpmovsxbd    xmm3, dword [rdx + rdi + 28]
  5987  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
  5988  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  5989  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  5990  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  5991  	QUAD $0x000080f98411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 128], ymm0 ; store second group at byte offset +128
  5992  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  5993  	QUAD $0x0000c0f99411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  5994  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  5995  	LONG $0x20c78348                           // add    rdi, 32 ; advance the index by the 32 elements just converted
  5996  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  5997  	JNE  LBB0_759 // more group pairs remain
  5998  	JMP  LBB0_1226 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  5999  
  6000  LBB0_760: // Setup for the unrolled AVX2 loop LBB0_762, which widens unsigned bytes at [rdx] to float64 at [rcx].
  6001  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6002  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16 elements
  6003  	LONG $0xf0468d48         // lea    rax, [rsi - 16] ; NOTE(review): wraps if rsi < 16; the guard for that is not visible here
  6004  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6005  	LONG $0x04e8c149         // shr    r8, 4
  6006  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  6007  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 16, i.e. exactly one group
  6008  	JE   LBB0_1233 // single group: bypass the paired loop (target is outside this chunk)
  6009  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6010  	LONG $0xfee08348         // and    rax, -2
  6011  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6012  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  6013  
  6014  LBB0_762: // Loop body: two 16-element groups per iteration (32 source bytes in, 256 bytes of float64 out).
  6015  	LONG $0x3179e2c4; WORD $0x3a04             // vpmovzxbd    xmm0, dword [rdx + rdi] ; zero-extend 4 source bytes to 4 int32 lanes
  6016  	LONG $0x3179e2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbd    xmm1, dword [rdx + rdi + 4]
  6017  	LONG $0x3179e2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbd    xmm2, dword [rdx + rdi + 8]
  6018  	LONG $0x3179e2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbd    xmm3, dword [rdx + rdi + 12]
  6019  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0 ; 4 int32 -> 4 float64 (inputs are 0..255, so the signed convert is exact)
  6020  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  6021  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  6022  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  6023  	LONG $0x0411fcc5; BYTE $0xf9               // vmovups    yword [rcx + 8*rdi], ymm0 ; store first group: 16 float64 = 128 bytes
  6024  	LONG $0x4c11fcc5; WORD $0x20f9             // vmovups    yword [rcx + 8*rdi + 32], ymm1
  6025  	LONG $0x5411fcc5; WORD $0x40f9             // vmovups    yword [rcx + 8*rdi + 64], ymm2
  6026  	LONG $0x5c11fcc5; WORD $0x60f9             // vmovups    yword [rcx + 8*rdi + 96], ymm3
  6027  	LONG $0x3179e2c4; WORD $0x3a44; BYTE $0x10 // vpmovzxbd    xmm0, dword [rdx + rdi + 16] ; second group: source bytes 16..31
  6028  	LONG $0x3179e2c4; WORD $0x3a4c; BYTE $0x14 // vpmovzxbd    xmm1, dword [rdx + rdi + 20]
  6029  	LONG $0x3179e2c4; WORD $0x3a54; BYTE $0x18 // vpmovzxbd    xmm2, dword [rdx + rdi + 24]
  6030  	LONG $0x3179e2c4; WORD $0x3a5c; BYTE $0x1c // vpmovzxbd    xmm3, dword [rdx + rdi + 28]
  6031  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
  6032  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  6033  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  6034  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  6035  	QUAD $0x000080f98411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 128], ymm0 ; store second group at byte offset +128
  6036  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  6037  	QUAD $0x0000c0f99411fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  6038  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  6039  	LONG $0x20c78348                           // add    rdi, 32 ; advance the index by the 32 elements just converted
  6040  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6041  	JNE  LBB0_762 // more group pairs remain
  6042  	JMP  LBB0_1234 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6043  
  6044  LBB0_763: // Setup for the unrolled AVX2 loop LBB0_765, which narrows 4-byte elements at [rdx] to their low byte at [rcx].
  6045  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6046  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; rsi = r9d rounded down to a multiple of 32 elements
  6047  	LONG $0xe0468d48             // lea    rax, [rsi - 32] ; NOTE(review): wraps if rsi < 32; the guard for that is not visible here
  6048  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  6049  	LONG $0x05e8c149             // shr    r8, 5
  6050  	LONG $0x01c08349             // add    r8, 1 ; r8 = (rsi - 32)/32 + 1 = number of 32-element groups
  6051  	WORD $0x8548; BYTE $0xc0     // test    rax, rax ; rax == 0 means rsi == 32, i.e. exactly one group
  6052  	JE   LBB0_1241 // single group: bypass the paired loop (target is outside this chunk)
  6053  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  6054  	LONG $0xfee08348             // and    rax, -2
  6055  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6056  	WORD $0xff31                 // xor    edi, edi ; rdi = element index, starts at 0
  6057  	LONG $0x456ff9c5; BYTE $0x70 // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */ ; shuffle mask at LCDATA1+0x70 (bytes 0,4,8,12): gathers the low byte of each dword
  6058  
  6059  LBB0_765: // Loop body: two 32-element groups per iteration (256 source bytes in, 64 result bytes out).
  6060  	LONG $0x0c6ffac5; BYTE $0xba         // vmovdqu    xmm1, oword [rdx + 4*rdi] ; elements 0..3
  6061  	LONG $0x546ffac5; WORD $0x10ba       // vmovdqu    xmm2, oword [rdx + 4*rdi + 16] ; elements 4..7
  6062  	LONG $0x5c6ffac5; WORD $0x20ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 32] ; elements 8..11
  6063  	LONG $0x646ffac5; WORD $0x30ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 48] ; elements 12..15
  6064  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0 ; low dword of the result = the 4 low bytes
  6065  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6066  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2 ; low qword of xmm1 = result bytes 0..7
  6067  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6068  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6069  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2 ; low qword of xmm2 = result bytes 8..15
  6070  	LONG $0x5c6ffac5; WORD $0x50ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 80] ; elements 20..23
  6071  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6072  	LONG $0x646ffac5; WORD $0x40ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 64] ; elements 16..19
  6073  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6074  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3 ; low qword of xmm3 = result bytes 16..23
  6075  	LONG $0x646ffac5; WORD $0x70ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 112] ; elements 28..31
  6076  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6077  	LONG $0x6c6ffac5; WORD $0x60ba       // vmovdqu    xmm5, oword [rdx + 4*rdi + 96] ; elements 24..27
  6078  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  6079  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4 ; low qword of xmm4 = result bytes 24..31
  6080  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1 ; ymm3 = [bytes 16..23 | bytes 24..31] (low qword of each lane)
  6081  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1 ; ymm1 = [bytes 0..7 | bytes 8..15] (low qword of each lane)
  6082  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3 ; qwords now ordered 0..7, 16..23, 8..15, 24..31
  6083  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216 ; swap the middle qwords -> bytes 0..31 in element order
  6084  	LONG $0x0c7ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm1 ; store 32 narrowed bytes for the first group
  6085  	QUAD $0x000080ba8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 4*rdi + 128] ; second group: same sequence on source bytes +128..+255
  6086  	QUAD $0x000090ba946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 4*rdi + 144]
  6087  	QUAD $0x0000a0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 160]
  6088  	QUAD $0x0000b0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 176]
  6089  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6090  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6091  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6092  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6093  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6094  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  6095  	QUAD $0x0000d0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 208]
  6096  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6097  	QUAD $0x0000c0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 192]
  6098  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6099  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  6100  	QUAD $0x0000f0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 240]
  6101  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6102  	QUAD $0x0000e0baac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 4*rdi + 224]
  6103  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  6104  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
  6105  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  6106  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  6107  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  6108  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  6109  	LONG $0x4c7ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm1 ; store 32 narrowed bytes for the second group
  6110  	LONG $0x40c78348                     // add    rdi, 64 ; advance the index by the 64 elements just narrowed
  6111  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6112  	JNE  LBB0_765 // more group pairs remain
  6113  	JMP  LBB0_1242 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6114  
  6115  LBB0_766: // Setup for the unrolled AVX2 loop LBB0_768, which converts float64 at [rdx] to 8-bit integers at [rcx].
  6116  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6117  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16 elements
  6118  	LONG $0xf0468d48         // lea    rax, [rsi - 16] ; NOTE(review): wraps if rsi < 16; the guard for that is not visible here
  6119  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6120  	LONG $0x04e8c149         // shr    r8, 4
  6121  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  6122  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 16, i.e. exactly one group
  6123  	JE   LBB0_1249 // single group: bypass the paired loop (target is outside this chunk)
  6124  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6125  	LONG $0xfee08348         // and    rax, -2
  6126  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6127  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  6128  
  6129  LBB0_768: // Loop body: two 16-element groups per iteration (256 source bytes in, 32 result bytes out).
  6130  	LONG $0x04e6fdc5; BYTE $0xfa         // vcvttpd2dq    xmm0, yword [rdx + 8*rdi] ; 4 float64 -> 4 int32, truncating toward zero
  6131  	LONG $0xc06bf9c5                     // vpackssdw    xmm0, xmm0, xmm0 ; int32 -> int16 with signed saturation
  6132  	LONG $0x4ce6fdc5; WORD $0x20fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  6133  	LONG $0xc063f9c5                     // vpacksswb    xmm0, xmm0, xmm0 ; int16 -> int8 with signed saturation; low dword = result bytes 0..3
  6134  	LONG $0xc96bf1c5                     // vpackssdw    xmm1, xmm1, xmm1
  6135  	LONG $0xc963f1c5                     // vpacksswb    xmm1, xmm1, xmm1 ; low dword = result bytes 4..7
  6136  	LONG $0xc162f9c5                     // vpunpckldq    xmm0, xmm0, xmm1 ; low qword of xmm0 = result bytes 0..7
  6137  	LONG $0x4ce6fdc5; WORD $0x40fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 64]
  6138  	LONG $0xc96bf1c5                     // vpackssdw    xmm1, xmm1, xmm1
  6139  	LONG $0xc963f1c5                     // vpacksswb    xmm1, xmm1, xmm1
  6140  	LONG $0x54e6fdc5; WORD $0x60fa       // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 96]
  6141  	LONG $0xd26be9c5                     // vpackssdw    xmm2, xmm2, xmm2
  6142  	LONG $0xd263e9c5                     // vpacksswb    xmm2, xmm2, xmm2
  6143  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2 ; low qword of xmm1 = result bytes 8..15
  6144  	LONG $0xc16cf9c5                     // vpunpcklqdq    xmm0, xmm0, xmm1 ; xmm0 = result bytes 0..15 in element order
  6145  	LONG $0x047ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm0 ; store 16 result bytes for the first group
  6146  	QUAD $0x000080fa84e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm0, yword [rdx + 8*rdi + 128] ; second group: same sequence on source bytes +128..+255
  6147  	QUAD $0x0000a0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 160]
  6148  	LONG $0xc06bf9c5                     // vpackssdw    xmm0, xmm0, xmm0
  6149  	LONG $0xc063f9c5                     // vpacksswb    xmm0, xmm0, xmm0
  6150  	LONG $0xc96bf1c5                     // vpackssdw    xmm1, xmm1, xmm1
  6151  	LONG $0xc963f1c5                     // vpacksswb    xmm1, xmm1, xmm1
  6152  	LONG $0xc162f9c5                     // vpunpckldq    xmm0, xmm0, xmm1
  6153  	QUAD $0x0000c0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 192]
  6154  	LONG $0xc96bf1c5                     // vpackssdw    xmm1, xmm1, xmm1
  6155  	QUAD $0x0000e0fa94e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 224]
  6156  	LONG $0xc963f1c5                     // vpacksswb    xmm1, xmm1, xmm1
  6157  	LONG $0xd26be9c5                     // vpackssdw    xmm2, xmm2, xmm2
  6158  	LONG $0xd263e9c5                     // vpacksswb    xmm2, xmm2, xmm2
  6159  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6160  	LONG $0xc16cf9c5                     // vpunpcklqdq    xmm0, xmm0, xmm1
  6161  	LONG $0x447ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm0 ; store 16 result bytes for the second group
  6162  	LONG $0x20c78348                     // add    rdi, 32 ; advance the index by the 32 elements just converted
  6163  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6164  	JNE  LBB0_768 // more group pairs remain
  6165  	JMP  LBB0_1250 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6166  
  6167  LBB0_769: // Setup for the unrolled AVX2 loop LBB0_771, which copies 1-byte elements unchanged from [rdx] to [rcx].
  6168  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6169  	WORD $0xe683; BYTE $0x80 // and    esi, -128 ; rsi = r9d rounded down to a multiple of 128 elements
  6170  	LONG $0x80468d48         // lea    rax, [rsi - 128] ; NOTE(review): wraps if rsi < 128; the guard for that is not visible here
  6171  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6172  	LONG $0x07e8c149         // shr    r8, 7
  6173  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 128)/128 + 1 = number of 128-element groups
  6174  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 128, i.e. exactly one group
  6175  	JE   LBB0_1257 // single group: bypass the paired loop (target is outside this chunk)
  6176  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6177  	LONG $0xfee08348         // and    rax, -2
  6178  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6179  	WORD $0xff31             // xor    edi, edi ; rdi = element (and byte) index, starts at 0
  6180  
  6181  LBB0_771: // Loop body: two 128-element groups (256 bytes) per iteration.
  6182  	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi] ; load first group: 4 x 32 bytes = 128 elements
  6183  	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
  6184  	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
  6185  	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
  6186  	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0 ; store first group unmodified
  6187  	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
  6188  	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
  6189  	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
  6190  	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128] ; second group starts at byte offset +128
  6191  	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
  6192  	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
  6193  	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
  6194  	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0 ; store second group unmodified
  6195  	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
  6196  	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
  6197  	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
  6198  	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256 ; advance the index by the 256 elements just copied
  6199  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6200  	JNE  LBB0_771 // more group pairs remain
  6201  	JMP  LBB0_1258 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6202  
  6203  LBB0_772: // Setup for the unrolled AVX2 loop LBB0_774, which narrows 8-byte elements at [rdx] to their low byte at [rcx].
  6204  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6205  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16 elements
  6206  	LONG $0xf0468d48             // lea    rax, [rsi - 16] ; NOTE(review): wraps if rsi < 16; the guard for that is not visible here
  6207  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  6208  	LONG $0x04e8c149             // shr    r8, 4
  6209  	LONG $0x01c08349             // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element groups
  6210  	WORD $0x8548; BYTE $0xc0     // test    rax, rax ; rax == 0 means rsi == 16, i.e. exactly one group
  6211  	JE   LBB0_1265 // single group: bypass the paired loop (target is outside this chunk)
  6212  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  6213  	LONG $0xfee08348             // and    rax, -2
  6214  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6215  	WORD $0xff31                 // xor    edi, edi ; rdi = element index, starts at 0
  6216  	LONG $0x456ff9c5; BYTE $0x40 // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */ ; shuffle mask at LCDATA1+0x40 (bytes 0,8): gathers the low byte of each qword
  6217  
  6218  LBB0_774: // Loop body: two 16-element groups per iteration (256 source bytes in, 32 result bytes out).
  6219  	LONG $0x0c6ffac5; BYTE $0xfa         // vmovdqu    xmm1, oword [rdx + 8*rdi] ; elements 0..1
  6220  	LONG $0x546ffac5; WORD $0x10fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 16] ; elements 2..3
  6221  	LONG $0x5c6ffac5; WORD $0x20fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 32] ; elements 4..5
  6222  	LONG $0x646ffac5; WORD $0x30fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 48] ; elements 6..7
  6223  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0 ; low word of the result = the 2 low bytes
  6224  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6225  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2 ; low dword of xmm1 = result bytes 0..3
  6226  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6227  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6228  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2 ; low dword of xmm2 = result bytes 4..7
  6229  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2 ; low qword of xmm1 = result bytes 0..7
  6230  	LONG $0x546ffac5; WORD $0x50fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 80] ; elements 10..11
  6231  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6232  	LONG $0x5c6ffac5; WORD $0x40fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 64] ; elements 8..9
  6233  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6234  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2 ; low dword of xmm2 = result bytes 8..11
  6235  	LONG $0x5c6ffac5; WORD $0x70fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 112] ; elements 14..15
  6236  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6237  	LONG $0x646ffac5; WORD $0x60fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 96] ; elements 12..13
  6238  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6239  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3 ; low dword of xmm3 = result bytes 12..15
  6240  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3 ; low qword of xmm2 = result bytes 8..15
  6241  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2 ; xmm1 = result bytes 0..15 in element order
  6242  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1 ; store 16 narrowed bytes for the first group
  6243  	QUAD $0x000080fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 128] ; second group: same sequence on source bytes +128..+255
  6244  	QUAD $0x000090fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 144]
  6245  	QUAD $0x0000a0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 160]
  6246  	QUAD $0x0000b0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 176]
  6247  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6248  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6249  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  6250  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6251  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6252  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6253  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6254  	QUAD $0x0000d0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 208]
  6255  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6256  	QUAD $0x0000c0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 192]
  6257  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6258  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6259  	QUAD $0x0000f0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 240]
  6260  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6261  	QUAD $0x0000e0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 224]
  6262  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6263  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  6264  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  6265  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  6266  	LONG $0x4c7ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm1 ; store 16 narrowed bytes for the second group
  6267  	LONG $0x20c78348                     // add    rdi, 32 ; advance the index by the 32 elements just narrowed
  6268  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6269  	JNE  LBB0_774 // more group pairs remain
  6270  	JMP  LBB0_1266 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6271  
  6272  LBB0_775: // Setup for the unrolled AVX2 loop LBB0_777, which narrows 2-byte elements at [rdx] to their low byte at [rcx].
  6273  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6274  	WORD $0xe683; BYTE $0xc0 // and    esi, -64 ; rsi = r9d rounded down to a multiple of 64 elements
  6275  	LONG $0xc0468d48         // lea    rax, [rsi - 64] ; NOTE(review): wraps if rsi < 64; the guard for that is not visible here
  6276  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6277  	LONG $0x06e8c149         // shr    r8, 6
  6278  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 64)/64 + 1 = number of 64-element groups
  6279  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 64, i.e. exactly one group
  6280  	JE   LBB0_1273 // single group: bypass the paired loop (target is outside this chunk)
  6281  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6282  	LONG $0xfee08348         // and    rax, -2
  6283  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6284  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  6285  	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */ ; mask at LCDATA1+0xa0: 0x00ff in every 16-bit word
  6286  
  6287  LBB0_777: // Loop body: two 64-element groups per iteration (256 source bytes in, 128 result bytes out).
  6288  	LONG $0x0cdbfdc5; BYTE $0x7a         // vpand    ymm1, ymm0, yword [rdx + 2*rdi] ; load 16 words and clear each high byte
  6289  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1 ; upper 8 words
  6290  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2 ; 16 words -> 16 bytes; values are <= 0xff so the saturation never triggers
  6291  	LONG $0x54dbfdc5; WORD $0x207a       // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
  6292  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  6293  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  6294  	LONG $0x5cdbfdc5; WORD $0x407a       // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
  6295  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  6296  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  6297  	LONG $0x64dbfdc5; WORD $0x607a       // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 96]
  6298  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  6299  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  6300  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1 ; store first group: 4 x 16 bytes = 64 elements
  6301  	LONG $0x547ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm2
  6302  	LONG $0x5c7ffac5; WORD $0x2039       // vmovdqu    oword [rcx + rdi + 32], xmm3
  6303  	LONG $0x647ffac5; WORD $0x3039       // vmovdqu    oword [rcx + rdi + 48], xmm4
  6304  	QUAD $0x0000807a8cdbfdc5; BYTE $0x00 // vpand    ymm1, ymm0, yword [rdx + 2*rdi + 128] ; second group: same sequence on source bytes +128..+255
  6305  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  6306  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  6307  	QUAD $0x0000a07a94dbfdc5; BYTE $0x00 // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 160]
  6308  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  6309  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  6310  	QUAD $0x0000c07a9cdbfdc5; BYTE $0x00 // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 192]
  6311  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  6312  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  6313  	QUAD $0x0000e07aa4dbfdc5; BYTE $0x00 // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 224]
  6314  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  6315  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  6316  	LONG $0x4c7ffac5; WORD $0x4039       // vmovdqu    oword [rcx + rdi + 64], xmm1 ; store second group at byte offset +64
  6317  	LONG $0x547ffac5; WORD $0x5039       // vmovdqu    oword [rcx + rdi + 80], xmm2
  6318  	LONG $0x5c7ffac5; WORD $0x6039       // vmovdqu    oword [rcx + rdi + 96], xmm3
  6319  	LONG $0x647ffac5; WORD $0x7039       // vmovdqu    oword [rcx + rdi + 112], xmm4
  6320  	LONG $0x80ef8348                     // sub    rdi, -128 ; i.e. rdi += 128 elements (-128 fits a sign-extended imm8, +128 does not)
  6321  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6322  	JNE  LBB0_777 // more group pairs remain
  6323  	JMP  LBB0_1274 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6324  
  6325  LBB0_778: // Setup for the unrolled AVX2 loop LBB0_780, which narrows 2-byte elements at [rdx] to their low byte at [rcx] (same instruction sequence as LBB0_775/LBB0_777, different exit labels).
  6326  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is written outside this chunk (presumably the element count -- confirm)
  6327  	WORD $0xe683; BYTE $0xc0 // and    esi, -64 ; rsi = r9d rounded down to a multiple of 64 elements
  6328  	LONG $0xc0468d48         // lea    rax, [rsi - 64] ; NOTE(review): wraps if rsi < 64; the guard for that is not visible here
  6329  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6330  	LONG $0x06e8c149         // shr    r8, 6
  6331  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 64)/64 + 1 = number of 64-element groups
  6332  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means rsi == 64, i.e. exactly one group
  6333  	JE   LBB0_1281 // single group: bypass the paired loop (target is outside this chunk)
  6334  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6335  	LONG $0xfee08348         // and    rax, -2
  6336  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & ~1): negative counter, stepped by +2 until it reaches 0
  6337  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  6338  	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */ ; mask at LCDATA1+0xa0: 0x00ff in every 16-bit word
  6339  
  6340  LBB0_780: // Loop body: two 64-element groups per iteration (256 source bytes in, 128 result bytes out).
  6341  	LONG $0x0cdbfdc5; BYTE $0x7a         // vpand    ymm1, ymm0, yword [rdx + 2*rdi] ; load 16 words and clear each high byte
  6342  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1 ; upper 8 words
  6343  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2 ; 16 words -> 16 bytes; values are <= 0xff so the saturation never triggers
  6344  	LONG $0x54dbfdc5; WORD $0x207a       // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
  6345  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  6346  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  6347  	LONG $0x5cdbfdc5; WORD $0x407a       // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
  6348  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  6349  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  6350  	LONG $0x64dbfdc5; WORD $0x607a       // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 96]
  6351  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  6352  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  6353  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1 ; store first group: 4 x 16 bytes = 64 elements
  6354  	LONG $0x547ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm2
  6355  	LONG $0x5c7ffac5; WORD $0x2039       // vmovdqu    oword [rcx + rdi + 32], xmm3
  6356  	LONG $0x647ffac5; WORD $0x3039       // vmovdqu    oword [rcx + rdi + 48], xmm4
  6357  	QUAD $0x0000807a8cdbfdc5; BYTE $0x00 // vpand    ymm1, ymm0, yword [rdx + 2*rdi + 128] ; second group: same sequence on source bytes +128..+255
  6358  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  6359  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  6360  	QUAD $0x0000a07a94dbfdc5; BYTE $0x00 // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 160]
  6361  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  6362  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  6363  	QUAD $0x0000c07a9cdbfdc5; BYTE $0x00 // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 192]
  6364  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  6365  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  6366  	QUAD $0x0000e07aa4dbfdc5; BYTE $0x00 // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 224]
  6367  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  6368  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  6369  	LONG $0x4c7ffac5; WORD $0x4039       // vmovdqu    oword [rcx + rdi + 64], xmm1 ; store second group at byte offset +64
  6370  	LONG $0x547ffac5; WORD $0x5039       // vmovdqu    oword [rcx + rdi + 80], xmm2
  6371  	LONG $0x5c7ffac5; WORD $0x6039       // vmovdqu    oword [rcx + rdi + 96], xmm3
  6372  	LONG $0x647ffac5; WORD $0x7039       // vmovdqu    oword [rcx + rdi + 112], xmm4
  6373  	LONG $0x80ef8348                     // sub    rdi, -128 ; i.e. rdi += 128 elements (-128 fits a sign-extended imm8, +128 does not)
  6374  	LONG $0x02c08348                     // add    rax, 2 ; two groups done; ZF is set when the counter reaches 0
  6375  	JNE  LBB0_780 // more group pairs remain
  6376  	JMP  LBB0_1282 // paired loop finished; continuation is outside this chunk (presumably odd group / tail handling -- confirm)
  6377  
  6378  LBB0_781:
        	// Setup for the vector loop at LBB0_783 (8-byte elements narrowed to 1 byte).
        	//   esi = r9d rounded down to a multiple of 16 (r9d is presumably the element count - confirm against the caller)
        	//   r8  = ((esi - 16) >> 4) + 1 = number of 16-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 16 (a single group) control goes straight to LBB0_1289 (outside this view).
        	// NOTE(review): assumes esi >= 16 on entry; the guard is not visible in this chunk.
  6379  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  6380  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
  6381  	LONG $0xf0468d48             // lea    rax, [rsi - 16]
  6382  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  6383  	LONG $0x04e8c149             // shr    r8, 4
  6384  	LONG $0x01c08349             // add    r8, 1
  6385  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  6386  	JE   LBB0_1289
  6387  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  6388  	LONG $0xfee08348             // and    rax, -2
  6389  	WORD $0xf748; BYTE $0xd8     // neg    rax
  6390  	WORD $0xff31                 // xor    edi, edi
        	// xmm0 = shuffle mask at LCDATA1+0x40 (index bytes 0x00, 0x08): selects the low byte of each qword.
  6391  	LONG $0x456ff9c5; BYTE $0x40 // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */
  6392  
  6393  LBB0_783:
        	// Narrowing loop, unrolled twice: each half reads sixteen 8-byte elements from
        	// [rdx + 8*rdi ...] and writes their low bytes as sixteen 1-byte elements to [rcx + rdi ...].
        	// Every 16-byte load holds two elements; vpshufb with xmm0 moves their low bytes into
        	// bytes 0..1, and the vpunpcklwd / vpunpckldq / vpunpcklqdq ladder concatenates the
        	// eight 2-byte pieces into one 16-byte result.
        	// First half: source elements rdi .. rdi+15.
  6394  	LONG $0x0c6ffac5; BYTE $0xfa         // vmovdqu    xmm1, oword [rdx + 8*rdi]
  6395  	LONG $0x546ffac5; WORD $0x10fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
  6396  	LONG $0x5c6ffac5; WORD $0x20fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
  6397  	LONG $0x646ffac5; WORD $0x30fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
  6398  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6399  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6400  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  6401  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6402  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6403  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6404  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6405  	LONG $0x546ffac5; WORD $0x50fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
  6406  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6407  	LONG $0x5c6ffac5; WORD $0x40fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
  6408  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6409  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6410  	LONG $0x5c6ffac5; WORD $0x70fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
  6411  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6412  	LONG $0x646ffac5; WORD $0x60fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
  6413  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6414  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  6415  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  6416  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  6417  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1
        	// Second half: source elements rdi+16 .. rdi+31 (source offset +128, destination offset +16).
  6418  	QUAD $0x000080fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 128]
  6419  	QUAD $0x000090fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 144]
  6420  	QUAD $0x0000a0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 160]
  6421  	QUAD $0x0000b0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 176]
  6422  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6423  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6424  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  6425  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6426  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6427  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6428  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6429  	QUAD $0x0000d0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 208]
  6430  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6431  	QUAD $0x0000c0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 192]
  6432  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6433  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  6434  	QUAD $0x0000f0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 240]
  6435  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6436  	QUAD $0x0000e0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 224]
  6437  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6438  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  6439  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  6440  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  6441  	LONG $0x4c7ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm1
        	// Advance by the 32 elements just handled; rax counts up by 2 groups toward zero.
  6442  	LONG $0x20c78348                     // add    rdi, 32
  6443  	LONG $0x02c08348                     // add    rax, 2
  6444  	JNE  LBB0_783
        	// Loop finished; LBB0_1290 is outside this view (presumably handles a leftover odd group - confirm).
  6445  	JMP  LBB0_1290
  6446  
  6447  LBB0_784:
        	// Setup for the vector loop at LBB0_786 (float32 converted and packed to int8).
        	//   esi = r9d rounded down to a multiple of 32 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 32) >> 5) + 1 = number of 32-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 32 (a single group) control goes straight to LBB0_1297 (outside this view).
  6448  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6449  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  6450  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  6451  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6452  	LONG $0x05e8c149         // shr    r8, 5
  6453  	LONG $0x01c08349         // add    r8, 1
  6454  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6455  	JE   LBB0_1297
  6456  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6457  	LONG $0xfee08348         // and    rax, -2
  6458  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6459  	WORD $0xff31             // xor    edi, edi
  6460  
  6461  LBB0_786:
        	// Conversion loop, unrolled twice: each half turns 32 packed single-precision values at
        	// [rdx + 4*rdi ...] into 32 bytes at [rcx + rdi ...].
        	//   vcvttps2dq : float32 -> int32 with truncation
        	//   vpackssdw  : int32 -> int16 with signed saturation
        	//   vpacksswb  : int16 -> int8 with signed saturation
        	// The 256-bit pack/unpack instructions work per 128-bit lane, so only the low qword of each
        	// lane of the two vpacksswb results is consumed by vpunpcklqdq, and vpermq 216 (qword order
        	// 0,2,1,3) restores element order before the store.
        	// First half: source elements rdi .. rdi+31.
  6462  	LONG $0x045bfec5; BYTE $0xba         // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
  6463  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  6464  	LONG $0x545bfec5; WORD $0x20ba       // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 32]
  6465  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1
  6466  	LONG $0x397de3c4; WORD $0x01d1       // vextracti128    xmm1, ymm2, 1
  6467  	LONG $0x5c5bfec5; WORD $0x40ba       // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 64]
  6468  	LONG $0xc96be9c5                     // vpackssdw    xmm1, xmm2, xmm1
  6469  	LONG $0x397de3c4; WORD $0x01da       // vextracti128    xmm2, ymm3, 1
  6470  	LONG $0x645bfec5; WORD $0x60ba       // vcvttps2dq    ymm4, yword [rdx + 4*rdi + 96]
  6471  	LONG $0xd26be1c5                     // vpackssdw    xmm2, xmm3, xmm2
  6472  	LONG $0x397de3c4; WORD $0x01e3       // vextracti128    xmm3, ymm4, 1
  6473  	LONG $0xdb6bd9c5                     // vpackssdw    xmm3, xmm4, xmm3
  6474  	LONG $0x386de3c4; WORD $0x01d3       // vinserti128    ymm2, ymm2, xmm3, 1
  6475  	LONG $0xd063edc5                     // vpacksswb    ymm2, ymm2, ymm0
  6476  	LONG $0x387de3c4; WORD $0x01c1       // vinserti128    ymm0, ymm0, xmm1, 1
  6477  	LONG $0xc063fdc5                     // vpacksswb    ymm0, ymm0, ymm0
  6478  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  6479  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  6480  	LONG $0x047ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm0
        	// Second half: source elements rdi+32 .. rdi+63 (source offset +128, destination offset +32).
  6481  	QUAD $0x000080ba845bfec5; BYTE $0x00 // vcvttps2dq    ymm0, yword [rdx + 4*rdi + 128]
  6482  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  6483  	QUAD $0x0000a0ba945bfec5; BYTE $0x00 // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 160]
  6484  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1
  6485  	LONG $0x397de3c4; WORD $0x01d1       // vextracti128    xmm1, ymm2, 1
  6486  	QUAD $0x0000c0ba9c5bfec5; BYTE $0x00 // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 192]
  6487  	LONG $0xc96be9c5                     // vpackssdw    xmm1, xmm2, xmm1
  6488  	LONG $0x397de3c4; WORD $0x01da       // vextracti128    xmm2, ymm3, 1
  6489  	QUAD $0x0000e0baa45bfec5; BYTE $0x00 // vcvttps2dq    ymm4, yword [rdx + 4*rdi + 224]
  6490  	LONG $0xd26be1c5                     // vpackssdw    xmm2, xmm3, xmm2
  6491  	LONG $0x397de3c4; WORD $0x01e3       // vextracti128    xmm3, ymm4, 1
  6492  	LONG $0xdb6bd9c5                     // vpackssdw    xmm3, xmm4, xmm3
  6493  	LONG $0x386de3c4; WORD $0x01d3       // vinserti128    ymm2, ymm2, xmm3, 1
  6494  	LONG $0xd063edc5                     // vpacksswb    ymm2, ymm2, ymm0
  6495  	LONG $0x387de3c4; WORD $0x01c1       // vinserti128    ymm0, ymm0, xmm1, 1
  6496  	LONG $0xc063fdc5                     // vpacksswb    ymm0, ymm0, ymm0
  6497  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  6498  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  6499  	LONG $0x447ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm0
        	// Advance by the 64 elements just handled; rax counts up by 2 groups toward zero.
  6500  	LONG $0x40c78348                     // add    rdi, 64
  6501  	LONG $0x02c08348                     // add    rax, 2
  6502  	JNE  LBB0_786
        	// Loop finished; LBB0_1298 is outside this view (presumably handles a leftover odd group - confirm).
  6503  	JMP  LBB0_1298
  6504  
  6505  LBB0_787:
        	// Setup for the copy loop at LBB0_789 (byte-indexed source and destination).
        	//   esi = r9d rounded down to a multiple of 128 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 128) >> 7) + 1 = number of 128-byte groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running byte index
        	// When esi == 128 (a single group) control goes straight to LBB0_1305 (outside this view).
  6506  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6507  	WORD $0xe683; BYTE $0x80 // and    esi, -128
  6508  	LONG $0x80468d48         // lea    rax, [rsi - 128]
  6509  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6510  	LONG $0x07e8c149         // shr    r8, 7
  6511  	LONG $0x01c08349         // add    r8, 1
  6512  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6513  	JE   LBB0_1305
  6514  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6515  	LONG $0xfee08348         // and    rax, -2
  6516  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6517  	WORD $0xff31             // xor    edi, edi
  6518  
  6519  LBB0_789:
        	// Straight copy loop, unrolled twice: each half moves 128 bytes (four 32-byte vectors)
        	// from [rdx + rdi ...] to [rcx + rdi ...] without modifying the data.
        	// First half: bytes rdi .. rdi+127.
  6520  	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi]
  6521  	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
  6522  	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
  6523  	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
  6524  	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0
  6525  	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
  6526  	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
  6527  	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
        	// Second half: bytes rdi+128 .. rdi+255.
  6528  	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128]
  6529  	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
  6530  	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
  6531  	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
  6532  	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0
  6533  	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
  6534  	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
  6535  	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
        	// Advance by the 256 bytes just copied; rax counts up by 2 groups toward zero.
  6536  	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
  6537  	LONG $0x02c08348                           // add    rax, 2
  6538  	JNE  LBB0_789
        	// Loop finished; LBB0_1306 is outside this view (presumably handles a leftover odd group - confirm).
  6539  	JMP  LBB0_1306
  6540  
  6541  LBB0_790:
        	// Setup for the vector loop at LBB0_792 (4-byte elements narrowed to 1 byte).
        	//   esi = r9d rounded down to a multiple of 32 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 32) >> 5) + 1 = number of 32-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 32 (a single group) control goes straight to LBB0_1313 (outside this view).
  6542  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  6543  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
  6544  	LONG $0xe0468d48             // lea    rax, [rsi - 32]
  6545  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  6546  	LONG $0x05e8c149             // shr    r8, 5
  6547  	LONG $0x01c08349             // add    r8, 1
  6548  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  6549  	JE   LBB0_1313
  6550  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  6551  	LONG $0xfee08348             // and    rax, -2
  6552  	WORD $0xf748; BYTE $0xd8     // neg    rax
  6553  	WORD $0xff31                 // xor    edi, edi
        	// xmm0 = shuffle mask at LCDATA1+0x70 (index bytes 0x00, 0x04, 0x08, 0x0c): selects the low byte of each dword.
  6554  	LONG $0x456ff9c5; BYTE $0x70 // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */
  6555  
  6556  LBB0_792:
        	// Narrowing loop, unrolled twice: each half reads thirty-two 4-byte elements from
        	// [rdx + 4*rdi ...] and writes their low bytes as 32 one-byte elements to [rcx + rdi ...].
        	// Every 16-byte load holds four elements; vpshufb with xmm0 gathers their low bytes into
        	// the low dword, vpunpckldq joins pairs of loads, vinserti128 + vpunpcklqdq assemble the
        	// 256-bit result, and vpermq 216 (qword order 0,2,1,3) restores element order.
        	// First half: source elements rdi .. rdi+31.
  6557  	LONG $0x0c6ffac5; BYTE $0xba         // vmovdqu    xmm1, oword [rdx + 4*rdi]
  6558  	LONG $0x546ffac5; WORD $0x10ba       // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
  6559  	LONG $0x5c6ffac5; WORD $0x20ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
  6560  	LONG $0x646ffac5; WORD $0x30ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
  6561  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6562  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6563  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6564  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6565  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6566  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  6567  	LONG $0x5c6ffac5; WORD $0x50ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
  6568  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6569  	LONG $0x646ffac5; WORD $0x40ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
  6570  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6571  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  6572  	LONG $0x646ffac5; WORD $0x70ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
  6573  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6574  	LONG $0x6c6ffac5; WORD $0x60ba       // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
  6575  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  6576  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
  6577  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  6578  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  6579  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  6580  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  6581  	LONG $0x0c7ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm1
        	// Second half: source elements rdi+32 .. rdi+63 (source offset +128, destination offset +32).
  6582  	QUAD $0x000080ba8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 4*rdi + 128]
  6583  	QUAD $0x000090ba946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 4*rdi + 144]
  6584  	QUAD $0x0000a0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 160]
  6585  	QUAD $0x0000b0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 176]
  6586  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  6587  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  6588  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  6589  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  6590  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6591  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  6592  	QUAD $0x0000d0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 208]
  6593  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  6594  	QUAD $0x0000c0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 192]
  6595  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6596  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  6597  	QUAD $0x0000f0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 240]
  6598  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  6599  	QUAD $0x0000e0baac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 4*rdi + 224]
  6600  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  6601  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
  6602  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  6603  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  6604  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  6605  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  6606  	LONG $0x4c7ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm1
        	// Advance by the 64 elements just handled; rax counts up by 2 groups toward zero.
  6607  	LONG $0x40c78348                     // add    rdi, 64
  6608  	LONG $0x02c08348                     // add    rax, 2
  6609  	JNE  LBB0_792
        	// Loop finished; LBB0_1314 is outside this view (presumably handles a leftover odd group - confirm).
  6610  	JMP  LBB0_1314
  6611  
  6612  LBB0_801:
        	// Setup for the vector loop at LBB0_803 (1-byte elements sign-extended to 8 bytes).
        	//   esi = r9d rounded down to a multiple of 16 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 16) >> 4) + 1 = number of 16-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 16 (a single group) control goes straight to LBB0_1321 (outside this view).
  6613  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6614  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  6615  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  6616  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6617  	LONG $0x04e8c149         // shr    r8, 4
  6618  	LONG $0x01c08349         // add    r8, 1
  6619  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6620  	JE   LBB0_1321
  6621  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6622  	LONG $0xfee08348         // and    rax, -2
  6623  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6624  	WORD $0xff31             // xor    edi, edi
  6625  
  6626  LBB0_803:
        	// Widening loop, unrolled twice: each vpmovsxbq sign-extends 4 source bytes at
        	// [rdx + rdi ...] into four 64-bit values, stored at [rcx + 8*rdi ...].
        	// Each half handles 16 elements (16 source bytes -> 128 destination bytes).
        	// First half: elements rdi .. rdi+15.
  6627  	LONG $0x227de2c4; WORD $0x3a04             // vpmovsxbq    ymm0, dword [rdx + rdi]
  6628  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbq    ymm1, dword [rdx + rdi + 4]
  6629  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbq    ymm2, dword [rdx + rdi + 8]
  6630  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbq    ymm3, dword [rdx + rdi + 12]
  6631  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  6632  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  6633  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  6634  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
        	// Second half: elements rdi+16 .. rdi+31.
  6635  	LONG $0x227de2c4; WORD $0x3a44; BYTE $0x10 // vpmovsxbq    ymm0, dword [rdx + rdi + 16]
  6636  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x14 // vpmovsxbq    ymm1, dword [rdx + rdi + 20]
  6637  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x18 // vpmovsxbq    ymm2, dword [rdx + rdi + 24]
  6638  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x1c // vpmovsxbq    ymm3, dword [rdx + rdi + 28]
  6639  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  6640  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  6641  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  6642  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
        	// Advance by the 32 elements just handled; rax counts up by 2 groups toward zero.
  6643  	LONG $0x20c78348                           // add    rdi, 32
  6644  	LONG $0x02c08348                           // add    rax, 2
  6645  	JNE  LBB0_803
        	// Loop finished; LBB0_1322 is outside this view (presumably handles a leftover odd group - confirm).
  6646  	JMP  LBB0_1322
  6647  
  6648  LBB0_804:
        	// Setup for the copy loop at LBB0_806 (8-byte elements copied unchanged).
        	//   esi = r9d rounded down to a multiple of 16 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 16) >> 4) + 1 = number of 16-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 16 (a single group) control goes straight to LBB0_1329 (outside this view).
  6649  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6650  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  6651  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  6652  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6653  	LONG $0x04e8c149         // shr    r8, 4
  6654  	LONG $0x01c08349         // add    r8, 1
  6655  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6656  	JE   LBB0_1329
  6657  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6658  	LONG $0xfee08348         // and    rax, -2
  6659  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6660  	WORD $0xff31             // xor    edi, edi
  6661  
  6662  LBB0_806:
        	// Straight copy loop, unrolled twice: each half moves sixteen 8-byte elements (128 bytes,
        	// four 32-byte vectors) from [rdx + 8*rdi ...] to [rcx + 8*rdi ...] without modifying them.
        	// First half: elements rdi .. rdi+15.
  6663  	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi]
  6664  	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
  6665  	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
  6666  	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
  6667  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
  6668  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  6669  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  6670  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
        	// Second half: elements rdi+16 .. rdi+31.
  6671  	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128]
  6672  	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
  6673  	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
  6674  	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
  6675  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  6676  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  6677  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  6678  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
        	// Advance by the 32 elements just copied; rax counts up by 2 groups toward zero.
  6679  	LONG $0x20c78348                     // add    rdi, 32
  6680  	LONG $0x02c08348                     // add    rax, 2
  6681  	JNE  LBB0_806
        	// Loop finished; LBB0_1330 is outside this view (presumably handles a leftover odd group - confirm).
  6682  	JMP  LBB0_1330
  6683  
  6684  LBB0_807:
        	// Setup for the copy loop at LBB0_809 (8-byte elements copied unchanged).
        	//   esi = r9d rounded down to a multiple of 16 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 16) >> 4) + 1 = number of 16-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 16 (a single group) control goes straight to LBB0_1337 (outside this view).
  6685  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6686  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  6687  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  6688  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6689  	LONG $0x04e8c149         // shr    r8, 4
  6690  	LONG $0x01c08349         // add    r8, 1
  6691  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6692  	JE   LBB0_1337
  6693  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6694  	LONG $0xfee08348         // and    rax, -2
  6695  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6696  	WORD $0xff31             // xor    edi, edi
  6697  
  6698  LBB0_809:
        	// Straight copy loop, unrolled twice: each half moves sixteen 8-byte elements (128 bytes,
        	// four 32-byte vectors) from [rdx + 8*rdi ...] to [rcx + 8*rdi ...] without modifying them.
        	// The instruction sequence is the same as the loop at LBB0_806; only the exit target differs.
        	// First half: elements rdi .. rdi+15.
  6699  	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi]
  6700  	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
  6701  	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
  6702  	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
  6703  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
  6704  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  6705  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  6706  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
        	// Second half: elements rdi+16 .. rdi+31.
  6707  	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128]
  6708  	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
  6709  	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
  6710  	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
  6711  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  6712  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  6713  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  6714  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
        	// Advance by the 32 elements just copied; rax counts up by 2 groups toward zero.
  6715  	LONG $0x20c78348                     // add    rdi, 32
  6716  	LONG $0x02c08348                     // add    rax, 2
  6717  	JNE  LBB0_809
        	// Loop finished; LBB0_1338 is outside this view (presumably handles a leftover odd group - confirm).
  6718  	JMP  LBB0_1338
  6719  
  6720  LBB0_818:
        	// Setup for the vector loop at LBB0_820 (1-byte elements zero-extended to 8 bytes).
        	//   esi = r9d rounded down to a multiple of 16 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 16) >> 4) + 1 = number of 16-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 16 (a single group) control goes straight to LBB0_1345 (outside this view).
  6721  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6722  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
  6723  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  6724  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6725  	LONG $0x04e8c149         // shr    r8, 4
  6726  	LONG $0x01c08349         // add    r8, 1
  6727  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6728  	JE   LBB0_1345
  6729  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6730  	LONG $0xfee08348         // and    rax, -2
  6731  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6732  	WORD $0xff31             // xor    edi, edi
  6733  
  6734  LBB0_820:
        	// Widening loop, unrolled twice: each vpmovzxbq zero-extends 4 source bytes at
        	// [rdx + rdi ...] into four 64-bit values, stored at [rcx + 8*rdi ...].
        	// Each half handles 16 elements (16 source bytes -> 128 destination bytes).
        	// First half: elements rdi .. rdi+15.
  6735  	LONG $0x327de2c4; WORD $0x3a04             // vpmovzxbq    ymm0, dword [rdx + rdi]
  6736  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbq    ymm1, dword [rdx + rdi + 4]
  6737  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbq    ymm2, dword [rdx + rdi + 8]
  6738  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbq    ymm3, dword [rdx + rdi + 12]
  6739  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  6740  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  6741  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  6742  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
        	// Second half: elements rdi+16 .. rdi+31.
  6743  	LONG $0x327de2c4; WORD $0x3a44; BYTE $0x10 // vpmovzxbq    ymm0, dword [rdx + rdi + 16]
  6744  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x14 // vpmovzxbq    ymm1, dword [rdx + rdi + 20]
  6745  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x18 // vpmovzxbq    ymm2, dword [rdx + rdi + 24]
  6746  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x1c // vpmovzxbq    ymm3, dword [rdx + rdi + 28]
  6747  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  6748  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  6749  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  6750  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
        	// Advance by the 32 elements just handled; rax counts up by 2 groups toward zero.
  6751  	LONG $0x20c78348                           // add    rdi, 32
  6752  	LONG $0x02c08348                           // add    rax, 2
  6753  	JNE  LBB0_820
        	// Loop finished; LBB0_1346 is outside this view (presumably handles a leftover odd group - confirm).
  6754  	JMP  LBB0_1346
  6755  
  6756  LBB0_821:
        	// Setup for the vector loop at LBB0_823 (1-byte elements sign-extended to 2 bytes).
        	//   esi = r9d rounded down to a multiple of 64 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 64) >> 6) + 1 = number of 64-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 64 (a single group) control goes straight to LBB0_1353 (outside this view).
  6757  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6758  	WORD $0xe683; BYTE $0xc0 // and    esi, -64
  6759  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  6760  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6761  	LONG $0x06e8c149         // shr    r8, 6
  6762  	LONG $0x01c08349         // add    r8, 1
  6763  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6764  	JE   LBB0_1353
  6765  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6766  	LONG $0xfee08348         // and    rax, -2
  6767  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6768  	WORD $0xff31             // xor    edi, edi
  6769  
  6770  LBB0_823:
        	// Widening loop, unrolled twice: each vpmovsxbw sign-extends 16 source bytes at
        	// [rdx + rdi ...] into sixteen 16-bit values, stored at [rcx + 2*rdi ...].
        	// Each half handles 64 elements (64 source bytes -> 128 destination bytes).
        	// First half: elements rdi .. rdi+63.
  6771  	LONG $0x207de2c4; WORD $0x3a04             // vpmovsxbw    ymm0, oword [rdx + rdi]
  6772  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovsxbw    ymm1, oword [rdx + rdi + 16]
  6773  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x20 // vpmovsxbw    ymm2, oword [rdx + rdi + 32]
  6774  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovsxbw    ymm3, oword [rdx + rdi + 48]
  6775  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  6776  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  6777  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
  6778  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
        	// Second half: elements rdi+64 .. rdi+127.
  6779  	LONG $0x207de2c4; WORD $0x3a44; BYTE $0x40 // vpmovsxbw    ymm0, oword [rdx + rdi + 64]
  6780  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x50 // vpmovsxbw    ymm1, oword [rdx + rdi + 80]
  6781  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x60 // vpmovsxbw    ymm2, oword [rdx + rdi + 96]
  6782  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x70 // vpmovsxbw    ymm3, oword [rdx + rdi + 112]
  6783  	QUAD $0x00008079847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 128], ymm0
  6784  	QUAD $0x0000a0798c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 160], ymm1
  6785  	QUAD $0x0000c079947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 192], ymm2
  6786  	QUAD $0x0000e0799c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 224], ymm3
        	// Advance by 128 elements; "sub rdi, -128" is used because -128 fits a sign-extended
        	// 8-bit immediate while +128 does not. rax counts up by 2 groups toward zero.
  6787  	LONG $0x80ef8348                           // sub    rdi, -128
  6788  	LONG $0x02c08348                           // add    rax, 2
  6789  	JNE  LBB0_823
        	// Loop finished; LBB0_1354 is outside this view (presumably handles a leftover odd group - confirm).
  6790  	JMP  LBB0_1354
  6791  
  6792  LBB0_824:
        	// Setup for the vector loop at LBB0_826 (1-byte elements sign-extended to 2 bytes).
        	//   esi = r9d rounded down to a multiple of 64 (r9d is presumably the element count - confirm)
        	//   r8  = ((esi - 64) >> 6) + 1 = number of 64-element groups covered by esi
        	//   rax = -(r8 & -2): negated even group count; the loop adds 2 per pass and stops at zero
        	//   edi = 0: running element index
        	// When esi == 64 (a single group) control goes straight to LBB0_1361 (outside this view).
  6793  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6794  	WORD $0xe683; BYTE $0xc0 // and    esi, -64
  6795  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  6796  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6797  	LONG $0x06e8c149         // shr    r8, 6
  6798  	LONG $0x01c08349         // add    r8, 1
  6799  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6800  	JE   LBB0_1361
  6801  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6802  	LONG $0xfee08348         // and    rax, -2
  6803  	WORD $0xf748; BYTE $0xd8 // neg    rax
  6804  	WORD $0xff31             // xor    edi, edi
  6805  
  6806  LBB0_826:
        	// Widening loop, unrolled twice: each vpmovsxbw sign-extends 16 source bytes at
        	// [rdx + rdi ...] into sixteen 16-bit values, stored at [rcx + 2*rdi ...].
        	// Each half handles 64 elements (64 source bytes -> 128 destination bytes).
        	// The instruction sequence is the same as the loop at LBB0_823; only the exit target differs.
        	// First half: elements rdi .. rdi+63.
  6807  	LONG $0x207de2c4; WORD $0x3a04             // vpmovsxbw    ymm0, oword [rdx + rdi]
  6808  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovsxbw    ymm1, oword [rdx + rdi + 16]
  6809  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x20 // vpmovsxbw    ymm2, oword [rdx + rdi + 32]
  6810  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovsxbw    ymm3, oword [rdx + rdi + 48]
  6811  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  6812  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  6813  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
  6814  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
        	// Second half: elements rdi+64 .. rdi+127.
  6815  	LONG $0x207de2c4; WORD $0x3a44; BYTE $0x40 // vpmovsxbw    ymm0, oword [rdx + rdi + 64]
  6816  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x50 // vpmovsxbw    ymm1, oword [rdx + rdi + 80]
  6817  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x60 // vpmovsxbw    ymm2, oword [rdx + rdi + 96]
  6818  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x70 // vpmovsxbw    ymm3, oword [rdx + rdi + 112]
  6819  	QUAD $0x00008079847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 128], ymm0
  6820  	QUAD $0x0000a0798c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 160], ymm1
  6821  	QUAD $0x0000c079947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 192], ymm2
  6822  	QUAD $0x0000e0799c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 224], ymm3
        	// Advance by 128 elements; "sub rdi, -128" is used because -128 fits a sign-extended
        	// 8-bit immediate while +128 does not. rax counts up by 2 groups toward zero.
  6823  	LONG $0x80ef8348                           // sub    rdi, -128
  6824  	LONG $0x02c08348                           // add    rax, 2
  6825  	JNE  LBB0_826
        	// Loop finished; LBB0_1362 is outside this view (presumably handles a leftover odd group - confirm).
  6826  	JMP  LBB0_1362
  6827  
  6828  LBB0_827:
        	// Setup and dispatch for a vector loop whose body is outside this view.
        	//   esi = r9d rounded down to a multiple of 32 (r9d is presumably the element count - confirm)
        	//   rdi = ((esi - 32) >> 5) + 1 = number of 32-element groups covered by esi
        	//   r8d = rdi & 3 (presumably the groups left over after a 4x-unrolled loop - confirm at the targets)
        	// If esi - 32 >= 96 (unsigned), i.e. at least four groups, go to LBB0_1145;
        	// otherwise clear eax and go to LBB0_1147. Both targets are outside this view.
  6829  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6830  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  6831  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  6832  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6833  	LONG $0x05efc148         // shr    rdi, 5
  6834  	LONG $0x01c78348         // add    rdi, 1
  6835  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6836  	LONG $0x03e08341         // and    r8d, 3
  6837  	LONG $0x60f88348         // cmp    rax, 96
  6838  	JAE  LBB0_1145
  6839  	WORD $0xc031             // xor    eax, eax
  6840  	JMP  LBB0_1147
  6841  
  6842  LBB0_829:
        	// Setup and dispatch for a vector loop whose body is outside this view.
        	//   esi = r9d rounded down to a multiple of 32 (r9d is presumably the element count - confirm)
        	//   rdi = ((esi - 32) >> 5) + 1 = number of 32-element groups covered by esi
        	//   r8d = rdi & 3 (presumably the groups left over after a 4x-unrolled loop - confirm at the targets)
        	// If esi - 32 >= 96 (unsigned), i.e. at least four groups, go to LBB0_1155;
        	// otherwise clear eax and go to LBB0_1157. Both targets are outside this view.
  6843  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6844  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  6845  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  6846  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6847  	LONG $0x05efc148         // shr    rdi, 5
  6848  	LONG $0x01c78348         // add    rdi, 1
  6849  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6850  	LONG $0x03e08341         // and    r8d, 3
  6851  	LONG $0x60f88348         // cmp    rax, 96
  6852  	JAE  LBB0_1155
  6853  	WORD $0xc031             // xor    eax, eax
  6854  	JMP  LBB0_1157
  6855  
  6856  LBB0_831: // Set-up for a vector loop over 32-element chunks: derives the chunk count from r9d, then branches on whether there are at least 4 chunks. NOTE(review): r9d is presumably the element count and the targets presumably form a 4x-unrolled loop plus remainder - both are outside this view, confirm.
  6857  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6858  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; round down to a multiple of 32
  6859  	LONG $0xe0468d48         // lea    rax, [rsi - 32] ; rax = start index of the last full chunk
  6860  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6861  	LONG $0x05efc148         // shr    rdi, 5
  6862  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (rsi - 32) / 32 + 1 = number of 32-element chunks
  6863  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6864  	LONG $0x03e08341         // and    r8d, 3 ; r8d = chunk count mod 4
  6865  	LONG $0x60f88348         // cmp    rax, 96 ; unsigned test: rax >= 96 means at least 4 chunks (given rsi >= 32)
  6866  	JAE  LBB0_1165
  6867  	WORD $0xc031             // xor    eax, eax ; fewer than 4 chunks: continue with rax = 0
  6868  	JMP  LBB0_1167
  6869  
  6870  LBB0_833: // Set-up for a vector loop over 32-element chunks: derives the chunk count from r9d, then branches on whether there are at least 4 chunks. NOTE(review): r9d is presumably the element count and the targets presumably form a 4x-unrolled loop plus remainder - both are outside this view, confirm.
  6871  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6872  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; round down to a multiple of 32
  6873  	LONG $0xe0468d48         // lea    rax, [rsi - 32] ; rax = start index of the last full chunk
  6874  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6875  	LONG $0x05efc148         // shr    rdi, 5
  6876  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (rsi - 32) / 32 + 1 = number of 32-element chunks
  6877  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6878  	LONG $0x03e08341         // and    r8d, 3 ; r8d = chunk count mod 4
  6879  	LONG $0x60f88348         // cmp    rax, 96 ; unsigned test: rax >= 96 means at least 4 chunks (given rsi >= 32)
  6880  	JAE  LBB0_1175
  6881  	WORD $0xc031             // xor    eax, eax ; fewer than 4 chunks: continue with rax = 0
  6882  	JMP  LBB0_1177
  6883  
  6884  LBB0_835: // Set-up for the loop at LBB0_837 (64-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  6885  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6886  	WORD $0xe683; BYTE $0xc0 // and    esi, -64 ; round down to a multiple of 64
  6887  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  6888  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6889  	LONG $0x06e8c149         // shr    r8, 6
  6890  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 64) / 64 + 1 = number of 64-element chunks
  6891  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  6892  	JE   LBB0_1369 // single chunk: bypass the two-chunks-per-pass loop
  6893  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6894  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  6895  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  6896  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  6897  
  6898  LBB0_837: // Loop: zero-extends 128 source bytes at [rdx + rdi] into 16-bit words at [rcx + 2*rdi] per pass (8 x vpmovzxbw of 16 bytes each). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  6899  	LONG $0x307de2c4; WORD $0x3a04             // vpmovzxbw    ymm0, oword [rdx + rdi] ; 16 bytes -> 16 zero-extended words
  6900  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovzxbw    ymm1, oword [rdx + rdi + 16]
  6901  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x20 // vpmovzxbw    ymm2, oword [rdx + rdi + 32]
  6902  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovzxbw    ymm3, oword [rdx + rdi + 48]
  6903  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  6904  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  6905  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
  6906  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
  6907  	LONG $0x307de2c4; WORD $0x3a44; BYTE $0x40 // vpmovzxbw    ymm0, oword [rdx + rdi + 64] ; second 64-element chunk of this pass
  6908  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x50 // vpmovzxbw    ymm1, oword [rdx + rdi + 80]
  6909  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x60 // vpmovzxbw    ymm2, oword [rdx + rdi + 96]
  6910  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x70 // vpmovzxbw    ymm3, oword [rdx + rdi + 112]
  6911  	QUAD $0x00008079847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 128], ymm0
  6912  	QUAD $0x0000a0798c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 160], ymm1
  6913  	QUAD $0x0000c079947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 192], ymm2
  6914  	QUAD $0x0000e0799c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 224], ymm3
  6915  	LONG $0x80ef8348                           // sub    rdi, -128 ; rdi += 128 (-128 fits a signed imm8, +128 does not)
  6916  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  6917  	JNE  LBB0_837
  6918  	JMP  LBB0_1370
  6919  
  6920  LBB0_838: // Set-up for the loop at LBB0_840 (64-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  6921  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6922  	WORD $0xe683; BYTE $0xc0 // and    esi, -64 ; round down to a multiple of 64
  6923  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  6924  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6925  	LONG $0x06e8c149         // shr    r8, 6
  6926  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 64) / 64 + 1 = number of 64-element chunks
  6927  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  6928  	JE   LBB0_1377 // single chunk: bypass the two-chunks-per-pass loop
  6929  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6930  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  6931  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  6932  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  6933  
  6934  LBB0_840: // Loop: zero-extends 128 source bytes at [rdx + rdi] into 16-bit words at [rcx + 2*rdi] per pass (8 x vpmovzxbw of 16 bytes each). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  6935  	LONG $0x307de2c4; WORD $0x3a04             // vpmovzxbw    ymm0, oword [rdx + rdi] ; 16 bytes -> 16 zero-extended words
  6936  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovzxbw    ymm1, oword [rdx + rdi + 16]
  6937  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x20 // vpmovzxbw    ymm2, oword [rdx + rdi + 32]
  6938  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovzxbw    ymm3, oword [rdx + rdi + 48]
  6939  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  6940  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
  6941  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
  6942  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
  6943  	LONG $0x307de2c4; WORD $0x3a44; BYTE $0x40 // vpmovzxbw    ymm0, oword [rdx + rdi + 64] ; second 64-element chunk of this pass
  6944  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x50 // vpmovzxbw    ymm1, oword [rdx + rdi + 80]
  6945  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x60 // vpmovzxbw    ymm2, oword [rdx + rdi + 96]
  6946  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x70 // vpmovzxbw    ymm3, oword [rdx + rdi + 112]
  6947  	QUAD $0x00008079847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 128], ymm0
  6948  	QUAD $0x0000a0798c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 160], ymm1
  6949  	QUAD $0x0000c079947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 192], ymm2
  6950  	QUAD $0x0000e0799c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 2*rdi + 224], ymm3
  6951  	LONG $0x80ef8348                           // sub    rdi, -128 ; rdi += 128 (-128 fits a signed imm8, +128 does not)
  6952  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  6953  	JNE  LBB0_840
  6954  	JMP  LBB0_1378
  6955  
  6956  LBB0_841: // Set-up for the loop at LBB0_843 (16-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  6957  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6958  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; round down to a multiple of 16
  6959  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  6960  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6961  	LONG $0x04e8c149         // shr    r8, 4
  6962  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16) / 16 + 1 = number of 16-element chunks
  6963  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  6964  	JE   LBB0_1385 // single chunk: bypass the two-chunks-per-pass loop
  6965  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6966  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  6967  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  6968  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  6969  
  6970  LBB0_843: // Loop: sign-extends 32 source bytes at [rdx + rdi] into 64-bit integers at [rcx + 8*rdi] per pass (8 x vpmovsxbq of 4 bytes each). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  6971  	LONG $0x227de2c4; WORD $0x3a04             // vpmovsxbq    ymm0, dword [rdx + rdi] ; 4 bytes -> 4 sign-extended qwords
  6972  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbq    ymm1, dword [rdx + rdi + 4]
  6973  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbq    ymm2, dword [rdx + rdi + 8]
  6974  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbq    ymm3, dword [rdx + rdi + 12]
  6975  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  6976  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  6977  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  6978  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  6979  	LONG $0x227de2c4; WORD $0x3a44; BYTE $0x10 // vpmovsxbq    ymm0, dword [rdx + rdi + 16] ; second 16-element chunk of this pass
  6980  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x14 // vpmovsxbq    ymm1, dword [rdx + rdi + 20]
  6981  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x18 // vpmovsxbq    ymm2, dword [rdx + rdi + 24]
  6982  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x1c // vpmovsxbq    ymm3, dword [rdx + rdi + 28]
  6983  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  6984  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  6985  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  6986  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  6987  	LONG $0x20c78348                           // add    rdi, 32 ; advance by the 32 elements handled this pass
  6988  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  6989  	JNE  LBB0_843
  6990  	JMP  LBB0_1386
  6991  
  6992  LBB0_844: // Set-up for the loop at LBB0_846 (32-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  6993  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  6994  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; round down to a multiple of 32
  6995  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  6996  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6997  	LONG $0x05e8c149         // shr    r8, 5
  6998  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 32) / 32 + 1 = number of 32-element chunks
  6999  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7000  	JE   LBB0_1393 // single chunk: bypass the two-chunks-per-pass loop
  7001  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7002  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7003  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7004  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7005  
  7006  LBB0_846: // Loop: converts 64 signed source bytes at [rdx + rdi] to float32 at [rcx + 4*rdi] per pass (sign-extend to int32 with vpmovsxbd, then vcvtdq2ps). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7007  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi] ; 8 bytes -> 8 sign-extended dwords
  7008  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
  7009  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
  7010  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
  7011  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0 ; int32 -> float32
  7012  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  7013  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  7014  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  7015  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
  7016  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
  7017  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
  7018  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
  7019  	LONG $0x217de2c4; WORD $0x3a44; BYTE $0x20 // vpmovsxbd    ymm0, qword [rdx + rdi + 32] ; second 32-element chunk of this pass
  7020  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovsxbd    ymm1, qword [rdx + rdi + 40]
  7021  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x30 // vpmovsxbd    ymm2, qword [rdx + rdi + 48]
  7022  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovsxbd    ymm3, qword [rdx + rdi + 56]
  7023  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
  7024  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  7025  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  7026  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  7027  	QUAD $0x000080b98411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 128], ymm0
  7028  	QUAD $0x0000a0b98c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 160], ymm1
  7029  	QUAD $0x0000c0b99411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 192], ymm2
  7030  	QUAD $0x0000e0b99c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 224], ymm3
  7031  	LONG $0x40c78348                           // add    rdi, 64 ; advance by the 64 elements handled this pass
  7032  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7033  	JNE  LBB0_846
  7034  	JMP  LBB0_1394
  7035  
  7036  LBB0_847: // Set-up for the loop at LBB0_849 (16-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7037  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7038  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; round down to a multiple of 16
  7039  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  7040  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7041  	LONG $0x04e8c149         // shr    r8, 4
  7042  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16) / 16 + 1 = number of 16-element chunks
  7043  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7044  	JE   LBB0_1401 // single chunk: bypass the two-chunks-per-pass loop
  7045  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7046  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7047  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7048  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7049  
  7050  LBB0_849: // Loop: copies 32 eight-byte elements (256 bytes) unchanged from [rdx + 8*rdi] to [rcx + 8*rdi] per pass. rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7051  	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi] ; first 16-element chunk: 4 x 32-byte loads
  7052  	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
  7053  	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
  7054  	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
  7055  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
  7056  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  7057  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  7058  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
  7059  	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128] ; second 16-element chunk of this pass
  7060  	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
  7061  	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
  7062  	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
  7063  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  7064  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  7065  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  7066  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  7067  	LONG $0x20c78348                     // add    rdi, 32 ; advance by the 32 elements handled this pass
  7068  	LONG $0x02c08348                     // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7069  	JNE  LBB0_849
  7070  	JMP  LBB0_1402
  7071  
  7072  LBB0_860: // Set-up for the loop at LBB0_862 (16-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7073  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7074  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; round down to a multiple of 16
  7075  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  7076  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7077  	LONG $0x04e8c149         // shr    r8, 4
  7078  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16) / 16 + 1 = number of 16-element chunks
  7079  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7080  	JE   LBB0_1409 // single chunk: bypass the two-chunks-per-pass loop
  7081  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7082  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7083  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7084  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7085  
  7086  LBB0_862: // Loop: copies 32 eight-byte elements (256 bytes) unchanged from [rdx + 8*rdi] to [rcx + 8*rdi] per pass. rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7087  	LONG $0x0410fcc5; BYTE $0xfa         // vmovups    ymm0, yword [rdx + 8*rdi] ; first 16-element chunk: 4 x 32-byte loads
  7088  	LONG $0x4c10fcc5; WORD $0x20fa       // vmovups    ymm1, yword [rdx + 8*rdi + 32]
  7089  	LONG $0x5410fcc5; WORD $0x40fa       // vmovups    ymm2, yword [rdx + 8*rdi + 64]
  7090  	LONG $0x5c10fcc5; WORD $0x60fa       // vmovups    ymm3, yword [rdx + 8*rdi + 96]
  7091  	LONG $0x0411fcc5; BYTE $0xf9         // vmovups    yword [rcx + 8*rdi], ymm0
  7092  	LONG $0x4c11fcc5; WORD $0x20f9       // vmovups    yword [rcx + 8*rdi + 32], ymm1
  7093  	LONG $0x5411fcc5; WORD $0x40f9       // vmovups    yword [rcx + 8*rdi + 64], ymm2
  7094  	LONG $0x5c11fcc5; WORD $0x60f9       // vmovups    yword [rcx + 8*rdi + 96], ymm3
  7095  	QUAD $0x000080fa8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 8*rdi + 128] ; second 16-element chunk of this pass
  7096  	QUAD $0x0000a0fa8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 8*rdi + 160]
  7097  	QUAD $0x0000c0fa9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 8*rdi + 192]
  7098  	QUAD $0x0000e0fa9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 8*rdi + 224]
  7099  	QUAD $0x000080f98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 128], ymm0
  7100  	QUAD $0x0000a0f98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 160], ymm1
  7101  	QUAD $0x0000c0f99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 192], ymm2
  7102  	QUAD $0x0000e0f99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 8*rdi + 224], ymm3
  7103  	LONG $0x20c78348                     // add    rdi, 32 ; advance by the 32 elements handled this pass
  7104  	LONG $0x02c08348                     // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7105  	JNE  LBB0_862
  7106  	JMP  LBB0_1410
  7107  
  7108  LBB0_863: // Set-up for the loop at LBB0_865 (32-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7109  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7110  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; round down to a multiple of 32
  7111  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7112  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7113  	LONG $0x05e8c149         // shr    r8, 5
  7114  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 32) / 32 + 1 = number of 32-element chunks
  7115  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7116  	JE   LBB0_1417 // single chunk: bypass the two-chunks-per-pass loop
  7117  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7118  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7119  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7120  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7121  
  7122  LBB0_865: // Loop: copies 64 four-byte elements (256 bytes) unchanged from [rdx + 4*rdi] to [rcx + 4*rdi] per pass. rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7123  	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi] ; first 32-element chunk: 4 x 32-byte loads
  7124  	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
  7125  	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
  7126  	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
  7127  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  7128  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  7129  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  7130  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
  7131  	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128] ; second 32-element chunk of this pass
  7132  	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
  7133  	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
  7134  	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
  7135  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  7136  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  7137  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  7138  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
  7139  	LONG $0x40c78348                     // add    rdi, 64 ; advance by the 64 elements handled this pass
  7140  	LONG $0x02c08348                     // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7141  	JNE  LBB0_865
  7142  	JMP  LBB0_1418
  7143  
  7144  LBB0_866: // Set-up for the loop at LBB0_868 (16-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7145  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7146  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; round down to a multiple of 16
  7147  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  7148  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7149  	LONG $0x04e8c149         // shr    r8, 4
  7150  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16) / 16 + 1 = number of 16-element chunks
  7151  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7152  	JE   LBB0_1425 // single chunk: bypass the two-chunks-per-pass loop
  7153  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7154  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7155  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7156  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7157  
  7158  LBB0_868: // Loop: zero-extends 32 source bytes at [rdx + rdi] into 64-bit integers at [rcx + 8*rdi] per pass (8 x vpmovzxbq of 4 bytes each). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7159  	LONG $0x327de2c4; WORD $0x3a04             // vpmovzxbq    ymm0, dword [rdx + rdi] ; 4 bytes -> 4 zero-extended qwords
  7160  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbq    ymm1, dword [rdx + rdi + 4]
  7161  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbq    ymm2, dword [rdx + rdi + 8]
  7162  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbq    ymm3, dword [rdx + rdi + 12]
  7163  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  7164  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  7165  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  7166  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  7167  	LONG $0x327de2c4; WORD $0x3a44; BYTE $0x10 // vpmovzxbq    ymm0, dword [rdx + rdi + 16] ; second 16-element chunk of this pass
  7168  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x14 // vpmovzxbq    ymm1, dword [rdx + rdi + 20]
  7169  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x18 // vpmovzxbq    ymm2, dword [rdx + rdi + 24]
  7170  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x1c // vpmovzxbq    ymm3, dword [rdx + rdi + 28]
  7171  	QUAD $0x000080f9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 128], ymm0
  7172  	QUAD $0x0000a0f98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 160], ymm1
  7173  	QUAD $0x0000c0f9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 192], ymm2
  7174  	QUAD $0x0000e0f99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 8*rdi + 224], ymm3
  7175  	LONG $0x20c78348                           // add    rdi, 32 ; advance by the 32 elements handled this pass
  7176  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7177  	JNE  LBB0_868
  7178  	JMP  LBB0_1426
  7179  
  7180  LBB0_869: // Set-up for the loop at LBB0_871 (32-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7181  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7182  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; round down to a multiple of 32
  7183  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7184  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7185  	LONG $0x05e8c149         // shr    r8, 5
  7186  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 32) / 32 + 1 = number of 32-element chunks
  7187  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7188  	JE   LBB0_1433 // single chunk: bypass the two-chunks-per-pass loop
  7189  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7190  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7191  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7192  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7193  
  7194  LBB0_871: // Loop: converts 64 unsigned source bytes at [rdx + rdi] to float32 at [rcx + 4*rdi] per pass (zero-extend to int32 with vpmovzxbd, then vcvtdq2ps). rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7195  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi] ; 8 bytes -> 8 zero-extended dwords
  7196  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
  7197  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
  7198  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
  7199  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0 ; signed int32 -> float32; inputs are 0..255 after zero-extension
  7200  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  7201  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  7202  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  7203  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
  7204  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
  7205  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
  7206  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
  7207  	LONG $0x317de2c4; WORD $0x3a44; BYTE $0x20 // vpmovzxbd    ymm0, qword [rdx + rdi + 32] ; second 32-element chunk of this pass
  7208  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovzxbd    ymm1, qword [rdx + rdi + 40]
  7209  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x30 // vpmovzxbd    ymm2, qword [rdx + rdi + 48]
  7210  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovzxbd    ymm3, qword [rdx + rdi + 56]
  7211  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
  7212  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  7213  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  7214  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  7215  	QUAD $0x000080b98411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 128], ymm0
  7216  	QUAD $0x0000a0b98c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 160], ymm1
  7217  	QUAD $0x0000c0b99411fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 192], ymm2
  7218  	QUAD $0x0000e0b99c11fcc5; BYTE $0x00       // vmovups    yword [rcx + 4*rdi + 224], ymm3
  7219  	LONG $0x40c78348                           // add    rdi, 64 ; advance by the 64 elements handled this pass
  7220  	LONG $0x02c08348                           // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7221  	JNE  LBB0_871
  7222  	JMP  LBB0_1434
  7223  
  7224  LBB0_872: // Set-up for the loop at LBB0_874 (32-element chunks, two chunks per pass): computes the chunk count in r8, a negated even pass counter in rax, and preloads the vpshufb mask into xmm0. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7225  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7226  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; round down to a multiple of 32
  7227  	LONG $0xe0468d48             // lea    rax, [rsi - 32]
  7228  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  7229  	LONG $0x05e8c149             // shr    r8, 5
  7230  	LONG $0x01c08349             // add    r8, 1 ; r8 = (rsi - 32) / 32 + 1 = number of 32-element chunks
  7231  	WORD $0x8548; BYTE $0xc0     // test    rax, rax ; rax == 0 means exactly one chunk
  7232  	JE   LBB0_1441 // single chunk: bypass the two-chunks-per-pass loop
  7233  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  7234  	LONG $0xfee08348             // and    rax, -2 ; chunk count rounded down to even
  7235  	WORD $0xf748; BYTE $0xd8     // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7236  	WORD $0xff31                 // xor    edi, edi ; rdi = 0, running element index
  7237  	LONG $0x456ff9c5; BYTE $0x70 // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */ ; shuffle mask at LCDATA1+0x70 (first bytes 00 04 08 0c); rbp is set to LCDATA1 at function entry
  7238  
  7239  LBB0_874: // Loop: narrows 64 four-byte elements at [rdx + 4*rdi] to their low bytes at [rcx + rdi] per pass. vpshufb with the mask in xmm0 (loaded just before the loop) gathers byte 0 of each dword; unpack/insert/permute then assemble 32 result bytes in element order. rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7240  	LONG $0x0c6ffac5; BYTE $0xba         // vmovdqu    xmm1, oword [rdx + 4*rdi] ; elements 0..3
  7241  	LONG $0x546ffac5; WORD $0x10ba       // vmovdqu    xmm2, oword [rdx + 4*rdi + 16] ; elements 4..7
  7242  	LONG $0x5c6ffac5; WORD $0x20ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 32] ; elements 8..11
  7243  	LONG $0x646ffac5; WORD $0x30ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 48] ; elements 12..15
  7244  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0 ; low 4 bytes = low byte of each dword
  7245  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7246  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2 ; low qword = bytes of elements 0..7
  7247  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7248  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7249  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2 ; low qword = bytes of elements 8..15
  7250  	LONG $0x5c6ffac5; WORD $0x50ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
  7251  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7252  	LONG $0x646ffac5; WORD $0x40ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
  7253  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7254  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3 ; low qword = bytes of elements 16..23
  7255  	LONG $0x646ffac5; WORD $0x70ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
  7256  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7257  	LONG $0x6c6ffac5; WORD $0x60ba       // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
  7258  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  7259  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4 ; low qword = bytes of elements 24..31
  7260  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1 ; lanes: [16..23 | 24..31]
  7261  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1 ; lanes: [0..7 | 8..15]
  7262  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3 ; qwords: 0..7, 16..23, 8..15, 24..31
  7263  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216 ; qword order 0,2,1,3 restores element order
  7264  	LONG $0x0c7ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm1 ; store 32 narrowed bytes
  7265  	QUAD $0x000080ba8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 4*rdi + 128] ; second 32-element chunk, same sequence
  7266  	QUAD $0x000090ba946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 4*rdi + 144]
  7267  	QUAD $0x0000a0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 160]
  7268  	QUAD $0x0000b0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 176]
  7269  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7270  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7271  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7272  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7273  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7274  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  7275  	QUAD $0x0000d0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 208]
  7276  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7277  	QUAD $0x0000c0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 192]
  7278  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7279  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  7280  	QUAD $0x0000f0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 240]
  7281  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7282  	QUAD $0x0000e0baac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 4*rdi + 224]
  7283  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  7284  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
  7285  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  7286  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  7287  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  7288  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  7289  	LONG $0x4c7ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm1
  7290  	LONG $0x40c78348                     // add    rdi, 64 ; advance by the 64 elements handled this pass
  7291  	LONG $0x02c08348                     // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7292  	JNE  LBB0_874
  7293  	JMP  LBB0_1442
  7294  
  7295  LBB0_875: // Set-up for the loop at LBB0_877 (16-element chunks, two chunks per pass): computes the chunk count in r8 and a negated even pass counter in rax. NOTE(review): r9d is presumably the element count - set outside this view, confirm.
  7296  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; 32-bit write, so rsi is zero-extended
  7297  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; round down to a multiple of 16
  7298  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  7299  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7300  	LONG $0x04e8c149         // shr    r8, 4
  7301  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16) / 16 + 1 = number of 16-element chunks
  7302  	WORD $0x8548; BYTE $0xc0 // test    rax, rax ; rax == 0 means exactly one chunk
  7303  	JE   LBB0_1449 // single chunk: bypass the two-chunks-per-pass loop
  7304  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7305  	LONG $0xfee08348         // and    rax, -2 ; chunk count rounded down to even
  7306  	WORD $0xf748; BYTE $0xd8 // neg    rax ; negative counter; the loop adds 2 per pass until it hits 0
  7307  	WORD $0xff31             // xor    edi, edi ; rdi = 0, running element index
  7308  
  7309  LBB0_877: // Loop: converts 32 float64 values at [rdx + 8*rdi] to single bytes at [rcx + rdi] per pass. Each group of 4 doubles is truncated to int32 (vcvttpd2dq), then packed with unsigned saturation to 16 bits (vpackusdw) and to 8 bits (vpackuswb); unpack steps merge four groups into one 16-byte store. rdx/rcx are loaded from in/out at function entry; presumably unchanged here - confirm.
  7310  	LONG $0x04e6fdc5; BYTE $0xfa         // vcvttpd2dq    xmm0, yword [rdx + 8*rdi] ; 4 doubles -> 4 int32, truncating
  7311  	LONG $0x2b79e2c4; BYTE $0xc0         // vpackusdw    xmm0, xmm0, xmm0 ; int32 -> 16-bit, unsigned saturation
  7312  	LONG $0x4ce6fdc5; WORD $0x20fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  7313  	LONG $0xc067f9c5                     // vpackuswb    xmm0, xmm0, xmm0 ; 16-bit -> 8-bit, unsigned saturation; low dword = elements 0..3
  7314  	LONG $0x2b71e2c4; BYTE $0xc9         // vpackusdw    xmm1, xmm1, xmm1
  7315  	LONG $0xc967f1c5                     // vpackuswb    xmm1, xmm1, xmm1
  7316  	LONG $0xc162f9c5                     // vpunpckldq    xmm0, xmm0, xmm1 ; low qword = elements 0..7
  7317  	LONG $0x4ce6fdc5; WORD $0x40fa       // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 64]
  7318  	LONG $0x2b71e2c4; BYTE $0xc9         // vpackusdw    xmm1, xmm1, xmm1
  7319  	LONG $0xc967f1c5                     // vpackuswb    xmm1, xmm1, xmm1
  7320  	LONG $0x54e6fdc5; WORD $0x60fa       // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 96]
  7321  	LONG $0x2b69e2c4; BYTE $0xd2         // vpackusdw    xmm2, xmm2, xmm2
  7322  	LONG $0xd267e9c5                     // vpackuswb    xmm2, xmm2, xmm2
  7323  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2 ; low qword = elements 8..15
  7324  	LONG $0xc16cf9c5                     // vpunpcklqdq    xmm0, xmm0, xmm1 ; 16 result bytes, elements 0..15
  7325  	LONG $0x047ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm0
  7326  	QUAD $0x000080fa84e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm0, yword [rdx + 8*rdi + 128] ; second 16-element chunk of this pass
  7327  	QUAD $0x0000a0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 160]
  7328  	LONG $0x2b79e2c4; BYTE $0xc0         // vpackusdw    xmm0, xmm0, xmm0
  7329  	LONG $0xc067f9c5                     // vpackuswb    xmm0, xmm0, xmm0
  7330  	LONG $0x2b71e2c4; BYTE $0xc9         // vpackusdw    xmm1, xmm1, xmm1
  7331  	LONG $0xc967f1c5                     // vpackuswb    xmm1, xmm1, xmm1
  7332  	LONG $0xc162f9c5                     // vpunpckldq    xmm0, xmm0, xmm1
  7333  	QUAD $0x0000c0fa8ce6fdc5; BYTE $0x00 // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 192]
  7334  	LONG $0x2b71e2c4; BYTE $0xc9         // vpackusdw    xmm1, xmm1, xmm1
  7335  	QUAD $0x0000e0fa94e6fdc5; BYTE $0x00 // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 224]
  7336  	LONG $0xc967f1c5                     // vpackuswb    xmm1, xmm1, xmm1
  7337  	LONG $0x2b69e2c4; BYTE $0xd2         // vpackusdw    xmm2, xmm2, xmm2
  7338  	LONG $0xd267e9c5                     // vpackuswb    xmm2, xmm2, xmm2
  7339  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7340  	LONG $0xc16cf9c5                     // vpunpcklqdq    xmm0, xmm0, xmm1
  7341  	LONG $0x447ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm0
  7342  	LONG $0x20c78348                     // add    rdi, 32 ; advance by the 32 elements handled this pass
  7343  	LONG $0x02c08348                     // add    rax, 2 ; counter steps toward 0; ZF feeds the JNE
  7344  	JNE  LBB0_877
  7345  	JMP  LBB0_1450
  7346  
  7347  LBB0_878:
        	// Set-up for the unrolled byte copy in LBB0_880 (rdx + rdi -> rcx + rdi).
        	// esi = r9d rounded down to a multiple of 128; r8 = number of 128-byte groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7348  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7349  	WORD $0xe683; BYTE $0x80 // and    esi, -128
  7350  	LONG $0x80468d48         // lea    rax, [rsi - 128]
  7351  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7352  	LONG $0x07e8c149         // shr    r8, 7
  7353  	LONG $0x01c08349         // add    r8, 1
  7354  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7355  	JE   LBB0_1457
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: running byte offset shared by source and destination.
  7356  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7357  	LONG $0xfee08348         // and    rax, -2
  7358  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7359  	WORD $0xff31             // xor    edi, edi
  7360  
  7361  LBB0_880:
        	// One iteration copies 256 bytes: two groups of four 32-byte vectors,
        	// each group loaded into ymm0-ymm3 and stored at the same offset in rcx.
  7362  	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi]
  7363  	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
  7364  	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
  7365  	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
  7366  	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0
  7367  	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
  7368  	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
  7369  	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
  7370  	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128]
  7371  	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
  7372  	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
  7373  	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
  7374  	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0
  7375  	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
  7376  	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
  7377  	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
        	// Advance the offset by 256 bytes and the (negative) group counter by 2;
        	// JNE consumes the flags of the final add.
  7378  	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
  7379  	LONG $0x02c08348                           // add    rax, 2
  7380  	JNE  LBB0_880
        	// Continuation label is outside this chunk.
  7381  	JMP  LBB0_1458
  7382  
  7383  LBB0_881:
        	// Set-up for the narrowing loop in LBB0_883 (8-byte source elements -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 16; r8 = number of 16-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7384  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  7385  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
  7386  	LONG $0xf0468d48             // lea    rax, [rsi - 16]
  7387  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  7388  	LONG $0x04e8c149             // shr    r8, 4
  7389  	LONG $0x01c08349             // add    r8, 1
  7390  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7391  	JE   LBB0_1465
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 8 for loads, unscaled for stores).
  7392  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  7393  	LONG $0xfee08348             // and    rax, -2
  7394  	WORD $0xf748; BYTE $0xd8     // neg    rax
  7395  	WORD $0xff31                 // xor    edi, edi
        	// xmm0 = shuffle control from LCDATA1+64 (low bytes 0x00, 0x08): vpshufb with it
        	// places source bytes 0 and 8, i.e. the low byte of each 64-bit lane, in bytes 0-1.
  7396  	LONG $0x456ff9c5; BYTE $0x40 // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */
  7397  
  7398  LBB0_883:
        	// First group of 16 elements: eight 16-byte loads (two elements each) are
        	// shuffled down to two bytes apiece, then merged pairwise by the word, dword
        	// and qword unpacks into one 16-byte result.
  7399  	LONG $0x0c6ffac5; BYTE $0xfa         // vmovdqu    xmm1, oword [rdx + 8*rdi]
  7400  	LONG $0x546ffac5; WORD $0x10fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
  7401  	LONG $0x5c6ffac5; WORD $0x20fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
  7402  	LONG $0x646ffac5; WORD $0x30fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
  7403  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7404  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7405  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  7406  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7407  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7408  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7409  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7410  	LONG $0x546ffac5; WORD $0x50fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
  7411  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7412  	LONG $0x5c6ffac5; WORD $0x40fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
  7413  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7414  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7415  	LONG $0x5c6ffac5; WORD $0x70fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
  7416  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7417  	LONG $0x646ffac5; WORD $0x60fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
  7418  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7419  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  7420  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  7421  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  7422  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1
        	// Second group of 16 elements: same sequence on source bytes 128..255,
        	// stored 16 bytes further into the destination.
  7423  	QUAD $0x000080fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 128]
  7424  	QUAD $0x000090fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 144]
  7425  	QUAD $0x0000a0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 160]
  7426  	QUAD $0x0000b0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 176]
  7427  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7428  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7429  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  7430  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7431  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7432  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7433  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7434  	QUAD $0x0000d0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 208]
  7435  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7436  	QUAD $0x0000c0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 192]
  7437  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7438  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7439  	QUAD $0x0000f0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 240]
  7440  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7441  	QUAD $0x0000e0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 224]
  7442  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7443  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  7444  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  7445  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  7446  	LONG $0x4c7ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm1
        	// 32 elements consumed per iteration; the group counter moves 2 closer to zero.
  7447  	LONG $0x20c78348                     // add    rdi, 32
  7448  	LONG $0x02c08348                     // add    rax, 2
  7449  	JNE  LBB0_883
        	// Continuation label is outside this chunk.
  7450  	JMP  LBB0_1466
  7451  
  7452  LBB0_884:
        	// Set-up for the narrowing loop in LBB0_886 (2-byte source elements -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 64; r8 = number of 64-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7453  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7454  	WORD $0xe683; BYTE $0xc0 // and    esi, -64
  7455  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  7456  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7457  	LONG $0x06e8c149         // shr    r8, 6
  7458  	LONG $0x01c08349         // add    r8, 1
  7459  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7460  	JE   LBB0_1473
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 2 for loads, unscaled for stores).
  7461  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7462  	LONG $0xfee08348         // and    rax, -2
  7463  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7464  	WORD $0xff31             // xor    edi, edi
        	// ymm0 = 0x00ff in every 16-bit word (LCDATA1+160): mask that keeps each word's low byte.
  7465  	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */
  7466  
  7467  LBB0_886:
        	// First group of 64 elements. For each 32-byte load: mask every word to its low
        	// byte, split the two 128-bit halves, and pack them into 16 bytes. Because the
        	// words were masked to <= 0xff, the saturating pack passes them through unchanged.
  7468  	LONG $0x0cdbfdc5; BYTE $0x7a         // vpand    ymm1, ymm0, yword [rdx + 2*rdi]
  7469  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  7470  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  7471  	LONG $0x54dbfdc5; WORD $0x207a       // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
  7472  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  7473  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  7474  	LONG $0x5cdbfdc5; WORD $0x407a       // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
  7475  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  7476  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  7477  	LONG $0x64dbfdc5; WORD $0x607a       // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 96]
  7478  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  7479  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  7480  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1
  7481  	LONG $0x547ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm2
  7482  	LONG $0x5c7ffac5; WORD $0x2039       // vmovdqu    oword [rcx + rdi + 32], xmm3
  7483  	LONG $0x647ffac5; WORD $0x3039       // vmovdqu    oword [rcx + rdi + 48], xmm4
        	// Second group of 64 elements: same sequence on source bytes 128..255,
        	// stored 64 bytes further into the destination.
  7484  	QUAD $0x0000807a8cdbfdc5; BYTE $0x00 // vpand    ymm1, ymm0, yword [rdx + 2*rdi + 128]
  7485  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  7486  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  7487  	QUAD $0x0000a07a94dbfdc5; BYTE $0x00 // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 160]
  7488  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  7489  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  7490  	QUAD $0x0000c07a9cdbfdc5; BYTE $0x00 // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 192]
  7491  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  7492  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  7493  	QUAD $0x0000e07aa4dbfdc5; BYTE $0x00 // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 224]
  7494  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  7495  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  7496  	LONG $0x4c7ffac5; WORD $0x4039       // vmovdqu    oword [rcx + rdi + 64], xmm1
  7497  	LONG $0x547ffac5; WORD $0x5039       // vmovdqu    oword [rcx + rdi + 80], xmm2
  7498  	LONG $0x5c7ffac5; WORD $0x6039       // vmovdqu    oword [rcx + rdi + 96], xmm3
  7499  	LONG $0x647ffac5; WORD $0x7039       // vmovdqu    oword [rcx + rdi + 112], xmm4
        	// "sub rdi, -128" advances the index by 128 elements (-128 fits a one-byte
        	// immediate, +128 does not); the group counter then moves 2 closer to zero.
  7500  	LONG $0x80ef8348                     // sub    rdi, -128
  7501  	LONG $0x02c08348                     // add    rax, 2
  7502  	JNE  LBB0_886
        	// Continuation label is outside this chunk.
  7503  	JMP  LBB0_1474
  7504  
  7505  LBB0_887:
        	// Set-up for the narrowing loop in LBB0_889 (2-byte source elements -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 64; r8 = number of 64-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7506  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7507  	WORD $0xe683; BYTE $0xc0 // and    esi, -64
  7508  	LONG $0xc0468d48         // lea    rax, [rsi - 64]
  7509  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7510  	LONG $0x06e8c149         // shr    r8, 6
  7511  	LONG $0x01c08349         // add    r8, 1
  7512  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7513  	JE   LBB0_1481
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 2 for loads, unscaled for stores).
  7514  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7515  	LONG $0xfee08348         // and    rax, -2
  7516  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7517  	WORD $0xff31             // xor    edi, edi
        	// ymm0 = 0x00ff in every 16-bit word (LCDATA1+160): mask that keeps each word's low byte.
  7518  	QUAD $0x000000a0856ffdc5 // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */
  7519  
  7520  LBB0_889:
        	// First group of 64 elements. For each 32-byte load: mask every word to its low
        	// byte, split the two 128-bit halves, and pack them into 16 bytes. Because the
        	// words were masked to <= 0xff, the saturating pack passes them through unchanged.
  7521  	LONG $0x0cdbfdc5; BYTE $0x7a         // vpand    ymm1, ymm0, yword [rdx + 2*rdi]
  7522  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  7523  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  7524  	LONG $0x54dbfdc5; WORD $0x207a       // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
  7525  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  7526  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  7527  	LONG $0x5cdbfdc5; WORD $0x407a       // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
  7528  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  7529  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  7530  	LONG $0x64dbfdc5; WORD $0x607a       // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 96]
  7531  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  7532  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  7533  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1
  7534  	LONG $0x547ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm2
  7535  	LONG $0x5c7ffac5; WORD $0x2039       // vmovdqu    oword [rcx + rdi + 32], xmm3
  7536  	LONG $0x647ffac5; WORD $0x3039       // vmovdqu    oword [rcx + rdi + 48], xmm4
        	// Second group of 64 elements: same sequence on source bytes 128..255,
        	// stored 64 bytes further into the destination.
  7537  	QUAD $0x0000807a8cdbfdc5; BYTE $0x00 // vpand    ymm1, ymm0, yword [rdx + 2*rdi + 128]
  7538  	LONG $0x397de3c4; WORD $0x01ca       // vextracti128    xmm2, ymm1, 1
  7539  	LONG $0xca67f1c5                     // vpackuswb    xmm1, xmm1, xmm2
  7540  	QUAD $0x0000a07a94dbfdc5; BYTE $0x00 // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 160]
  7541  	LONG $0x397de3c4; WORD $0x01d3       // vextracti128    xmm3, ymm2, 1
  7542  	LONG $0xd367e9c5                     // vpackuswb    xmm2, xmm2, xmm3
  7543  	QUAD $0x0000c07a9cdbfdc5; BYTE $0x00 // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 192]
  7544  	LONG $0x397de3c4; WORD $0x01dc       // vextracti128    xmm4, ymm3, 1
  7545  	LONG $0xdc67e1c5                     // vpackuswb    xmm3, xmm3, xmm4
  7546  	QUAD $0x0000e07aa4dbfdc5; BYTE $0x00 // vpand    ymm4, ymm0, yword [rdx + 2*rdi + 224]
  7547  	LONG $0x397de3c4; WORD $0x01e5       // vextracti128    xmm5, ymm4, 1
  7548  	LONG $0xe567d9c5                     // vpackuswb    xmm4, xmm4, xmm5
  7549  	LONG $0x4c7ffac5; WORD $0x4039       // vmovdqu    oword [rcx + rdi + 64], xmm1
  7550  	LONG $0x547ffac5; WORD $0x5039       // vmovdqu    oword [rcx + rdi + 80], xmm2
  7551  	LONG $0x5c7ffac5; WORD $0x6039       // vmovdqu    oword [rcx + rdi + 96], xmm3
  7552  	LONG $0x647ffac5; WORD $0x7039       // vmovdqu    oword [rcx + rdi + 112], xmm4
        	// "sub rdi, -128" advances the index by 128 elements (-128 fits a one-byte
        	// immediate, +128 does not); the group counter then moves 2 closer to zero.
  7553  	LONG $0x80ef8348                     // sub    rdi, -128
  7554  	LONG $0x02c08348                     // add    rax, 2
  7555  	JNE  LBB0_889
        	// Continuation label is outside this chunk.
  7556  	JMP  LBB0_1482
  7557  
  7558  LBB0_890:
        	// Set-up for the narrowing loop in LBB0_892 (8-byte source elements -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 16; r8 = number of 16-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7559  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  7560  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
  7561  	LONG $0xf0468d48             // lea    rax, [rsi - 16]
  7562  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  7563  	LONG $0x04e8c149             // shr    r8, 4
  7564  	LONG $0x01c08349             // add    r8, 1
  7565  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7566  	JE   LBB0_1489
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 8 for loads, unscaled for stores).
  7567  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  7568  	LONG $0xfee08348             // and    rax, -2
  7569  	WORD $0xf748; BYTE $0xd8     // neg    rax
  7570  	WORD $0xff31                 // xor    edi, edi
        	// xmm0 = shuffle control from LCDATA1+64 (low bytes 0x00, 0x08): vpshufb with it
        	// places source bytes 0 and 8, i.e. the low byte of each 64-bit lane, in bytes 0-1.
  7571  	LONG $0x456ff9c5; BYTE $0x40 // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */
  7572  
  7573  LBB0_892:
        	// First group of 16 elements: eight 16-byte loads (two elements each) are
        	// shuffled down to two bytes apiece, then merged pairwise by the word, dword
        	// and qword unpacks into one 16-byte result.
  7574  	LONG $0x0c6ffac5; BYTE $0xfa         // vmovdqu    xmm1, oword [rdx + 8*rdi]
  7575  	LONG $0x546ffac5; WORD $0x10fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
  7576  	LONG $0x5c6ffac5; WORD $0x20fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
  7577  	LONG $0x646ffac5; WORD $0x30fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
  7578  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7579  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7580  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  7581  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7582  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7583  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7584  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7585  	LONG $0x546ffac5; WORD $0x50fa       // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
  7586  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7587  	LONG $0x5c6ffac5; WORD $0x40fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
  7588  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7589  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7590  	LONG $0x5c6ffac5; WORD $0x70fa       // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
  7591  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7592  	LONG $0x646ffac5; WORD $0x60fa       // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
  7593  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7594  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  7595  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  7596  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  7597  	LONG $0x0c7ffac5; BYTE $0x39         // vmovdqu    oword [rcx + rdi], xmm1
        	// Second group of 16 elements: same sequence on source bytes 128..255,
        	// stored 16 bytes further into the destination.
  7598  	QUAD $0x000080fa8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 8*rdi + 128]
  7599  	QUAD $0x000090fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 144]
  7600  	QUAD $0x0000a0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 160]
  7601  	QUAD $0x0000b0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 176]
  7602  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7603  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7604  	LONG $0xca61f1c5                     // vpunpcklwd    xmm1, xmm1, xmm2
  7605  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7606  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7607  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7608  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7609  	QUAD $0x0000d0fa946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 8*rdi + 208]
  7610  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7611  	QUAD $0x0000c0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 192]
  7612  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7613  	LONG $0xd261e1c5                     // vpunpcklwd    xmm2, xmm3, xmm2
  7614  	QUAD $0x0000f0fa9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 8*rdi + 240]
  7615  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7616  	QUAD $0x0000e0faa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 8*rdi + 224]
  7617  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7618  	LONG $0xdb61d9c5                     // vpunpcklwd    xmm3, xmm4, xmm3
  7619  	LONG $0xd362e9c5                     // vpunpckldq    xmm2, xmm2, xmm3
  7620  	LONG $0xca6cf1c5                     // vpunpcklqdq    xmm1, xmm1, xmm2
  7621  	LONG $0x4c7ffac5; WORD $0x1039       // vmovdqu    oword [rcx + rdi + 16], xmm1
        	// 32 elements consumed per iteration; the group counter moves 2 closer to zero.
  7622  	LONG $0x20c78348                     // add    rdi, 32
  7623  	LONG $0x02c08348                     // add    rax, 2
  7624  	JNE  LBB0_892
        	// Continuation label is outside this chunk.
  7625  	JMP  LBB0_1490
  7626  
  7627  LBB0_893:
        	// Set-up for the conversion loop in LBB0_895 (packed float32 -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 32; r8 = number of 32-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7628  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7629  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  7630  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7631  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7632  	LONG $0x05e8c149         // shr    r8, 5
  7633  	LONG $0x01c08349         // add    r8, 1
  7634  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7635  	JE   LBB0_1497
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 4 for loads, unscaled for stores).
  7636  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7637  	LONG $0xfee08348         // and    rax, -2
  7638  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7639  	WORD $0xff31             // xor    edi, edi
  7640  
  7641  LBB0_895:
        	// First group of 32 elements. Each 32-byte load is converted from float32 to
        	// int32 with truncation (vcvttps2dq), narrowed to 16 bits with signed saturation
        	// (vpackssdw), then to bytes with unsigned saturation (vpackuswb).
  7642  	LONG $0x045bfec5; BYTE $0xba         // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
  7643  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  7644  	LONG $0x545bfec5; WORD $0x20ba       // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 32]
  7645  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1
  7646  	LONG $0x397de3c4; WORD $0x01d1       // vextracti128    xmm1, ymm2, 1
  7647  	LONG $0x5c5bfec5; WORD $0x40ba       // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 64]
  7648  	LONG $0xc96be9c5                     // vpackssdw    xmm1, xmm2, xmm1
  7649  	LONG $0x397de3c4; WORD $0x01da       // vextracti128    xmm2, ymm3, 1
  7650  	LONG $0x645bfec5; WORD $0x60ba       // vcvttps2dq    ymm4, yword [rdx + 4*rdi + 96]
  7651  	LONG $0xd26be1c5                     // vpackssdw    xmm2, xmm3, xmm2
  7652  	LONG $0x397de3c4; WORD $0x01e3       // vextracti128    xmm3, ymm4, 1
  7653  	LONG $0xdb6bd9c5                     // vpackssdw    xmm3, xmm4, xmm3
        	// Only the low quadword of each 128-bit lane of ymm2 and ymm0 feeds the
        	// vpunpcklqdq below, so the second source operand of each vpackuswb is don't-care.
  7654  	LONG $0x386de3c4; WORD $0x01d3       // vinserti128    ymm2, ymm2, xmm3, 1
  7655  	LONG $0xd067edc5                     // vpackuswb    ymm2, ymm2, ymm0
  7656  	LONG $0x387de3c4; WORD $0x01c1       // vinserti128    ymm0, ymm0, xmm1, 1
  7657  	LONG $0xc067fdc5                     // vpackuswb    ymm0, ymm0, ymm0
  7658  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
        	// Immediate 216 (0xd8) selects quadwords 0,2,1,3: restores source element order.
  7659  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  7660  	LONG $0x047ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm0
        	// Second group of 32 elements: same sequence on source bytes 128..255,
        	// stored 32 bytes further into the destination.
  7661  	QUAD $0x000080ba845bfec5; BYTE $0x00 // vcvttps2dq    ymm0, yword [rdx + 4*rdi + 128]
  7662  	LONG $0x397de3c4; WORD $0x01c1       // vextracti128    xmm1, ymm0, 1
  7663  	QUAD $0x0000a0ba945bfec5; BYTE $0x00 // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 160]
  7664  	LONG $0xc16bf9c5                     // vpackssdw    xmm0, xmm0, xmm1
  7665  	LONG $0x397de3c4; WORD $0x01d1       // vextracti128    xmm1, ymm2, 1
  7666  	QUAD $0x0000c0ba9c5bfec5; BYTE $0x00 // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 192]
  7667  	LONG $0xc96be9c5                     // vpackssdw    xmm1, xmm2, xmm1
  7668  	LONG $0x397de3c4; WORD $0x01da       // vextracti128    xmm2, ymm3, 1
  7669  	QUAD $0x0000e0baa45bfec5; BYTE $0x00 // vcvttps2dq    ymm4, yword [rdx + 4*rdi + 224]
  7670  	LONG $0xd26be1c5                     // vpackssdw    xmm2, xmm3, xmm2
  7671  	LONG $0x397de3c4; WORD $0x01e3       // vextracti128    xmm3, ymm4, 1
  7672  	LONG $0xdb6bd9c5                     // vpackssdw    xmm3, xmm4, xmm3
  7673  	LONG $0x386de3c4; WORD $0x01d3       // vinserti128    ymm2, ymm2, xmm3, 1
  7674  	LONG $0xd067edc5                     // vpackuswb    ymm2, ymm2, ymm0
  7675  	LONG $0x387de3c4; WORD $0x01c1       // vinserti128    ymm0, ymm0, xmm1, 1
  7676  	LONG $0xc067fdc5                     // vpackuswb    ymm0, ymm0, ymm0
  7677  	LONG $0xc26cfdc5                     // vpunpcklqdq    ymm0, ymm0, ymm2
  7678  	LONG $0x00fde3c4; WORD $0xd8c0       // vpermq    ymm0, ymm0, 216
  7679  	LONG $0x447ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm0
        	// 64 elements consumed per iteration; the group counter moves 2 closer to zero.
  7680  	LONG $0x40c78348                     // add    rdi, 64
  7681  	LONG $0x02c08348                     // add    rax, 2
  7682  	JNE  LBB0_895
        	// Continuation label is outside this chunk.
  7683  	JMP  LBB0_1498
  7684  
  7685  LBB0_896:
        	// Set-up for the unrolled byte copy in LBB0_898 (rdx + rdi -> rcx + rdi).
        	// esi = r9d rounded down to a multiple of 128; r8 = number of 128-byte groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7686  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7687  	WORD $0xe683; BYTE $0x80 // and    esi, -128
  7688  	LONG $0x80468d48         // lea    rax, [rsi - 128]
  7689  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7690  	LONG $0x07e8c149         // shr    r8, 7
  7691  	LONG $0x01c08349         // add    r8, 1
  7692  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7693  	JE   LBB0_1505
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: running byte offset shared by source and destination.
  7694  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7695  	LONG $0xfee08348         // and    rax, -2
  7696  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7697  	WORD $0xff31             // xor    edi, edi
  7698  
  7699  LBB0_898:
        	// One iteration copies 256 bytes: two groups of four 32-byte vectors,
        	// each group loaded into ymm0-ymm3 and stored at the same offset in rcx.
  7700  	LONG $0x0410fcc5; BYTE $0x3a               // vmovups    ymm0, yword [rdx + rdi]
  7701  	LONG $0x4c10fcc5; WORD $0x203a             // vmovups    ymm1, yword [rdx + rdi + 32]
  7702  	LONG $0x5410fcc5; WORD $0x403a             // vmovups    ymm2, yword [rdx + rdi + 64]
  7703  	LONG $0x5c10fcc5; WORD $0x603a             // vmovups    ymm3, yword [rdx + rdi + 96]
  7704  	LONG $0x0411fcc5; BYTE $0x39               // vmovups    yword [rcx + rdi], ymm0
  7705  	LONG $0x4c11fcc5; WORD $0x2039             // vmovups    yword [rcx + rdi + 32], ymm1
  7706  	LONG $0x5411fcc5; WORD $0x4039             // vmovups    yword [rcx + rdi + 64], ymm2
  7707  	LONG $0x5c11fcc5; WORD $0x6039             // vmovups    yword [rcx + rdi + 96], ymm3
  7708  	QUAD $0x0000803a8410fdc5; BYTE $0x00       // vmovupd    ymm0, yword [rdx + rdi + 128]
  7709  	QUAD $0x0000a03a8c10fdc5; BYTE $0x00       // vmovupd    ymm1, yword [rdx + rdi + 160]
  7710  	QUAD $0x0000c03a9410fdc5; BYTE $0x00       // vmovupd    ymm2, yword [rdx + rdi + 192]
  7711  	QUAD $0x0000e03a9c10fdc5; BYTE $0x00       // vmovupd    ymm3, yword [rdx + rdi + 224]
  7712  	QUAD $0x000080398411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 128], ymm0
  7713  	QUAD $0x0000a0398c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 160], ymm1
  7714  	QUAD $0x0000c0399411fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 192], ymm2
  7715  	QUAD $0x0000e0399c11fdc5; BYTE $0x00       // vmovupd    yword [rcx + rdi + 224], ymm3
        	// Advance the offset by 256 bytes and the (negative) group counter by 2;
        	// JNE consumes the flags of the final add.
  7716  	LONG $0x00c78148; WORD $0x0001; BYTE $0x00 // add    rdi, 256
  7717  	LONG $0x02c08348                           // add    rax, 2
  7718  	JNE  LBB0_898
        	// Continuation label is outside this chunk.
  7719  	JMP  LBB0_1506
  7720  
  7721  LBB0_899:
        	// Set-up for the narrowing loop in LBB0_901 (4-byte source elements -> 1-byte results).
        	// esi = r9d rounded down to a multiple of 32; r8 = number of 32-element groups.
        	// NOTE(review): r9d is loaded outside this chunk; presumably the element count - confirm.
  7722  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  7723  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
  7724  	LONG $0xe0468d48             // lea    rax, [rsi - 32]
  7725  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  7726  	LONG $0x05e8c149             // shr    r8, 5
  7727  	LONG $0x01c08349             // add    r8, 1
  7728  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
        	// rax == 0 means exactly one group: bypass the two-groups-per-iteration loop.
        	// The branch target lies outside this chunk.
  7729  	JE   LBB0_1513
        	// rax = -(r8 rounded down to even): counts up by 2 until it reaches zero.
        	// edi = 0: element index (scaled by 4 for loads, unscaled for stores).
  7730  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  7731  	LONG $0xfee08348             // and    rax, -2
  7732  	WORD $0xf748; BYTE $0xd8     // neg    rax
  7733  	WORD $0xff31                 // xor    edi, edi
        	// xmm0 = shuffle control from LCDATA1+112 (low bytes 0x00, 0x04, 0x08, 0x0c): vpshufb
        	// with it gathers the low byte of each 32-bit lane into bytes 0-3.
  7734  	LONG $0x456ff9c5; BYTE $0x70 // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */
  7735  
  7736  LBB0_901:
        	// First group of 32 elements: eight 16-byte loads (four elements each) are
        	// shuffled down to four bytes apiece and merged pairwise with vpunpckldq.
  7737  	LONG $0x0c6ffac5; BYTE $0xba         // vmovdqu    xmm1, oword [rdx + 4*rdi]
  7738  	LONG $0x546ffac5; WORD $0x10ba       // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
  7739  	LONG $0x5c6ffac5; WORD $0x20ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
  7740  	LONG $0x646ffac5; WORD $0x30ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
  7741  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7742  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7743  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7744  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7745  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7746  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  7747  	LONG $0x5c6ffac5; WORD $0x50ba       // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
  7748  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7749  	LONG $0x646ffac5; WORD $0x40ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
  7750  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7751  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  7752  	LONG $0x646ffac5; WORD $0x70ba       // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
  7753  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7754  	LONG $0x6c6ffac5; WORD $0x60ba       // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
  7755  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  7756  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
        	// Join the four 8-byte partial results into one 32-byte vector; vpermq with
        	// immediate 216 (quadwords 0,2,1,3) puts them back in source element order.
  7757  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  7758  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  7759  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  7760  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  7761  	LONG $0x0c7ffec5; BYTE $0x39         // vmovdqu    yword [rcx + rdi], ymm1
        	// Second group of 32 elements: same sequence on source bytes 128..255,
        	// stored 32 bytes further into the destination.
  7762  	QUAD $0x000080ba8c6ffac5; BYTE $0x00 // vmovdqu    xmm1, oword [rdx + 4*rdi + 128]
  7763  	QUAD $0x000090ba946ffac5; BYTE $0x00 // vmovdqu    xmm2, oword [rdx + 4*rdi + 144]
  7764  	QUAD $0x0000a0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 160]
  7765  	QUAD $0x0000b0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 176]
  7766  	LONG $0x0069e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm2, xmm0
  7767  	LONG $0x0071e2c4; BYTE $0xc8         // vpshufb    xmm1, xmm1, xmm0
  7768  	LONG $0xca62f1c5                     // vpunpckldq    xmm1, xmm1, xmm2
  7769  	LONG $0x0059e2c4; BYTE $0xd0         // vpshufb    xmm2, xmm4, xmm0
  7770  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7771  	LONG $0xd262e1c5                     // vpunpckldq    xmm2, xmm3, xmm2
  7772  	QUAD $0x0000d0ba9c6ffac5; BYTE $0x00 // vmovdqu    xmm3, oword [rdx + 4*rdi + 208]
  7773  	LONG $0x0061e2c4; BYTE $0xd8         // vpshufb    xmm3, xmm3, xmm0
  7774  	QUAD $0x0000c0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 192]
  7775  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7776  	LONG $0xdb62d9c5                     // vpunpckldq    xmm3, xmm4, xmm3
  7777  	QUAD $0x0000f0baa46ffac5; BYTE $0x00 // vmovdqu    xmm4, oword [rdx + 4*rdi + 240]
  7778  	LONG $0x0059e2c4; BYTE $0xe0         // vpshufb    xmm4, xmm4, xmm0
  7779  	QUAD $0x0000e0baac6ffac5; BYTE $0x00 // vmovdqu    xmm5, oword [rdx + 4*rdi + 224]
  7780  	LONG $0x0051e2c4; BYTE $0xe8         // vpshufb    xmm5, xmm5, xmm0
  7781  	LONG $0xe462d1c5                     // vpunpckldq    xmm4, xmm5, xmm4
  7782  	LONG $0x3865e3c4; WORD $0x01dc       // vinserti128    ymm3, ymm3, xmm4, 1
  7783  	LONG $0x3875e3c4; WORD $0x01ca       // vinserti128    ymm1, ymm1, xmm2, 1
  7784  	LONG $0xcb6cf5c5                     // vpunpcklqdq    ymm1, ymm1, ymm3
  7785  	LONG $0x00fde3c4; WORD $0xd8c9       // vpermq    ymm1, ymm1, 216
  7786  	LONG $0x4c7ffec5; WORD $0x2039       // vmovdqu    yword [rcx + rdi + 32], ymm1
        	// 64 elements consumed per iteration; the group counter moves 2 closer to zero.
  7787  	LONG $0x40c78348                     // add    rdi, 64
  7788  	LONG $0x02c08348                     // add    rax, 2
  7789  	JNE  LBB0_901
        	// Continuation label is outside this chunk.
  7790  	JMP  LBB0_1514
  7791  
  7792  LBB0_902:
  7793  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7794  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  7795  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7796  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7797  	LONG $0x05e8c149         // shr    r8, 5
  7798  	LONG $0x01c08349         // add    r8, 1
  7799  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  7800  	JE   LBB0_1521
  7801  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7802  	LONG $0xfee08348         // and    rax, -2
  7803  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7804  	WORD $0xff31             // xor    edi, edi
  7805  
  7806  LBB0_904:
        	// Straight copy of 4-byte elements: each iteration moves 256 bytes
        	// (two groups of four 32-byte YMM loads/stores) from [rdx + 4*rdi]
        	// to [rcx + 4*rdi]. rdi advances by 64 elements; rax is a negative
        	// counter stepped by 2 until it reaches zero.
        	// First group: bytes 0..127 of this iteration.
  7807  	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
  7808  	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
  7809  	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
  7810  	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
  7811  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  7812  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  7813  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  7814  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: bytes 128..255 of this iteration.
  7815  	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
  7816  	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
  7817  	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
  7818  	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
  7819  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  7820  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  7821  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  7822  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
  7823  	LONG $0x40c78348                     // add    rdi, 64
        	// The flags from this add drive the JNE: loop until rax reaches zero.
  7824  	LONG $0x02c08348                     // add    rax, 2
  7825  	JNE  LBB0_904
  7826  	JMP  LBB0_1522
  7827  
  7828  LBB0_905:
        	// Set-up for the 2x-unrolled byte -> dword sign-extension loop at LBB0_907.
        	// r9d is read as the count to process (presumably the element count
        	// set up outside this view -- confirm against the dispatch code).
        	//   rsi = r9d rounded down to a multiple of 32 (zero-extended)
        	//   r8  = (rsi - 32) / 32 + 1 = number of 32-element chunks
        	//   rax = -(r8 rounded down to even), counted up by 2 towards zero
        	//   rdi = 0, the running element index
  7829  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7830  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  7831  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7832  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7833  	LONG $0x05e8c149         // shr    r8, 5
  7834  	LONG $0x01c08349         // add    r8, 1
        	// rsi == 32 means there is only one chunk, so the paired loop is skipped.
  7835  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  7836  	JE   LBB0_1529
  7837  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7838  	LONG $0xfee08348         // and    rax, -2
  7839  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7840  	WORD $0xff31             // xor    edi, edi
  7841  
  7842  LBB0_907:
        	// Widening loop with sign extension: each vpmovsxbd reads 8 source
        	// bytes at [rdx + rdi + k] and produces 8 sign-extended dwords, which
        	// are stored at [rcx + 4*rdi + 4*k]. One iteration consumes 64 input
        	// bytes and writes 256 output bytes. rdi advances by 64 elements;
        	// rax is a negative counter stepped by 2 until it reaches zero.
        	// First group: elements 0..31 of this iteration.
  7843  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi]
  7844  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
  7845  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
  7846  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
  7847  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  7848  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  7849  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  7850  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: elements 32..63 of this iteration.
  7851  	LONG $0x217de2c4; WORD $0x3a44; BYTE $0x20 // vpmovsxbd    ymm0, qword [rdx + rdi + 32]
  7852  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovsxbd    ymm1, qword [rdx + rdi + 40]
  7853  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x30 // vpmovsxbd    ymm2, qword [rdx + rdi + 48]
  7854  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovsxbd    ymm3, qword [rdx + rdi + 56]
  7855  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  7856  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  7857  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  7858  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
  7859  	LONG $0x40c78348                           // add    rdi, 64
        	// The flags from this add drive the JNE: loop until rax reaches zero.
  7860  	LONG $0x02c08348                           // add    rax, 2
  7861  	JNE  LBB0_907
  7862  	JMP  LBB0_1530
  7863  
  7864  LBB0_908:
        	// Set-up for the 2x-unrolled byte -> dword zero-extension loop at LBB0_910.
        	// r9d is read as the count to process (presumably the element count
        	// set up outside this view -- confirm against the dispatch code).
        	//   rsi = r9d rounded down to a multiple of 32 (zero-extended)
        	//   r8  = (rsi - 32) / 32 + 1 = number of 32-element chunks
        	//   rax = -(r8 rounded down to even), counted up by 2 towards zero
        	//   rdi = 0, the running element index
  7865  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7866  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  7867  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7868  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7869  	LONG $0x05e8c149         // shr    r8, 5
  7870  	LONG $0x01c08349         // add    r8, 1
        	// rsi == 32 means there is only one chunk, so the paired loop is skipped.
  7871  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  7872  	JE   LBB0_1537
  7873  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7874  	LONG $0xfee08348         // and    rax, -2
  7875  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7876  	WORD $0xff31             // xor    edi, edi
  7877  
  7878  LBB0_910:
        	// Widening loop with zero extension: each vpmovzxbd reads 8 source
        	// bytes at [rdx + rdi + k] and produces 8 zero-extended dwords, which
        	// are stored at [rcx + 4*rdi + 4*k]. One iteration consumes 64 input
        	// bytes and writes 256 output bytes. rdi advances by 64 elements;
        	// rax is a negative counter stepped by 2 until it reaches zero.
        	// First group: elements 0..31 of this iteration.
  7879  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi]
  7880  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
  7881  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
  7882  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
  7883  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  7884  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  7885  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  7886  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: elements 32..63 of this iteration.
  7887  	LONG $0x317de2c4; WORD $0x3a44; BYTE $0x20 // vpmovzxbd    ymm0, qword [rdx + rdi + 32]
  7888  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x28 // vpmovzxbd    ymm1, qword [rdx + rdi + 40]
  7889  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x30 // vpmovzxbd    ymm2, qword [rdx + rdi + 48]
  7890  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x38 // vpmovzxbd    ymm3, qword [rdx + rdi + 56]
  7891  	QUAD $0x000080b9847ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 128], ymm0
  7892  	QUAD $0x0000a0b98c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 160], ymm1
  7893  	QUAD $0x0000c0b9947ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 192], ymm2
  7894  	QUAD $0x0000e0b99c7ffec5; BYTE $0x00       // vmovdqu    yword [rcx + 4*rdi + 224], ymm3
  7895  	LONG $0x40c78348                           // add    rdi, 64
        	// The flags from this add drive the JNE: loop until rax reaches zero.
  7896  	LONG $0x02c08348                           // add    rax, 2
  7897  	JNE  LBB0_910
  7898  	JMP  LBB0_1538
  7899  
  7900  LBB0_911:
        	// Set-up for the 2x-unrolled 4-byte-element copy loop at LBB0_913.
        	// r9d is read as the count to process (presumably the element count
        	// set up outside this view -- confirm against the dispatch code).
        	//   rsi = r9d rounded down to a multiple of 32 (zero-extended)
        	//   r8  = (rsi - 32) / 32 + 1 = number of 32-element chunks
        	//   rax = -(r8 rounded down to even), counted up by 2 towards zero
        	//   rdi = 0, the running element index
  7901  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  7902  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  7903  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  7904  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  7905  	LONG $0x05e8c149         // shr    r8, 5
  7906  	LONG $0x01c08349         // add    r8, 1
        	// rsi == 32 means there is only one chunk, so the paired loop is skipped.
  7907  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  7908  	JE   LBB0_1545
  7909  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  7910  	LONG $0xfee08348         // and    rax, -2
  7911  	WORD $0xf748; BYTE $0xd8 // neg    rax
  7912  	WORD $0xff31             // xor    edi, edi
  7913  
  7914  LBB0_913:
        	// Straight copy of 4-byte elements: each iteration moves 256 bytes
        	// (two groups of four 32-byte YMM loads/stores) from [rdx + 4*rdi]
        	// to [rcx + 4*rdi]. rdi advances by 64 elements; rax is a negative
        	// counter stepped by 2 until it reaches zero.
        	// First group: bytes 0..127 of this iteration.
  7915  	LONG $0x0410fcc5; BYTE $0xba         // vmovups    ymm0, yword [rdx + 4*rdi]
  7916  	LONG $0x4c10fcc5; WORD $0x20ba       // vmovups    ymm1, yword [rdx + 4*rdi + 32]
  7917  	LONG $0x5410fcc5; WORD $0x40ba       // vmovups    ymm2, yword [rdx + 4*rdi + 64]
  7918  	LONG $0x5c10fcc5; WORD $0x60ba       // vmovups    ymm3, yword [rdx + 4*rdi + 96]
  7919  	LONG $0x0411fcc5; BYTE $0xb9         // vmovups    yword [rcx + 4*rdi], ymm0
  7920  	LONG $0x4c11fcc5; WORD $0x20b9       // vmovups    yword [rcx + 4*rdi + 32], ymm1
  7921  	LONG $0x5411fcc5; WORD $0x40b9       // vmovups    yword [rcx + 4*rdi + 64], ymm2
  7922  	LONG $0x5c11fcc5; WORD $0x60b9       // vmovups    yword [rcx + 4*rdi + 96], ymm3
        	// Second group: bytes 128..255 of this iteration.
  7923  	QUAD $0x000080ba8410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 4*rdi + 128]
  7924  	QUAD $0x0000a0ba8c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 4*rdi + 160]
  7925  	QUAD $0x0000c0ba9410fdc5; BYTE $0x00 // vmovupd    ymm2, yword [rdx + 4*rdi + 192]
  7926  	QUAD $0x0000e0ba9c10fdc5; BYTE $0x00 // vmovupd    ymm3, yword [rdx + 4*rdi + 224]
  7927  	QUAD $0x000080b98411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 128], ymm0
  7928  	QUAD $0x0000a0b98c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 160], ymm1
  7929  	QUAD $0x0000c0b99411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 192], ymm2
  7930  	QUAD $0x0000e0b99c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 4*rdi + 224], ymm3
  7931  	LONG $0x40c78348                     // add    rdi, 64
        	// The flags from this add drive the JNE: loop until rax reaches zero.
  7932  	LONG $0x02c08348                     // add    rax, 2
  7933  	JNE  LBB0_913
  7934  	JMP  LBB0_1546
  7935  
  7936  LBB0_793:
        	// Set-up for the 16-element unrolled double -> unsigned 64-bit loop
        	// at LBB0_794.
        	//   r10  = -(r10 rounded down to a multiple of 4); the loop adds 4
        	//          per iteration until it reaches zero
        	//   rax  = 0, the running element index
        	//   xmm0 = the double at LCDATA1+0 (0x43e0000000000000, i.e. 2^63)
        	// r11 is used by the loop but not set here; presumably it already
        	// holds the sign-bit mask 0x8000000000000000 -- confirm in the caller path.
  7937  	LONG $0xfce28349             // and    r10, -4
  7938  	WORD $0xf749; BYTE $0xda     // neg    r10
  7939  	WORD $0xc031                 // xor    eax, eax
  7940  	LONG $0x4510fbc5; BYTE $0x00 // vmovsd    xmm0, qword 0[rbp] /* [rip + .LCPI0_0] */
  7941  
  7942  LBB0_794:
        	// Unrolled scalar conversion of 16 doubles per iteration, reading
        	// [rdx + 8*rax + k] and writing 64-bit results to [rcx + 8*rax + k].
        	// For each input x (xmm0 holds the threshold constant loaded before
        	// the loop):
        	//   hi = cvttsd2si(x - xmm0) XOR r11
        	//   lo = cvttsd2si(x)
        	//   result = (x >= xmm0) ? hi : lo      (vucomisd + cmovae)
        	// Results are packed in pairs with vpunpcklqdq and stored 16 bytes
        	// at a time. rax advances by 16 elements; r10 is a negative counter
        	// stepped by 4 until it reaches zero.
        	// r11 is presumably the sign-bit mask 0x8000000000000000, making this
        	// the usual double -> uint64 idiom -- it is loaded outside this block.
        	// Elements 0..3 (stores at +0 and +16).
  7943  	LONG $0x4c10fbc5; WORD $0x08c2 // vmovsd    xmm1, qword [rdx + 8*rax + 8]
  7944  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  7945  	LONG $0x2cfbe1c4; BYTE $0xda   // vcvttsd2si    rbx, xmm2
  7946  	LONG $0x1410fbc5; BYTE $0xc2   // vmovsd    xmm2, qword [rdx + 8*rax]
  7947  	WORD $0x314c; BYTE $0xdb       // xor    rbx, r11
  7948  	LONG $0x2cfbe1c4; BYTE $0xf1   // vcvttsd2si    rsi, xmm1
  7949  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  7950  	LONG $0xf3430f48               // cmovae    rsi, rbx
  7951  	LONG $0xc85cebc5               // vsubsd    xmm1, xmm2, xmm0
  7952  	LONG $0x2cfbe1c4; BYTE $0xd9   // vcvttsd2si    rbx, xmm1
  7953  	WORD $0x314c; BYTE $0xdb       // xor    rbx, r11
  7954  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  7955  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  7956  	LONG $0x6ef9e1c4; BYTE $0xce   // vmovq    xmm1, rsi
  7957  	LONG $0xfb430f48               // cmovae    rdi, rbx
  7958  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  7959  	LONG $0x5c10fbc5; WORD $0x18c2 // vmovsd    xmm3, qword [rdx + 8*rax + 24]
  7960  	LONG $0xe05ce3c5               // vsubsd    xmm4, xmm3, xmm0
  7961  	LONG $0x2cfbe1c4; BYTE $0xf4   // vcvttsd2si    rsi, xmm4
  7962  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  7963  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  7964  	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
  7965  	LONG $0xd82ef9c5               // vucomisd    xmm3, xmm0
  7966  	LONG $0xfe430f48               // cmovae    rdi, rsi
  7967  	LONG $0x5410fbc5; WORD $0x10c2 // vmovsd    xmm2, qword [rdx + 8*rax + 16]
  7968  	LONG $0xd85cebc5               // vsubsd    xmm3, xmm2, xmm0
  7969  	LONG $0x2cfbe1c4; BYTE $0xf3   // vcvttsd2si    rsi, xmm3
  7970  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  7971  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  7972  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  7973  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  7974  	LONG $0xfe430f48               // cmovae    rdi, rsi
  7975  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  7976  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  7977  	LONG $0x547ffac5; WORD $0x10c1 // vmovdqu    oword [rcx + 8*rax + 16], xmm2
  7978  	LONG $0x0c7ffac5; BYTE $0xc1   // vmovdqu    oword [rcx + 8*rax], xmm1
        	// Elements 4..7 (stores at +32 and +48).
  7979  	LONG $0x4c10fbc5; WORD $0x28c2 // vmovsd    xmm1, qword [rdx + 8*rax + 40]
  7980  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  7981  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  7982  	LONG $0x5410fbc5; WORD $0x20c2 // vmovsd    xmm2, qword [rdx + 8*rax + 32]
  7983  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  7984  	LONG $0x2cfbe1c4; BYTE $0xf9   // vcvttsd2si    rdi, xmm1
  7985  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  7986  	LONG $0xfe430f48               // cmovae    rdi, rsi
  7987  	LONG $0xc85cebc5               // vsubsd    xmm1, xmm2, xmm0
  7988  	LONG $0x2cfbe1c4; BYTE $0xf1   // vcvttsd2si    rsi, xmm1
  7989  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  7990  	LONG $0x2cfbe1c4; BYTE $0xda   // vcvttsd2si    rbx, xmm2
  7991  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  7992  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  7993  	LONG $0xde430f48               // cmovae    rbx, rsi
  7994  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  7995  	LONG $0x5c10fbc5; WORD $0x38c2 // vmovsd    xmm3, qword [rdx + 8*rax + 56]
  7996  	LONG $0xe05ce3c5               // vsubsd    xmm4, xmm3, xmm0
  7997  	LONG $0x2cfbe1c4; BYTE $0xf4   // vcvttsd2si    rsi, xmm4
  7998  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  7999  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8000  	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
  8001  	LONG $0xd82ef9c5               // vucomisd    xmm3, xmm0
  8002  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8003  	LONG $0x5410fbc5; WORD $0x30c2 // vmovsd    xmm2, qword [rdx + 8*rax + 48]
  8004  	LONG $0xd85cebc5               // vsubsd    xmm3, xmm2, xmm0
  8005  	LONG $0x2cfbe1c4; BYTE $0xf3   // vcvttsd2si    rsi, xmm3
  8006  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8007  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8008  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  8009  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  8010  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8011  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8012  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  8013  	LONG $0x547ffac5; WORD $0x30c1 // vmovdqu    oword [rcx + 8*rax + 48], xmm2
  8014  	LONG $0x4c7ffac5; WORD $0x20c1 // vmovdqu    oword [rcx + 8*rax + 32], xmm1
        	// Elements 8..11 (stores at +64 and +80).
  8015  	LONG $0x4c10fbc5; WORD $0x48c2 // vmovsd    xmm1, qword [rdx + 8*rax + 72]
  8016  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8017  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  8018  	LONG $0x5410fbc5; WORD $0x40c2 // vmovsd    xmm2, qword [rdx + 8*rax + 64]
  8019  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8020  	LONG $0x2cfbe1c4; BYTE $0xf9   // vcvttsd2si    rdi, xmm1
  8021  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  8022  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8023  	LONG $0xc85cebc5               // vsubsd    xmm1, xmm2, xmm0
  8024  	LONG $0x2cfbe1c4; BYTE $0xf1   // vcvttsd2si    rsi, xmm1
  8025  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8026  	LONG $0x2cfbe1c4; BYTE $0xda   // vcvttsd2si    rbx, xmm2
  8027  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  8028  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8029  	LONG $0xde430f48               // cmovae    rbx, rsi
  8030  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8031  	LONG $0x5c10fbc5; WORD $0x58c2 // vmovsd    xmm3, qword [rdx + 8*rax + 88]
  8032  	LONG $0xe05ce3c5               // vsubsd    xmm4, xmm3, xmm0
  8033  	LONG $0x2cfbe1c4; BYTE $0xf4   // vcvttsd2si    rsi, xmm4
  8034  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8035  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8036  	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
  8037  	LONG $0xd82ef9c5               // vucomisd    xmm3, xmm0
  8038  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8039  	LONG $0x5410fbc5; WORD $0x50c2 // vmovsd    xmm2, qword [rdx + 8*rax + 80]
  8040  	LONG $0xd85cebc5               // vsubsd    xmm3, xmm2, xmm0
  8041  	LONG $0x2cfbe1c4; BYTE $0xf3   // vcvttsd2si    rsi, xmm3
  8042  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8043  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8044  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  8045  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  8046  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8047  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8048  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  8049  	LONG $0x547ffac5; WORD $0x50c1 // vmovdqu    oword [rcx + 8*rax + 80], xmm2
  8050  	LONG $0x4c7ffac5; WORD $0x40c1 // vmovdqu    oword [rcx + 8*rax + 64], xmm1
        	// Elements 12..15 (stores at +96 and +112).
  8051  	LONG $0x4c10fbc5; WORD $0x68c2 // vmovsd    xmm1, qword [rdx + 8*rax + 104]
  8052  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8053  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  8054  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8055  	LONG $0x2cfbe1c4; BYTE $0xf9   // vcvttsd2si    rdi, xmm1
  8056  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  8057  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8058  	LONG $0x4c10fbc5; WORD $0x60c2 // vmovsd    xmm1, qword [rdx + 8*rax + 96]
  8059  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8060  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  8061  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8062  	LONG $0x2cfbe1c4; BYTE $0xd9   // vcvttsd2si    rbx, xmm1
  8063  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  8064  	LONG $0xde430f48               // cmovae    rbx, rsi
  8065  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8066  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8067  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8068  	LONG $0x5410fbc5; WORD $0x78c2 // vmovsd    xmm2, qword [rdx + 8*rax + 120]
  8069  	LONG $0xd85cebc5               // vsubsd    xmm3, xmm2, xmm0
  8070  	LONG $0x2cfbe1c4; BYTE $0xf3   // vcvttsd2si    rsi, xmm3
  8071  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8072  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  8073  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  8074  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8075  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8076  	LONG $0x5c10fbc5; WORD $0x70c2 // vmovsd    xmm3, qword [rdx + 8*rax + 112]
  8077  	LONG $0xe05ce3c5               // vsubsd    xmm4, xmm3, xmm0
  8078  	LONG $0x2cfbe1c4; BYTE $0xf4   // vcvttsd2si    rsi, xmm4
  8079  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8080  	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
  8081  	LONG $0xd82ef9c5               // vucomisd    xmm3, xmm0
  8082  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8083  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8084  	LONG $0xd26ce1c5               // vpunpcklqdq    xmm2, xmm3, xmm2
  8085  	LONG $0x547ffac5; WORD $0x70c1 // vmovdqu    oword [rcx + 8*rax + 112], xmm2
  8086  	LONG $0x4c7ffac5; WORD $0x60c1 // vmovdqu    oword [rcx + 8*rax + 96], xmm1
        	// Advance the element index and the negative loop counter.
  8087  	LONG $0x10c08348               // add    rax, 16
  8088  	LONG $0x04c28349               // add    r10, 4
  8089  	JNE  LBB0_794
  8090  
  8091  LBB0_795:
        	// Set-up for the 4-element remainder loop at LBB0_797.
        	// If r8 is zero there is nothing left for that loop and control goes
        	// to LBB0_798. Otherwise:
        	//   rax  = rax * 8, turning the element index into a byte offset
        	//   r8   = -r8, a counter incremented by 1 per iteration up to zero
        	//   xmm0 = the double at LCDATA1+0 (0x43e0000000000000, i.e. 2^63)
        	// r8 is presumably the number of leftover 4-element groups; it is
        	// computed outside this view.
  8092  	WORD $0x854d; BYTE $0xc0     // test    r8, r8
  8093  	JE   LBB0_798
  8094  	LONG $0x03e0c148             // shl    rax, 3
  8095  	WORD $0xf749; BYTE $0xd8     // neg    r8
  8096  	LONG $0x4510fbc5; BYTE $0x00 // vmovsd    xmm0, qword 0[rbp] /* [rip + .LCPI0_0] */
  8097  
  8098  LBB0_797:
        	// Remainder loop: converts 4 doubles per iteration, reading
        	// [rdx + rax + k] and writing [rcx + rax + k], where rax is a byte
        	// offset. Each element uses the same selection as the unrolled loop:
        	//   result = (x >= xmm0) ? cvttsd2si(x - xmm0) XOR r11 : cvttsd2si(x)
        	// rax advances by 32 bytes; r8 is a negative counter incremented
        	// until it reaches zero.
        	// Elements 1 and 0, packed into xmm1.
  8099  	LONG $0x4c10fbc5; WORD $0x0802 // vmovsd    xmm1, qword [rdx + rax + 8]
  8100  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8101  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  8102  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8103  	LONG $0x2cfbe1c4; BYTE $0xf9   // vcvttsd2si    rdi, xmm1
  8104  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  8105  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8106  	LONG $0x0c10fbc5; BYTE $0x02   // vmovsd    xmm1, qword [rdx + rax]
  8107  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8108  	LONG $0x2cfbe1c4; BYTE $0xf2   // vcvttsd2si    rsi, xmm2
  8109  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8110  	LONG $0x2cfbe1c4; BYTE $0xd9   // vcvttsd2si    rbx, xmm1
  8111  	LONG $0xc82ef9c5               // vucomisd    xmm1, xmm0
  8112  	LONG $0xde430f48               // cmovae    rbx, rsi
  8113  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8114  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8115  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
        	// Elements 3 and 2, packed into xmm2.
  8116  	LONG $0x5410fbc5; WORD $0x1802 // vmovsd    xmm2, qword [rdx + rax + 24]
  8117  	LONG $0xd85cebc5               // vsubsd    xmm3, xmm2, xmm0
  8118  	LONG $0x2cfbe1c4; BYTE $0xf3   // vcvttsd2si    rsi, xmm3
  8119  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8120  	LONG $0x2cfbe1c4; BYTE $0xfa   // vcvttsd2si    rdi, xmm2
  8121  	LONG $0xd02ef9c5               // vucomisd    xmm2, xmm0
  8122  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8123  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8124  	LONG $0x5c10fbc5; WORD $0x1002 // vmovsd    xmm3, qword [rdx + rax + 16]
  8125  	LONG $0xe05ce3c5               // vsubsd    xmm4, xmm3, xmm0
  8126  	LONG $0x2cfbe1c4; BYTE $0xf4   // vcvttsd2si    rsi, xmm4
  8127  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8128  	LONG $0x2cfbe1c4; BYTE $0xfb   // vcvttsd2si    rdi, xmm3
  8129  	LONG $0xd82ef9c5               // vucomisd    xmm3, xmm0
  8130  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8131  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8132  	LONG $0xd26ce1c5               // vpunpcklqdq    xmm2, xmm3, xmm2
  8133  	LONG $0x547ffac5; WORD $0x1001 // vmovdqu    oword [rcx + rax + 16], xmm2
  8134  	LONG $0x0c7ffac5; BYTE $0x01   // vmovdqu    oword [rcx + rax], xmm1
        	// Advance the byte offset and the negative loop counter.
  8135  	LONG $0x20c08348               // add    rax, 32
  8136  	WORD $0xff49; BYTE $0xc0       // inc    r8
  8137  	JNE  LBB0_797
  8138  
  8139  LBB0_798:
        	// Completion check: when r14 equals r9, jump to LBB0_1553; otherwise
        	// fall through into the scalar tail. The tail loop below uses r14 as
        	// its element index and r9 as its end bound (both are set outside
        	// this view; presumably elements already converted and total count).
  8140  	WORD $0x394d; BYTE $0xce // cmp    r14, r9
  8141  	JE   LBB0_1553
  8142  
  8143  LBB0_799:
        	// Load the threshold for the scalar tail loop:
        	// xmm0 = the double at LCDATA1+0 (0x43e0000000000000, i.e. 2^63).
  8144  	LONG $0x4510fbc5; BYTE $0x00 // vmovsd    xmm0, qword 0[rbp] /* [rip + .LCPI0_0] */
  8145  
  8146  LBB0_800:
        	// Scalar tail: converts one double per iteration from [rdx + 8*r14]
        	// and stores the 64-bit result at [rcx + 8*r14], until r14 == r9.
        	//   rax = cvttsd2si(x - xmm0) XOR r11
        	//   rsi = cvttsd2si(x)
        	//   result = (xmm0 <= x) ? rax : rsi
        	// NOTE(review): the compare operands are reversed here (vucomisd
        	// xmm0, x with cmovbe) compared with the unrolled loops (vucomisd
        	// x, xmm0 with cmovae). For ordered inputs both pick the same value;
        	// for an unordered compare (NaN) cmovbe is taken while cmovae is not.
  8147  	LONG $0x107ba1c4; WORD $0xf20c // vmovsd    xmm1, qword [rdx + 8*r14]
  8148  	LONG $0xd05cf3c5               // vsubsd    xmm2, xmm1, xmm0
  8149  	LONG $0x2cfbe1c4; BYTE $0xc2   // vcvttsd2si    rax, xmm2
  8150  	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
  8151  	LONG $0x2cfbe1c4; BYTE $0xf1   // vcvttsd2si    rsi, xmm1
  8152  	LONG $0xc12ef9c5               // vucomisd    xmm0, xmm1
  8153  	LONG $0xf0460f48               // cmovbe    rsi, rax
  8154  	LONG $0xf134894a               // mov    qword [rcx + 8*r14], rsi
  8155  	LONG $0x01c68349               // add    r14, 1
  8156  	WORD $0x394d; BYTE $0xf1       // cmp    r9, r14
  8157  	JNE  LBB0_800
  8158  	JMP  LBB0_1553
  8159  
  8160  LBB0_810:
        	// Set-up for the 16-element unrolled float32 -> unsigned 64-bit loop
        	// at LBB0_811.
        	//   r10  = -(r10 rounded down to a multiple of 4); the loop adds 4
        	//          per iteration until it reaches zero
        	//   rax  = 0, the running element index
        	//   xmm0 = the float at LCDATA1+40 (low dword 0x5f000000, i.e. 2^63)
        	//   r11  = 0x8000000000000000, the sign-bit mask XORed into results
  8161  	LONG $0xfce28349                       // and    r10, -4
  8162  	WORD $0xf749; BYTE $0xda               // neg    r10
  8163  	WORD $0xc031                           // xor    eax, eax
  8164  	LONG $0x4510fac5; BYTE $0x28           // vmovss    xmm0, dword 40[rbp] /* [rip + .LCPI0_1] */
  8165  	QUAD $0x000000000000bb49; WORD $0x8000 // mov    r11, -9223372036854775808
  8166  
  8167  LBB0_811:
        	// Unrolled scalar conversion of 16 float32 values per iteration,
        	// reading [rdx + 4*rax + k] and writing 64-bit results to
        	// [rcx + 8*rax + 2*k]. For each input x (xmm0 = threshold constant,
        	// r11 = mask, both loaded before the loop):
        	//   hi = cvttss2si(x - xmm0) XOR r11
        	//   lo = cvttss2si(x)
        	//   result = (x >= xmm0) ? hi : lo      (vucomiss + cmovae)
        	// Results are packed in pairs with vpunpcklqdq and stored 16 bytes
        	// at a time. rax advances by 16 elements; r10 is a negative counter
        	// stepped by 4 until it reaches zero.
        	// Elements 0..3 (stores at +0 and +16).
  8168  	LONG $0x4c10fac5; WORD $0x0482 // vmovss    xmm1, dword [rdx + 4*rax + 4]
  8169  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8170  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8171  	LONG $0x1410fac5; BYTE $0x82   // vmovss    xmm2, dword [rdx + 4*rax]
  8172  	WORD $0x314c; BYTE $0xdf       // xor    rdi, r11
  8173  	LONG $0x2cfae1c4; BYTE $0xd9   // vcvttss2si    rbx, xmm1
  8174  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8175  	LONG $0xdf430f48               // cmovae    rbx, rdi
  8176  	LONG $0xc85ceac5               // vsubss    xmm1, xmm2, xmm0
  8177  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8178  	WORD $0x314c; BYTE $0xdf       // xor    rdi, r11
  8179  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8180  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8181  	LONG $0x6ef9e1c4; BYTE $0xcb   // vmovq    xmm1, rbx
  8182  	LONG $0xf7430f48               // cmovae    rsi, rdi
  8183  	LONG $0x6ef9e1c4; BYTE $0xd6   // vmovq    xmm2, rsi
  8184  	LONG $0x5c10fac5; WORD $0x0c82 // vmovss    xmm3, dword [rdx + 4*rax + 12]
  8185  	LONG $0xe05ce2c5               // vsubss    xmm4, xmm3, xmm0
  8186  	LONG $0x2cfae1c4; BYTE $0xf4   // vcvttss2si    rsi, xmm4
  8187  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8188  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8189  	LONG $0x2cfae1c4; BYTE $0xfb   // vcvttss2si    rdi, xmm3
  8190  	LONG $0xd82ef8c5               // vucomiss    xmm3, xmm0
  8191  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8192  	LONG $0x5410fac5; WORD $0x0882 // vmovss    xmm2, dword [rdx + 4*rax + 8]
  8193  	LONG $0xd85ceac5               // vsubss    xmm3, xmm2, xmm0
  8194  	LONG $0x2cfae1c4; BYTE $0xf3   // vcvttss2si    rsi, xmm3
  8195  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8196  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8197  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8198  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8199  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8200  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8201  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  8202  	LONG $0x547ffac5; WORD $0x10c1 // vmovdqu    oword [rcx + 8*rax + 16], xmm2
  8203  	LONG $0x0c7ffac5; BYTE $0xc1   // vmovdqu    oword [rcx + 8*rax], xmm1
        	// Elements 4..7 (stores at +32 and +48).
  8204  	LONG $0x4c10fac5; WORD $0x1482 // vmovss    xmm1, dword [rdx + 4*rax + 20]
  8205  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8206  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8207  	LONG $0x5410fac5; WORD $0x1082 // vmovss    xmm2, dword [rdx + 4*rax + 16]
  8208  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8209  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8210  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8211  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8212  	LONG $0xc85ceac5               // vsubss    xmm1, xmm2, xmm0
  8213  	LONG $0x2cfae1c4; BYTE $0xf1   // vcvttss2si    rsi, xmm1
  8214  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8215  	LONG $0x2cfae1c4; BYTE $0xda   // vcvttss2si    rbx, xmm2
  8216  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8217  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8218  	LONG $0xde430f48               // cmovae    rbx, rsi
  8219  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8220  	LONG $0x5c10fac5; WORD $0x1c82 // vmovss    xmm3, dword [rdx + 4*rax + 28]
  8221  	LONG $0xe05ce2c5               // vsubss    xmm4, xmm3, xmm0
  8222  	LONG $0x2cfae1c4; BYTE $0xf4   // vcvttss2si    rsi, xmm4
  8223  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8224  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8225  	LONG $0x2cfae1c4; BYTE $0xfb   // vcvttss2si    rdi, xmm3
  8226  	LONG $0xd82ef8c5               // vucomiss    xmm3, xmm0
  8227  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8228  	LONG $0x5410fac5; WORD $0x1882 // vmovss    xmm2, dword [rdx + 4*rax + 24]
  8229  	LONG $0xd85ceac5               // vsubss    xmm3, xmm2, xmm0
  8230  	LONG $0x2cfae1c4; BYTE $0xf3   // vcvttss2si    rsi, xmm3
  8231  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8232  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8233  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8234  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8235  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8236  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8237  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  8238  	LONG $0x547ffac5; WORD $0x30c1 // vmovdqu    oword [rcx + 8*rax + 48], xmm2
  8239  	LONG $0x4c7ffac5; WORD $0x20c1 // vmovdqu    oword [rcx + 8*rax + 32], xmm1
        	// Elements 8..11 (stores at +64 and +80).
  8240  	LONG $0x4c10fac5; WORD $0x2482 // vmovss    xmm1, dword [rdx + 4*rax + 36]
  8241  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8242  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8243  	LONG $0x5410fac5; WORD $0x2082 // vmovss    xmm2, dword [rdx + 4*rax + 32]
  8244  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8245  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8246  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8247  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8248  	LONG $0xc85ceac5               // vsubss    xmm1, xmm2, xmm0
  8249  	LONG $0x2cfae1c4; BYTE $0xf1   // vcvttss2si    rsi, xmm1
  8250  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8251  	LONG $0x2cfae1c4; BYTE $0xda   // vcvttss2si    rbx, xmm2
  8252  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8253  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8254  	LONG $0xde430f48               // cmovae    rbx, rsi
  8255  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8256  	LONG $0x5c10fac5; WORD $0x2c82 // vmovss    xmm3, dword [rdx + 4*rax + 44]
  8257  	LONG $0xe05ce2c5               // vsubss    xmm4, xmm3, xmm0
  8258  	LONG $0x2cfae1c4; BYTE $0xf4   // vcvttss2si    rsi, xmm4
  8259  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8260  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8261  	LONG $0x2cfae1c4; BYTE $0xfb   // vcvttss2si    rdi, xmm3
  8262  	LONG $0xd82ef8c5               // vucomiss    xmm3, xmm0
  8263  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8264  	LONG $0x5410fac5; WORD $0x2882 // vmovss    xmm2, dword [rdx + 4*rax + 40]
  8265  	LONG $0xd85ceac5               // vsubss    xmm3, xmm2, xmm0
  8266  	LONG $0x2cfae1c4; BYTE $0xf3   // vcvttss2si    rsi, xmm3
  8267  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8268  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8269  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8270  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8271  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8272  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8273  	LONG $0xd36ce9c5               // vpunpcklqdq    xmm2, xmm2, xmm3
  8274  	LONG $0x547ffac5; WORD $0x50c1 // vmovdqu    oword [rcx + 8*rax + 80], xmm2
  8275  	LONG $0x4c7ffac5; WORD $0x40c1 // vmovdqu    oword [rcx + 8*rax + 64], xmm1
        	// Elements 12..15 (stores at +96 and +112).
  8276  	LONG $0x4c10fac5; WORD $0x3482 // vmovss    xmm1, dword [rdx + 4*rax + 52]
  8277  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8278  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8279  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8280  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8281  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8282  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8283  	LONG $0x4c10fac5; WORD $0x3082 // vmovss    xmm1, dword [rdx + 4*rax + 48]
  8284  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8285  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8286  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8287  	LONG $0x2cfae1c4; BYTE $0xd9   // vcvttss2si    rbx, xmm1
  8288  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8289  	LONG $0xde430f48               // cmovae    rbx, rsi
  8290  	LONG $0x6ef9e1c4; BYTE $0xcf   // vmovq    xmm1, rdi
  8291  	LONG $0x6ef9e1c4; BYTE $0xd3   // vmovq    xmm2, rbx
  8292  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
  8293  	LONG $0x5410fac5; WORD $0x3c82 // vmovss    xmm2, dword [rdx + 4*rax + 60]
  8294  	LONG $0xd85ceac5               // vsubss    xmm3, xmm2, xmm0
  8295  	LONG $0x2cfae1c4; BYTE $0xf3   // vcvttss2si    rsi, xmm3
  8296  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8297  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8298  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8299  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8300  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8301  	LONG $0x5c10fac5; WORD $0x3882 // vmovss    xmm3, dword [rdx + 4*rax + 56]
  8302  	LONG $0xe05ce2c5               // vsubss    xmm4, xmm3, xmm0
  8303  	LONG $0x2cfae1c4; BYTE $0xf4   // vcvttss2si    rsi, xmm4
  8304  	WORD $0x314c; BYTE $0xde       // xor    rsi, r11
  8305  	LONG $0x2cfae1c4; BYTE $0xfb   // vcvttss2si    rdi, xmm3
  8306  	LONG $0xd82ef8c5               // vucomiss    xmm3, xmm0
  8307  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8308  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
  8309  	LONG $0xd26ce1c5               // vpunpcklqdq    xmm2, xmm3, xmm2
  8310  	LONG $0x547ffac5; WORD $0x70c1 // vmovdqu    oword [rcx + 8*rax + 112], xmm2
  8311  	LONG $0x4c7ffac5; WORD $0x60c1 // vmovdqu    oword [rcx + 8*rax + 96], xmm1
        	// Advance the element index and the negative loop counter.
  8312  	LONG $0x10c08348               // add    rax, 16
  8313  	LONG $0x04c28349               // add    r10, 4
  8314  	JNE  LBB0_811
  8315  
  8316  LBB0_812:
        // Leftover vectors of the float32 -> unsigned 64-bit cast. Runs r8 more
        // iterations of four elements each, or skips to LBB0_815 when r8 is zero.
        // r8 and rax are set up before this excerpt.
  8317  	WORD $0x854d; BYTE $0xc0               // test    r8, r8
  8318  	JE   LBB0_815
        	// rax becomes a byte offset into the 4-byte inputs; the 8-byte outputs
        	// are then addressed as rcx + 2*rax.
  8319  	LONG $0x02e0c148                       // shl    rax, 2
        	// Negate the count so the loop can count up to zero with inc/jne.
  8320  	WORD $0xf749; BYTE $0xd8               // neg    r8
        	// xmm0 = 2^63 as float32 (0x5f000000, the low dword at LCDATA1+0x28).
  8321  	LONG $0x4510fac5; BYTE $0x28           // vmovss    xmm0, dword 40[rbp] /* [rip + .LCPI0_1] */
        	// r10 = bit 63 only; xor-ing with it adds 2^63 back to a biased result.
  8322  	QUAD $0x000000000000ba49; WORD $0x8000 // mov    r10, -9223372036854775808
  8323  
  8324  LBB0_814:
        // For each of the four inputs x: rsi = cvtt(x - 2^63) ^ bit63 and a second
        // register = cvtt(x); cmovae keeps the biased value when x >= 2^63.
        // Element 1 (offset +4)
  8325  	LONG $0x4c10fac5; WORD $0x0402 // vmovss    xmm1, dword [rdx + rax + 4]
  8326  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8327  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8328  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  8329  	LONG $0x2cfae1c4; BYTE $0xd9   // vcvttss2si    rbx, xmm1
  8330  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8331  	LONG $0xde430f48               // cmovae    rbx, rsi
        // Element 0 (offset +0)
  8332  	LONG $0x0c10fac5; BYTE $0x02   // vmovss    xmm1, dword [rdx + rax]
  8333  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8334  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8335  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  8336  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8337  	LONG $0xc82ef8c5               // vucomiss    xmm1, xmm0
  8338  	LONG $0xfe430f48               // cmovae    rdi, rsi
        // xmm1 = {element 0, element 1}
  8339  	LONG $0x6ef9e1c4; BYTE $0xcb   // vmovq    xmm1, rbx
  8340  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
  8341  	LONG $0xc96ce9c5               // vpunpcklqdq    xmm1, xmm2, xmm1
        // Element 3 (offset +12)
  8342  	LONG $0x5410fac5; WORD $0x0c02 // vmovss    xmm2, dword [rdx + rax + 12]
  8343  	LONG $0xd85ceac5               // vsubss    xmm3, xmm2, xmm0
  8344  	LONG $0x2cfae1c4; BYTE $0xf3   // vcvttss2si    rsi, xmm3
  8345  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  8346  	LONG $0x2cfae1c4; BYTE $0xfa   // vcvttss2si    rdi, xmm2
  8347  	LONG $0xd02ef8c5               // vucomiss    xmm2, xmm0
  8348  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8349  	LONG $0x6ef9e1c4; BYTE $0xd7   // vmovq    xmm2, rdi
        // Element 2 (offset +8)
  8350  	LONG $0x5c10fac5; WORD $0x0802 // vmovss    xmm3, dword [rdx + rax + 8]
  8351  	LONG $0xe05ce2c5               // vsubss    xmm4, xmm3, xmm0
  8352  	LONG $0x2cfae1c4; BYTE $0xf4   // vcvttss2si    rsi, xmm4
  8353  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  8354  	LONG $0x2cfae1c4; BYTE $0xfb   // vcvttss2si    rdi, xmm3
  8355  	LONG $0xd82ef8c5               // vucomiss    xmm3, xmm0
  8356  	LONG $0xfe430f48               // cmovae    rdi, rsi
  8357  	LONG $0x6ef9e1c4; BYTE $0xdf   // vmovq    xmm3, rdi
        // xmm2 = {element 2, element 3}; store all four 64-bit results.
  8358  	LONG $0xd26ce1c5               // vpunpcklqdq    xmm2, xmm3, xmm2
  8359  	LONG $0x547ffac5; WORD $0x1041 // vmovdqu    oword [rcx + 2*rax + 16], xmm2
  8360  	LONG $0x0c7ffac5; BYTE $0x41   // vmovdqu    oword [rcx + 2*rax], xmm1
        // Advance 16 input bytes (four float32) and loop until r8 reaches zero.
  8361  	LONG $0x10c08348               // add    rax, 16
  8362  	WORD $0xff49; BYTE $0xc0       // inc    r8
  8363  	JNE  LBB0_814
  8364  
  8365  LBB0_815:
        // If the vector part already reached index r9, nothing is left for the
        // scalar tail. LBB0_1553 is defined outside this excerpt (presumably the
        // common exit; confirm).
  8366  	WORD $0x394d; BYTE $0xce // cmp    r14, r9
  8367  	JE   LBB0_1553
  8368  
  8369  LBB0_816:
        // Scalar tail of the float32 -> unsigned 64-bit cast, one element per
        // iteration for indices r14 up to (not including) r9.
        // xmm0 = 2^63 as float32 (low dword at LCDATA1+0x28); rax = bit 63 only.
  8370  	LONG $0x4510fac5; BYTE $0x28           // vmovss    xmm0, dword 40[rbp] /* [rip + .LCPI0_1] */
  8371  	QUAD $0x000000000000b848; WORD $0x8000 // mov    rax, -9223372036854775808
  8372  
  8373  LBB0_817:
        // rsi = cvtt(x - 2^63) ^ bit63, rdi = cvtt(x). The compare operands are
        // swapped here (2^63 vs x), so cmovbe selects rsi when 2^63 <= x.
  8374  	LONG $0x107aa1c4; WORD $0xb20c // vmovss    xmm1, dword [rdx + 4*r14]
  8375  	LONG $0xd05cf2c5               // vsubss    xmm2, xmm1, xmm0
  8376  	LONG $0x2cfae1c4; BYTE $0xf2   // vcvttss2si    rsi, xmm2
  8377  	WORD $0x3148; BYTE $0xc6       // xor    rsi, rax
  8378  	LONG $0x2cfae1c4; BYTE $0xf9   // vcvttss2si    rdi, xmm1
  8379  	LONG $0xc12ef8c5               // vucomiss    xmm0, xmm1
  8380  	LONG $0xfe460f48               // cmovbe    rdi, rsi
  8381  	LONG $0xf13c894a               // mov    qword [rcx + 8*r14], rdi
  8382  	LONG $0x01c68349               // add    r14, 1
  8383  	WORD $0x394d; BYTE $0xf1       // cmp    r9, r14
  8384  	JNE  LBB0_817
  8385  	JMP  LBB0_1553
  8386  
  8387  LBB0_850:
        // Main unrolled loop of the unsigned 64-bit -> float32 cast: 16 elements
        // (four ymm loads) per iteration. r10 is rounded down to a multiple of 4
        // and negated, then counted up by 4 per iteration until it reaches zero.
        // r10's initial value is set before this excerpt.
  8388  	LONG $0xfce28349               // and    r10, -4
  8389  	WORD $0xf749; BYTE $0xda       // neg    r10
  8390  	WORD $0xc031                   // xor    eax, eax
        	// ymm0 = 1 in every qword lane (LCDATA1+0x20); isolates the lowest bit.
  8391  	LONG $0x597de2c4; WORD $0x2045 // vpbroadcastq    ymm0, qword 32[rbp] /* [rip + .LCPI0_10] */
  8392  
  8393  LBB0_851:
        // Each group of four qwords follows the same recipe:
        //   1. halved = (v >> 1) | (v & 1); vblendvpd picks it for lanes whose
        //      top bit is set, so every lane fits a signed conversion.
        //   2. each lane is converted with the signed vcvtsi2ss and reassembled
        //      with vinsertps.
        //   3. vpackssdw on the original qwords yields a dword mask whose sign
        //      bit follows each qword's top bit; vblendvps uses it to select the
        //      doubled result (x + x) for the lanes that were halved.
        // Elements 0..3
  8394  	LONG $0x0c6ffec5; BYTE $0xc2   // vmovdqu    ymm1, yword [rdx + 8*rax]
  8395  	LONG $0xd0dbf5c5               // vpand    ymm2, ymm1, ymm0
  8396  	LONG $0xd173e5c5; BYTE $0x01   // vpsrlq    ymm3, ymm1, 1
  8397  	LONG $0xd2ebe5c5               // vpor    ymm2, ymm3, ymm2
  8398  	LONG $0x4b75e3c4; WORD $0x10ca // vblendvpd    ymm1, ymm1, ymm2, ymm1
  8399  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8400  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8401  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8402  	LONG $0x2ad2e1c4; BYTE $0xdf   // vcvtsi2ss    xmm3, xmm5, rdi
  8403  	LONG $0x397de3c4; WORD $0x01c9 // vextracti128    xmm1, ymm1, 1
  8404  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8405  	LONG $0x2ad2e1c4; BYTE $0xe7   // vcvtsi2ss    xmm4, xmm5, rdi
  8406  	LONG $0x2161e3c4; WORD $0x10d2 // vinsertps    xmm2, xmm3, xmm2, 16
  8407  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8408  	LONG $0x2169e3c4; WORD $0x20cc // vinsertps    xmm1, xmm2, xmm4, 32
  8409  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8410  	LONG $0x2171e3c4; WORD $0x30ca // vinsertps    xmm1, xmm1, xmm2, 48
  8411  	LONG $0x146ffac5; BYTE $0xc2   // vmovdqu    xmm2, oword [rdx + 8*rax]
  8412  	LONG $0x546be9c5; WORD $0x10c2 // vpackssdw    xmm2, xmm2, oword [rdx + 8*rax + 16]
  8413  	LONG $0xd958f0c5               // vaddps    xmm3, xmm1, xmm1
  8414  	LONG $0x4a71e3c4; WORD $0x20cb // vblendvps    xmm1, xmm1, xmm3, xmm2
  8415  	LONG $0x0c11f8c5; BYTE $0x81   // vmovups    oword [rcx + 4*rax], xmm1
        // Elements 4..7
  8416  	LONG $0x4c6ffec5; WORD $0x20c2 // vmovdqu    ymm1, yword [rdx + 8*rax + 32]
  8417  	LONG $0xd0dbf5c5               // vpand    ymm2, ymm1, ymm0
  8418  	LONG $0xd173e5c5; BYTE $0x01   // vpsrlq    ymm3, ymm1, 1
  8419  	LONG $0xd2ebe5c5               // vpor    ymm2, ymm3, ymm2
  8420  	LONG $0x4b75e3c4; WORD $0x10ca // vblendvpd    ymm1, ymm1, ymm2, ymm1
  8421  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8422  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8423  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8424  	LONG $0x2ad2e1c4; BYTE $0xdf   // vcvtsi2ss    xmm3, xmm5, rdi
  8425  	LONG $0x397de3c4; WORD $0x01c9 // vextracti128    xmm1, ymm1, 1
  8426  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8427  	LONG $0x2ad2e1c4; BYTE $0xe7   // vcvtsi2ss    xmm4, xmm5, rdi
  8428  	LONG $0x2161e3c4; WORD $0x10d2 // vinsertps    xmm2, xmm3, xmm2, 16
  8429  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8430  	LONG $0x2169e3c4; WORD $0x20cc // vinsertps    xmm1, xmm2, xmm4, 32
  8431  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8432  	LONG $0x2171e3c4; WORD $0x30ca // vinsertps    xmm1, xmm1, xmm2, 48
  8433  	LONG $0x546ffac5; WORD $0x20c2 // vmovdqu    xmm2, oword [rdx + 8*rax + 32]
  8434  	LONG $0x546be9c5; WORD $0x30c2 // vpackssdw    xmm2, xmm2, oword [rdx + 8*rax + 48]
  8435  	LONG $0xd958f0c5               // vaddps    xmm3, xmm1, xmm1
  8436  	LONG $0x4a71e3c4; WORD $0x20cb // vblendvps    xmm1, xmm1, xmm3, xmm2
  8437  	LONG $0x4c11f8c5; WORD $0x1081 // vmovups    oword [rcx + 4*rax + 16], xmm1
        // Elements 8..11
  8438  	LONG $0x4c6ffec5; WORD $0x40c2 // vmovdqu    ymm1, yword [rdx + 8*rax + 64]
  8439  	LONG $0xd0dbf5c5               // vpand    ymm2, ymm1, ymm0
  8440  	LONG $0xd173e5c5; BYTE $0x01   // vpsrlq    ymm3, ymm1, 1
  8441  	LONG $0xd2ebe5c5               // vpor    ymm2, ymm3, ymm2
  8442  	LONG $0x4b75e3c4; WORD $0x10ca // vblendvpd    ymm1, ymm1, ymm2, ymm1
  8443  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8444  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8445  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8446  	LONG $0x2ad2e1c4; BYTE $0xdf   // vcvtsi2ss    xmm3, xmm5, rdi
  8447  	LONG $0x397de3c4; WORD $0x01c9 // vextracti128    xmm1, ymm1, 1
  8448  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8449  	LONG $0x2ad2e1c4; BYTE $0xe7   // vcvtsi2ss    xmm4, xmm5, rdi
  8450  	LONG $0x2161e3c4; WORD $0x10d2 // vinsertps    xmm2, xmm3, xmm2, 16
  8451  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8452  	LONG $0x2169e3c4; WORD $0x20cc // vinsertps    xmm1, xmm2, xmm4, 32
  8453  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8454  	LONG $0x2171e3c4; WORD $0x30ca // vinsertps    xmm1, xmm1, xmm2, 48
  8455  	LONG $0x546ffac5; WORD $0x40c2 // vmovdqu    xmm2, oword [rdx + 8*rax + 64]
  8456  	LONG $0x546be9c5; WORD $0x50c2 // vpackssdw    xmm2, xmm2, oword [rdx + 8*rax + 80]
  8457  	LONG $0xd958f0c5               // vaddps    xmm3, xmm1, xmm1
  8458  	LONG $0x4a71e3c4; WORD $0x20cb // vblendvps    xmm1, xmm1, xmm3, xmm2
  8459  	LONG $0x4c11f8c5; WORD $0x2081 // vmovups    oword [rcx + 4*rax + 32], xmm1
        // Elements 12..15 (same recipe; this copy uses r11 as a second scratch register)
  8460  	LONG $0x4c6ffec5; WORD $0x60c2 // vmovdqu    ymm1, yword [rdx + 8*rax + 96]
  8461  	LONG $0xd0dbf5c5               // vpand    ymm2, ymm1, ymm0
  8462  	LONG $0xd173e5c5; BYTE $0x01   // vpsrlq    ymm3, ymm1, 1
  8463  	LONG $0xd2ebe5c5               // vpor    ymm2, ymm3, ymm2
  8464  	LONG $0x4b75e3c4; WORD $0x10ca // vblendvpd    ymm1, ymm1, ymm2, ymm1
  8465  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8466  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8467  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8468  	LONG $0x2ad2e1c4; BYTE $0xdf   // vcvtsi2ss    xmm3, xmm5, rdi
  8469  	LONG $0x397de3c4; WORD $0x01c9 // vextracti128    xmm1, ymm1, 1
  8470  	LONG $0x16f9c3c4; WORD $0x01cb // vpextrq    r11, xmm1, 1
  8471  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8472  	LONG $0x2ad2e1c4; BYTE $0xcf   // vcvtsi2ss    xmm1, xmm5, rdi
  8473  	LONG $0x2161e3c4; WORD $0x10d2 // vinsertps    xmm2, xmm3, xmm2, 16
  8474  	LONG $0x2ad2c1c4; BYTE $0xdb   // vcvtsi2ss    xmm3, xmm5, r11
  8475  	LONG $0x2169e3c4; WORD $0x20c9 // vinsertps    xmm1, xmm2, xmm1, 32
  8476  	LONG $0x2171e3c4; WORD $0x30cb // vinsertps    xmm1, xmm1, xmm3, 48
  8477  	LONG $0xd158f0c5               // vaddps    xmm2, xmm1, xmm1
  8478  	LONG $0x5c6ffac5; WORD $0x60c2 // vmovdqu    xmm3, oword [rdx + 8*rax + 96]
  8479  	LONG $0x5c6be1c5; WORD $0x70c2 // vpackssdw    xmm3, xmm3, oword [rdx + 8*rax + 112]
  8480  	LONG $0x4a71e3c4; WORD $0x30ca // vblendvps    xmm1, xmm1, xmm2, xmm3
  8481  	LONG $0x4c11f8c5; WORD $0x3081 // vmovups    oword [rcx + 4*rax + 48], xmm1
        // Advance the element index by 16 and the negated counter by 4.
  8482  	LONG $0x10c08348               // add    rax, 16
  8483  	LONG $0x04c28349               // add    r10, 4
  8484  	JNE  LBB0_851
  8485  
  8486  LBB0_852:
        // Leftover vectors of the unsigned 64-bit -> float32 cast. Runs r8 more
        // iterations of four elements each, or skips to LBB0_855 when r8 is zero.
        // r8 is set before this excerpt.
  8487  	WORD $0x854d; BYTE $0xc0       // test    r8, r8
  8488  	JE   LBB0_855
        	// rax becomes a byte offset into the 4-byte outputs; the 8-byte inputs
        	// are then addressed as rdx + 2*rax.
  8489  	LONG $0x02e0c148               // shl    rax, 2
  8490  	WORD $0xf749; BYTE $0xd8       // neg    r8
        	// ymm0 = 1 in every qword lane (LCDATA1+0x20).
  8491  	LONG $0x597de2c4; WORD $0x2045 // vpbroadcastq    ymm0, qword 32[rbp] /* [rip + .LCPI0_10] */
  8492  
  8493  LBB0_854:
        // Lanes with the top bit set are replaced by (v >> 1) | (v & 1) before the
        // signed vcvtsi2ss conversions; those lanes then take the doubled result
        // (x + x) through the vpackssdw-derived sign mask and vblendvps.
  8494  	LONG $0x0c6ffec5; BYTE $0x42   // vmovdqu    ymm1, yword [rdx + 2*rax]
  8495  	LONG $0xd0dbf5c5               // vpand    ymm2, ymm1, ymm0
  8496  	LONG $0xd173e5c5; BYTE $0x01   // vpsrlq    ymm3, ymm1, 1
  8497  	LONG $0xd2ebe5c5               // vpor    ymm2, ymm3, ymm2
  8498  	LONG $0x4b75e3c4; WORD $0x10ca // vblendvpd    ymm1, ymm1, ymm2, ymm1
  8499  	LONG $0x16f9e3c4; WORD $0x01cf // vpextrq    rdi, xmm1, 1
  8500  	LONG $0x2ad2e1c4; BYTE $0xd7   // vcvtsi2ss    xmm2, xmm5, rdi
  8501  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8502  	LONG $0x2ad2e1c4; BYTE $0xdf   // vcvtsi2ss    xmm3, xmm5, rdi
  8503  	LONG $0x397de3c4; WORD $0x01c9 // vextracti128    xmm1, ymm1, 1
  8504  	LONG $0x16f9c3c4; WORD $0x01ca // vpextrq    r10, xmm1, 1
  8505  	LONG $0x7ef9e1c4; BYTE $0xcf   // vmovq    rdi, xmm1
  8506  	LONG $0x2ad2e1c4; BYTE $0xcf   // vcvtsi2ss    xmm1, xmm5, rdi
  8507  	LONG $0x2161e3c4; WORD $0x10d2 // vinsertps    xmm2, xmm3, xmm2, 16
  8508  	LONG $0x2ad2c1c4; BYTE $0xda   // vcvtsi2ss    xmm3, xmm5, r10
  8509  	LONG $0x2169e3c4; WORD $0x20c9 // vinsertps    xmm1, xmm2, xmm1, 32
  8510  	LONG $0x2171e3c4; WORD $0x30cb // vinsertps    xmm1, xmm1, xmm3, 48
  8511  	LONG $0xd158f0c5               // vaddps    xmm2, xmm1, xmm1
  8512  	LONG $0x1c6ffac5; BYTE $0x42   // vmovdqu    xmm3, oword [rdx + 2*rax]
  8513  	LONG $0x5c6be1c5; WORD $0x1042 // vpackssdw    xmm3, xmm3, oword [rdx + 2*rax + 16]
  8514  	LONG $0x4a71e3c4; WORD $0x30ca // vblendvps    xmm1, xmm1, xmm2, xmm3
  8515  	LONG $0x0c11f8c5; BYTE $0x01   // vmovups    oword [rcx + rax], xmm1
        // Advance 16 output bytes (four float32) and loop until r8 reaches zero.
  8516  	LONG $0x10c08348               // add    rax, 16
  8517  	WORD $0xff49; BYTE $0xc0       // inc    r8
  8518  	JNE  LBB0_854
  8519  
  8520  LBB0_855:
        // Scalar tail of the unsigned 64-bit -> float32 cast. Enters the tail at
        // LBB0_858 unless index rsi already equals r9. LBB0_1553 is defined outside
        // this excerpt (presumably the common exit; confirm).
  8521  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8522  	JNE  LBB0_858
  8523  	JMP  LBB0_1553
  8524  
  8525  LBB0_856:
        // Value has its top bit clear: the signed conversion is used directly.
  8526  	LONG $0x2ad2e1c4; BYTE $0xc0 // vcvtsi2ss    xmm0, xmm5, rax
  8527  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
  8528  	LONG $0x01c68348             // add    rsi, 1
  8529  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  8530  	JE   LBB0_1553
  8531  
  8532  LBB0_858:
        // Loop head: load the next 64-bit value and branch on its sign bit.
  8533  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  8534  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  8535  	JNS  LBB0_856
        	// Top bit set: convert (v >> 1) | (v & 1) and double the result. The
        	// or-ed low bit keeps the bit that the shift would otherwise discard.
  8536  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
  8537  	WORD $0xd148; BYTE $0xef     // shr    rdi, 1
  8538  	WORD $0xe083; BYTE $0x01     // and    eax, 1
  8539  	WORD $0x0948; BYTE $0xf8     // or    rax, rdi
  8540  	LONG $0x2ad2e1c4; BYTE $0xc0 // vcvtsi2ss    xmm0, xmm5, rax
  8541  	LONG $0xc058fac5             // vaddss    xmm0, xmm0, xmm0
  8542  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
  8543  	LONG $0x01c68348             // add    rsi, 1
  8544  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  8545  	JNE  LBB0_858
  8546  	JMP  LBB0_1553
  8547  
  8548  LBB0_914:
        // 64-bit -> 32-bit narrowing (keeps the low dword of each qword).
        // This entry starts the vector block at element index 0.
  8549  	WORD $0xff31 // xor    edi, edi
  8550  
  8551  LBB0_915:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8552  	LONG $0x01c0f641                           // test    r8b, 1
  8553  	JE   LBB0_917
        	// Load the even 16-byte chunks, then vshufps with selector 136 (0x88)
        	// gathers dwords 0 and 2 of each operand, i.e. the low half of four
        	// consecutive qwords per result register.
  8554  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  8555  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  8556  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  8557  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  8558  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  8559  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  8560  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  8561  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
  8562  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  8563  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  8564  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  8565  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  8566  
  8567  LBB0_917:
        // Skip the scalar tail when index rsi already equals r9.
  8568  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8569  	JE   LBB0_1553
  8570  
  8571  LBB0_918:
        // Scalar tail: copy the low dword of each 8-byte input to the 4-byte output.
  8572  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  8573  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  8574  	LONG $0x01c68348         // add    rsi, 1
  8575  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8576  	JNE  LBB0_918
  8577  	JMP  LBB0_1553
  8578  
  8579  LBB0_919:
        // Unsigned 64-bit -> float64 cast.
        // This entry starts the vector block at element index 0.
  8580  	WORD $0xff31 // xor    edi, edi
  8581  
  8582  LBB0_920:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8583  	LONG $0x01c0f641               // test    r8b, 1
  8584  	JE   LBB0_922
  8585  	LONG $0x046ffec5; BYTE $0xfa   // vmovdqu    ymm0, yword [rdx + 8*rdi]
  8586  	LONG $0x4c6ffec5; WORD $0x20fa // vmovdqu    ymm1, yword [rdx + 8*rdi + 32]
  8587  	LONG $0x546ffec5; WORD $0x40fa // vmovdqu    ymm2, yword [rdx + 8*rdi + 64]
  8588  	LONG $0x5c6ffec5; WORD $0x60fa // vmovdqu    ymm3, yword [rdx + 8*rdi + 96]
        	// Exponent-bias method, applied to each of the four registers:
        	//   lo = (v & 0xffffffff) | bits(2^52)   (ymm6, LCDATA1+0x08)
        	//   hi = (v >> 32)        | bits(2^84)   (ymm7, LCDATA1+0x10)
        	//   result = lo + (hi - (2^84 + 2^52))   (ymm8, LCDATA1+0x18)
        	// ymm4 is zero; vpblendd with mask 170 (0xAA) clears the odd dwords.
  8589  	LONG $0xe457d9c5               // vxorpd    xmm4, xmm4, xmm4
  8590  	LONG $0x027de3c4; WORD $0xaaec // vpblendd    ymm5, ymm0, ymm4, 170
  8591  	LONG $0x597de2c4; WORD $0x0875 // vpbroadcastq    ymm6, qword 8[rbp] /* [rip + .LCPI0_5] */
  8592  	LONG $0xeeebd5c5               // vpor    ymm5, ymm5, ymm6
  8593  	LONG $0xd073fdc5; BYTE $0x20   // vpsrlq    ymm0, ymm0, 32
  8594  	LONG $0x597de2c4; WORD $0x107d // vpbroadcastq    ymm7, qword 16[rbp] /* [rip + .LCPI0_6] */
  8595  	LONG $0xc7ebfdc5               // vpor    ymm0, ymm0, ymm7
  8596  	LONG $0x197d62c4; WORD $0x1845 // vbroadcastsd    ymm8, qword 24[rbp] /* [rip + .LCPI0_7] */
  8597  	LONG $0x5c7dc1c4; BYTE $0xc0   // vsubpd    ymm0, ymm0, ymm8
  8598  	LONG $0xc058d5c5               // vaddpd    ymm0, ymm5, ymm0
  8599  	LONG $0x0275e3c4; WORD $0xaaec // vpblendd    ymm5, ymm1, ymm4, 170
  8600  	LONG $0xeeebd5c5               // vpor    ymm5, ymm5, ymm6
  8601  	LONG $0xd173f5c5; BYTE $0x20   // vpsrlq    ymm1, ymm1, 32
  8602  	LONG $0xcfebf5c5               // vpor    ymm1, ymm1, ymm7
  8603  	LONG $0x5c75c1c4; BYTE $0xc8   // vsubpd    ymm1, ymm1, ymm8
  8604  	LONG $0xc958d5c5               // vaddpd    ymm1, ymm5, ymm1
  8605  	LONG $0x026de3c4; WORD $0xaaec // vpblendd    ymm5, ymm2, ymm4, 170
  8606  	LONG $0xeeebd5c5               // vpor    ymm5, ymm5, ymm6
  8607  	LONG $0xd273edc5; BYTE $0x20   // vpsrlq    ymm2, ymm2, 32
  8608  	LONG $0xd7ebedc5               // vpor    ymm2, ymm2, ymm7
  8609  	LONG $0x5c6dc1c4; BYTE $0xd0   // vsubpd    ymm2, ymm2, ymm8
  8610  	LONG $0xd258d5c5               // vaddpd    ymm2, ymm5, ymm2
  8611  	LONG $0x0265e3c4; WORD $0xaae4 // vpblendd    ymm4, ymm3, ymm4, 170
  8612  	LONG $0xe6ebddc5               // vpor    ymm4, ymm4, ymm6
  8613  	LONG $0xd373e5c5; BYTE $0x20   // vpsrlq    ymm3, ymm3, 32
  8614  	LONG $0xdfebe5c5               // vpor    ymm3, ymm3, ymm7
  8615  	LONG $0x5c65c1c4; BYTE $0xd8   // vsubpd    ymm3, ymm3, ymm8
  8616  	LONG $0xdb58ddc5               // vaddpd    ymm3, ymm4, ymm3
  8617  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
  8618  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
  8619  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
  8620  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
  8621  
  8622  LBB0_922:
        // Skip the scalar tail when index rsi already equals r9.
  8623  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8624  	JE   LBB0_1553
  8625  
  8626  LBB0_923:
        // Scalar-tail constants:
        //   xmm0 = dwords {0x43300000, 0x45300000, 0, 0}   (LCDATA1+0x50)
        //   xmm1 = doubles {2^52, 2^84}                    (LCDATA1+0x60)
  8627  	LONG $0x4528f9c5; BYTE $0x50 // vmovapd    xmm0, oword 80[rbp] /* [rip + .LCPI0_8] */
  8628  	LONG $0x4d28f9c5; BYTE $0x60 // vmovapd    xmm1, oword 96[rbp] /* [rip + .LCPI0_9] */
  8629  
  8630  LBB0_924:
        // vunpcklps interleaves the value's two dwords with the exponent words,
        // forming the doubles (2^52 + lo) and (2^84 + hi * 2^32). Subtracting the
        // biases and adding the two lanes gives the converted value.
  8631  	LONG $0x1410fbc5; BYTE $0xf2   // vmovsd    xmm2, qword [rdx + 8*rsi]
  8632  	LONG $0xd014e8c5               // vunpcklps    xmm2, xmm2, xmm0
  8633  	LONG $0xd15ce9c5               // vsubpd    xmm2, xmm2, xmm1
  8634  	LONG $0x0579e3c4; WORD $0x01da // vpermilpd    xmm3, xmm2, 1
  8635  	LONG $0xd258e3c5               // vaddsd    xmm2, xmm3, xmm2
  8636  	LONG $0x1411fbc5; BYTE $0xf1   // vmovsd    qword [rcx + 8*rsi], xmm2
  8637  	LONG $0x01c68348               // add    rsi, 1
  8638  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  8639  	JNE  LBB0_924
  8640  	JMP  LBB0_1553
  8641  
  8642  LBB0_925:
        // Signed 32-bit -> float64 cast.
        // This entry starts the vector block at element index 0.
  8643  	WORD $0xff31 // xor    edi, edi
  8644  
  8645  LBB0_926:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8646  	LONG $0x01c0f641               // test    r8b, 1
  8647  	JE   LBB0_928
        	// Each vcvtdq2pd widens four packed int32 values into four doubles.
  8648  	LONG $0x04e6fec5; BYTE $0xba   // vcvtdq2pd    ymm0, oword [rdx + 4*rdi]
  8649  	LONG $0x4ce6fec5; WORD $0x10ba // vcvtdq2pd    ymm1, oword [rdx + 4*rdi + 16]
  8650  	LONG $0x54e6fec5; WORD $0x20ba // vcvtdq2pd    ymm2, oword [rdx + 4*rdi + 32]
  8651  	LONG $0x5ce6fec5; WORD $0x30ba // vcvtdq2pd    ymm3, oword [rdx + 4*rdi + 48]
  8652  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
  8653  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
  8654  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
  8655  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
  8656  
  8657  LBB0_928:
        // Skip the scalar tail when index rsi already equals r9.
  8658  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8659  	JE   LBB0_1553
  8660  
  8661  LBB0_929:
        // Scalar tail: convert one int32 to a double per iteration.
  8662  	LONG $0x042adbc5; BYTE $0xb2 // vcvtsi2sd    xmm0, xmm4, dword [rdx + 4*rsi]
  8663  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
  8664  	LONG $0x01c68348             // add    rsi, 1
  8665  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  8666  	JNE  LBB0_929
  8667  	JMP  LBB0_1553
  8668  
  8669  LBB0_930:
        // 32-bit -> 64-bit widening by zero extension.
        // This entry starts the vector block at element index 0.
  8670  	WORD $0xff31 // xor    edi, edi
  8671  
  8672  LBB0_931:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8673  	LONG $0x01c0f641                           // test    r8b, 1
  8674  	JE   LBB0_933
        	// Each vpmovzxdq zero-extends four dwords into four qwords.
  8675  	LONG $0x357de2c4; WORD $0xba04             // vpmovzxdq    ymm0, oword [rdx + 4*rdi]
  8676  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x10 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 16]
  8677  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x20 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 32]
  8678  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x30 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 48]
  8679  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  8680  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  8681  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  8682  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  8683  
  8684  LBB0_933:
        // Skip the scalar tail when index rsi already equals r9.
  8685  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8686  	JE   LBB0_1553
  8687  
  8688  LBB0_934:
        // Scalar tail: the 32-bit load into eax clears the upper half of rax, so
        // the 64-bit store writes the zero-extended value.
  8689  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  8690  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  8691  	LONG $0x01c68348         // add    rsi, 1
  8692  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8693  	JNE  LBB0_934
  8694  	JMP  LBB0_1553
  8695  
  8696  LBB0_935:
        // 16-bit -> 64-bit widening by zero extension.
        // This entry starts the vector block at element index 0.
  8697  	WORD $0xff31 // xor    edi, edi
  8698  
  8699  LBB0_936:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8700  	LONG $0x01c0f641                           // test    r8b, 1
  8701  	JE   LBB0_938
        	// Each vpmovzxwq zero-extends four words (8 input bytes) into four qwords.
  8702  	LONG $0x347de2c4; WORD $0x7a04             // vpmovzxwq    ymm0, qword [rdx + 2*rdi]
  8703  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 8]
  8704  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 16]
  8705  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 24]
  8706  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  8707  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  8708  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  8709  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  8710  
  8711  LBB0_938:
        // Skip the scalar tail when index rsi already equals r9.
  8712  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8713  	JE   LBB0_1553
  8714  
  8715  LBB0_939:
        // Scalar tail: movzx zero-extends one word, stored as a full qword.
  8716  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  8717  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  8718  	LONG $0x01c68348         // add    rsi, 1
  8719  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8720  	JNE  LBB0_939
  8721  	JMP  LBB0_1553
  8722  
  8723  LBB0_940:
        // 16-bit -> 64-bit widening by sign extension.
        // This entry starts the vector block at element index 0.
  8724  	WORD $0xff31 // xor    edi, edi
  8725  
  8726  LBB0_941:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8727  	LONG $0x01c0f641                           // test    r8b, 1
  8728  	JE   LBB0_943
        	// Each vpmovsxwq sign-extends four words (8 input bytes) into four qwords.
  8729  	LONG $0x247de2c4; WORD $0x7a04             // vpmovsxwq    ymm0, qword [rdx + 2*rdi]
  8730  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 8]
  8731  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 16]
  8732  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 24]
  8733  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  8734  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  8735  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  8736  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  8737  
  8738  LBB0_943:
        // Skip the scalar tail when index rsi already equals r9.
  8739  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8740  	JE   LBB0_1553
  8741  
  8742  LBB0_944:
        // Scalar tail: movsx sign-extends one word to 64 bits per iteration.
  8743  	LONG $0x04bf0f48; BYTE $0x72 // movsx    rax, word [rdx + 2*rsi]
  8744  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  8745  	LONG $0x01c68348             // add    rsi, 1
  8746  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  8747  	JNE  LBB0_944
  8748  	JMP  LBB0_1553
  8749  
  8750  LBB0_945:
        // 32-bit -> 64-bit widening by sign extension.
        // This entry starts the vector block at element index 0.
  8751  	WORD $0xff31 // xor    edi, edi
  8752  
  8753  LBB0_946:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8754  	LONG $0x01c0f641                           // test    r8b, 1
  8755  	JE   LBB0_948
        	// Each vpmovsxdq sign-extends four dwords into four qwords.
  8756  	LONG $0x257de2c4; WORD $0xba04             // vpmovsxdq    ymm0, oword [rdx + 4*rdi]
  8757  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x10 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 16]
  8758  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x20 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 32]
  8759  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x30 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 48]
  8760  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  8761  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  8762  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  8763  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  8764  
  8765  LBB0_948:
        // Skip the scalar tail when index rsi already equals r9.
  8766  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8767  	JE   LBB0_1553
  8768  
  8769  LBB0_949:
        // Scalar tail: movsxd sign-extends one dword to 64 bits per iteration.
  8770  	LONG $0xb2046348         // movsxd    rax, dword [rdx + 4*rsi]
  8771  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  8772  	LONG $0x01c68348         // add    rsi, 1
  8773  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8774  	JNE  LBB0_949
  8775  	JMP  LBB0_1553
  8776  
  8777  LBB0_950:
        // 64-bit -> 16-bit narrowing (keeps the low word of each qword).
        // This entry starts the vector block at element index 0.
  8778  	WORD $0xff31 // xor    edi, edi
  8779  
  8780  LBB0_951:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8781  	LONG $0x01c0f641                           // test    r8b, 1
  8782  	JE   LBB0_953
        	// vpblendw against zero with mask 17 (0x11) keeps words 0 and 4 of each
        	// 16-byte load, i.e. the low 16 bits of both qwords, and zeroes the rest.
  8783  	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
  8784  	LONG $0x0e79e3c4; WORD $0xfa0c; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi], 17
  8785  	QUAD $0x1110fa540e79e3c4                   // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  8786  	QUAD $0x1120fa5c0e79e3c4                   // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  8787  	QUAD $0x1130fa640e79e3c4                   // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  8788  	QUAD $0x1140fa6c0e79e3c4                   // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  8789  	QUAD $0x1150fa740e79e3c4                   // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  8790  	QUAD $0x1160fa7c0e79e3c4                   // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  8791  	QUAD $0x1170fa440e79e3c4                   // vpblendw    xmm0, xmm0, oword [rdx + 8*rdi + 112], 17
        	// Pack the sixteen masked qwords down to sixteen 16-bit words and store
        	// them as one 32-byte vector.
  8792  	LONG $0x384de3c4; WORD $0x01c0             // vinserti128    ymm0, ymm6, xmm0, 1
  8793  	LONG $0x3855e3c4; WORD $0x01ef             // vinserti128    ymm5, ymm5, xmm7, 1
  8794  	LONG $0x2b55e2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm5, ymm0
  8795  	LONG $0x2b7de2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm0, ymm0
  8796  	LONG $0x386de3c4; WORD $0x01d4             // vinserti128    ymm2, ymm2, xmm4, 1
  8797  	LONG $0x3875e3c4; WORD $0x01cb             // vinserti128    ymm1, ymm1, xmm3, 1
  8798  	LONG $0x2b75e2c4; BYTE $0xca               // vpackusdw    ymm1, ymm1, ymm2
  8799  	LONG $0x2b75e2c4; BYTE $0xc8               // vpackusdw    ymm1, ymm1, ymm0
  8800  	LONG $0xc06cf5c5                           // vpunpcklqdq    ymm0, ymm1, ymm0
  8801  	LONG $0x00fde3c4; WORD $0xd8c0             // vpermq    ymm0, ymm0, 216
  8802  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  8803  
  8804  LBB0_953:
        // Skip the scalar tail when index rsi already equals r9.
  8805  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8806  	JE   LBB0_1553
  8807  
  8808  LBB0_954:
        // Scalar tail: copy the low word of each 8-byte input to the 2-byte output.
  8809  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  8810  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  8811  	LONG $0x01c68348         // add    rsi, 1
  8812  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8813  	JNE  LBB0_954
  8814  	JMP  LBB0_1553
  8815  
  8816  LBB0_955:
        // 64-bit -> 16-bit narrowing (keeps the low word of each qword). The
        // instruction sequence matches the LBB0_950 group; which type pair
        // dispatches here is not visible in this excerpt.
        // This entry starts the vector block at element index 0.
  8817  	WORD $0xff31 // xor    edi, edi
  8818  
  8819  LBB0_956:
        // Bit 0 of r8 decides whether one 16-element block is processed at index
        // rdi. r8 is set before this excerpt.
  8820  	LONG $0x01c0f641                           // test    r8b, 1
  8821  	JE   LBB0_958
        	// vpblendw against zero with mask 17 (0x11) keeps words 0 and 4 of each
        	// 16-byte load, i.e. the low 16 bits of both qwords, and zeroes the rest.
  8822  	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
  8823  	LONG $0x0e79e3c4; WORD $0xfa0c; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi], 17
  8824  	QUAD $0x1110fa540e79e3c4                   // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  8825  	QUAD $0x1120fa5c0e79e3c4                   // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  8826  	QUAD $0x1130fa640e79e3c4                   // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  8827  	QUAD $0x1140fa6c0e79e3c4                   // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  8828  	QUAD $0x1150fa740e79e3c4                   // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  8829  	QUAD $0x1160fa7c0e79e3c4                   // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  8830  	QUAD $0x1170fa440e79e3c4                   // vpblendw    xmm0, xmm0, oword [rdx + 8*rdi + 112], 17
        	// Pack the sixteen masked qwords down to sixteen 16-bit words and store
        	// them as one 32-byte vector.
  8831  	LONG $0x384de3c4; WORD $0x01c0             // vinserti128    ymm0, ymm6, xmm0, 1
  8832  	LONG $0x3855e3c4; WORD $0x01ef             // vinserti128    ymm5, ymm5, xmm7, 1
  8833  	LONG $0x2b55e2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm5, ymm0
  8834  	LONG $0x2b7de2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm0, ymm0
  8835  	LONG $0x386de3c4; WORD $0x01d4             // vinserti128    ymm2, ymm2, xmm4, 1
  8836  	LONG $0x3875e3c4; WORD $0x01cb             // vinserti128    ymm1, ymm1, xmm3, 1
  8837  	LONG $0x2b75e2c4; BYTE $0xca               // vpackusdw    ymm1, ymm1, ymm2
  8838  	LONG $0x2b75e2c4; BYTE $0xc8               // vpackusdw    ymm1, ymm1, ymm0
  8839  	LONG $0xc06cf5c5                           // vpunpcklqdq    ymm0, ymm1, ymm0
  8840  	LONG $0x00fde3c4; WORD $0xd8c0             // vpermq    ymm0, ymm0, 216
  8841  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  8842  
  8843  LBB0_958:
        // Skip the scalar tail when index rsi already equals r9.
  8844  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8845  	JE   LBB0_1553
  8846  
  8847  LBB0_959:
        // Scalar tail: copy the low word of each 8-byte input to the 2-byte output.
  8848  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  8849  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  8850  	LONG $0x01c68348         // add    rsi, 1
  8851  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8852  	JNE  LBB0_959
  8853  	JMP  LBB0_1553
  8854  
  8855  LBB0_960:
        	// Remainder path of a 64-bit -> 16-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  8856  	WORD $0xff31 // xor    edi, edi
  8857  
  8858  LBB0_961:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  8859  	LONG $0x01c0f641                           // test    r8b, 1
  8860  	JE   LBB0_963
        	// Vector step, 16 elements starting at index rdi. vpblendw with mask 17
        	// (0x11) keeps words 0 and 4 of each 16-byte load - the low 16 bits of
        	// two 8-byte elements - over the zeroed xmm0.
  8861  	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
  8862  	LONG $0x0e79e3c4; WORD $0xfa0c; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi], 17
  8863  	QUAD $0x1110fa540e79e3c4                   // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
  8864  	QUAD $0x1120fa5c0e79e3c4                   // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
  8865  	QUAD $0x1130fa640e79e3c4                   // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
  8866  	QUAD $0x1140fa6c0e79e3c4                   // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
  8867  	QUAD $0x1150fa740e79e3c4                   // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
  8868  	QUAD $0x1160fa7c0e79e3c4                   // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
  8869  	QUAD $0x1170fa440e79e3c4                   // vpblendw    xmm0, xmm0, oword [rdx + 8*rdi + 112], 17
        	// Insert/pack/unpack/permute sequence that narrows the eight partial
        	// vectors down to the single 32-byte result stored at [rcx + 2*rdi].
  8870  	LONG $0x384de3c4; WORD $0x01c0             // vinserti128    ymm0, ymm6, xmm0, 1
  8871  	LONG $0x3855e3c4; WORD $0x01ef             // vinserti128    ymm5, ymm5, xmm7, 1
  8872  	LONG $0x2b55e2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm5, ymm0
  8873  	LONG $0x2b7de2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm0, ymm0
  8874  	LONG $0x386de3c4; WORD $0x01d4             // vinserti128    ymm2, ymm2, xmm4, 1
  8875  	LONG $0x3875e3c4; WORD $0x01cb             // vinserti128    ymm1, ymm1, xmm3, 1
  8876  	LONG $0x2b75e2c4; BYTE $0xca               // vpackusdw    ymm1, ymm1, ymm2
  8877  	LONG $0x2b75e2c4; BYTE $0xc8               // vpackusdw    ymm1, ymm1, ymm0
  8878  	LONG $0xc06cf5c5                           // vpunpcklqdq    ymm0, ymm1, ymm0
  8879  	LONG $0x00fde3c4; WORD $0xd8c0             // vpermq    ymm0, ymm0, 216
  8880  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
  8881  
  8882  LBB0_963:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  8883  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8884  	JE   LBB0_1553
  8885  
  8886  LBB0_964:
        	// Scalar loop: copy the low 16 bits of the 8-byte element at
        	// [rdx + 8*rsi] to [rcx + 2*rsi], advancing rsi until it equals r9.
  8887  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  8888  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  8889  	LONG $0x01c68348         // add    rsi, 1
  8890  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8891  	JNE  LBB0_964
  8892  	JMP  LBB0_1553
  8893  
  8894  LBB0_965:
        	// Remainder path of a 32-bit -> 16-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  8895  	WORD $0xff31 // xor    edi, edi
  8896  
  8897  LBB0_966:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  8898  	LONG $0x01c0f641               // test    r8b, 1
  8899  	JE   LBB0_968
        	// Vector step, 32 elements starting at index rdi: load four 32-byte
        	// vectors of 4-byte elements.
  8900  	LONG $0x046ffec5; BYTE $0xba   // vmovdqu    ymm0, yword [rdx + 4*rdi]
  8901  	LONG $0x4c6ffec5; WORD $0x20ba // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
  8902  	LONG $0x546ffec5; WORD $0x40ba // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
  8903  	LONG $0x5c6ffec5; WORD $0x60ba // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
        	// Shuffle mask at LCDATA1+0x80 (rbp is loaded with LCDATA1 at function
        	// entry). Its bytes start 00 01 04 05 08 09 0c 0d, i.e. the low two
        	// bytes of each dword. vpermq 232 then brings qwords 0 and 2 of each
        	// result into the low 128 bits.
  8904  	QUAD $0x00000080a56ffdc5       // vmovdqa    ymm4, yword 128[rbp] /* [rip + .LCPI0_11] */
  8905  	LONG $0x007de2c4; BYTE $0xc4   // vpshufb    ymm0, ymm0, ymm4
  8906  	LONG $0x00fde3c4; WORD $0xe8c0 // vpermq    ymm0, ymm0, 232
  8907  	LONG $0x0075e2c4; BYTE $0xcc   // vpshufb    ymm1, ymm1, ymm4
  8908  	LONG $0x00fde3c4; WORD $0xe8c9 // vpermq    ymm1, ymm1, 232
  8909  	LONG $0x006de2c4; BYTE $0xd4   // vpshufb    ymm2, ymm2, ymm4
  8910  	LONG $0x00fde3c4; WORD $0xe8d2 // vpermq    ymm2, ymm2, 232
  8911  	LONG $0x0065e2c4; BYTE $0xdc   // vpshufb    ymm3, ymm3, ymm4
  8912  	LONG $0x00fde3c4; WORD $0xe8db // vpermq    ymm3, ymm3, 232
        	// Store the low 16 bytes (eight 16-bit values) of each result.
  8913  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
  8914  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
  8915  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
  8916  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
  8917  
  8918  LBB0_968:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  8919  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8920  	JE   LBB0_1553
  8921  
  8922  LBB0_969:
        	// Scalar loop: copy the low 16 bits of the 4-byte element at
        	// [rdx + 4*rsi] to [rcx + 2*rsi], advancing rsi until it equals r9.
  8923  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  8924  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  8925  	LONG $0x01c68348         // add    rsi, 1
  8926  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8927  	JNE  LBB0_969
  8928  	JMP  LBB0_1553
  8929  
  8930  LBB0_970:
        	// Remainder path of a 32-bit -> 16-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  8931  	WORD $0xff31 // xor    edi, edi
  8932  
  8933  LBB0_971:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  8934  	LONG $0x01c0f641               // test    r8b, 1
  8935  	JE   LBB0_973
        	// Vector step, 32 elements starting at index rdi: load four 32-byte
        	// vectors of 4-byte elements.
  8936  	LONG $0x046ffec5; BYTE $0xba   // vmovdqu    ymm0, yword [rdx + 4*rdi]
  8937  	LONG $0x4c6ffec5; WORD $0x20ba // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
  8938  	LONG $0x546ffec5; WORD $0x40ba // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
  8939  	LONG $0x5c6ffec5; WORD $0x60ba // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
        	// Shuffle mask at LCDATA1+0x80 (rbp is loaded with LCDATA1 at function
        	// entry). Its bytes start 00 01 04 05 08 09 0c 0d, i.e. the low two
        	// bytes of each dword. vpermq 232 then brings qwords 0 and 2 of each
        	// result into the low 128 bits.
  8940  	QUAD $0x00000080a56ffdc5       // vmovdqa    ymm4, yword 128[rbp] /* [rip + .LCPI0_11] */
  8941  	LONG $0x007de2c4; BYTE $0xc4   // vpshufb    ymm0, ymm0, ymm4
  8942  	LONG $0x00fde3c4; WORD $0xe8c0 // vpermq    ymm0, ymm0, 232
  8943  	LONG $0x0075e2c4; BYTE $0xcc   // vpshufb    ymm1, ymm1, ymm4
  8944  	LONG $0x00fde3c4; WORD $0xe8c9 // vpermq    ymm1, ymm1, 232
  8945  	LONG $0x006de2c4; BYTE $0xd4   // vpshufb    ymm2, ymm2, ymm4
  8946  	LONG $0x00fde3c4; WORD $0xe8d2 // vpermq    ymm2, ymm2, 232
  8947  	LONG $0x0065e2c4; BYTE $0xdc   // vpshufb    ymm3, ymm3, ymm4
  8948  	LONG $0x00fde3c4; WORD $0xe8db // vpermq    ymm3, ymm3, 232
        	// Store the low 16 bytes (eight 16-bit values) of each result.
  8949  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
  8950  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
  8951  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
  8952  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
  8953  
  8954  LBB0_973:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  8955  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8956  	JE   LBB0_1553
  8957  
  8958  LBB0_974:
        	// Scalar loop: copy the low 16 bits of the 4-byte element at
        	// [rdx + 4*rsi] to [rcx + 2*rsi], advancing rsi until it equals r9.
  8959  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  8960  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  8961  	LONG $0x01c68348         // add    rsi, 1
  8962  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8963  	JNE  LBB0_974
  8964  	JMP  LBB0_1553
  8965  
  8966  LBB0_975:
        	// Remainder path of a 16-bit -> 64-bit zero-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  8967  	WORD $0xff31 // xor    edi, edi
  8968  
  8969  LBB0_976:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  8970  	LONG $0x01c0f641                           // test    r8b, 1
  8971  	JE   LBB0_978
        	// Vector step, 16 elements starting at index rdi: each vpmovzxwq
        	// zero-extends four 16-bit values from an 8-byte load to four qwords;
        	// the four results are stored as 128 bytes at [rcx + 8*rdi].
  8972  	LONG $0x347de2c4; WORD $0x7a04             // vpmovzxwq    ymm0, qword [rdx + 2*rdi]
  8973  	LONG $0x347de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwq    ymm1, qword [rdx + 2*rdi + 8]
  8974  	LONG $0x347de2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwq    ymm2, qword [rdx + 2*rdi + 16]
  8975  	LONG $0x347de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwq    ymm3, qword [rdx + 2*rdi + 24]
  8976  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  8977  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  8978  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  8979  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  8980  
  8981  LBB0_978:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  8982  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8983  	JE   LBB0_1553
  8984  
  8985  LBB0_979:
        	// Scalar loop: zero-extend the 16-bit value at [rdx + 2*rsi] and store
        	// it as 64 bits at [rcx + 8*rsi], advancing rsi until it equals r9.
  8986  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  8987  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  8988  	LONG $0x01c68348         // add    rsi, 1
  8989  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  8990  	JNE  LBB0_979
  8991  	JMP  LBB0_1553
  8992  
  8993  LBB0_980:
        	// Remainder path of a 32-bit -> 64-bit sign-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  8994  	WORD $0xff31 // xor    edi, edi
  8995  
  8996  LBB0_981:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  8997  	LONG $0x01c0f641                           // test    r8b, 1
  8998  	JE   LBB0_983
        	// Vector step, 16 elements starting at index rdi: each vpmovsxdq
        	// sign-extends four 32-bit values from a 16-byte load to four qwords;
        	// the four results are stored as 128 bytes at [rcx + 8*rdi].
  8999  	LONG $0x257de2c4; WORD $0xba04             // vpmovsxdq    ymm0, oword [rdx + 4*rdi]
  9000  	LONG $0x257de2c4; WORD $0xba4c; BYTE $0x10 // vpmovsxdq    ymm1, oword [rdx + 4*rdi + 16]
  9001  	LONG $0x257de2c4; WORD $0xba54; BYTE $0x20 // vpmovsxdq    ymm2, oword [rdx + 4*rdi + 32]
  9002  	LONG $0x257de2c4; WORD $0xba5c; BYTE $0x30 // vpmovsxdq    ymm3, oword [rdx + 4*rdi + 48]
  9003  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
  9004  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
  9005  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
  9006  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
  9007  
  9008  LBB0_983:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9009  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9010  	JE   LBB0_1553
  9011  
  9012  LBB0_984:
        	// Scalar loop: sign-extend the 32-bit value at [rdx + 4*rsi] and store
        	// it as 64 bits at [rcx + 8*rsi], advancing rsi until it equals r9.
  9013  	LONG $0xb2046348         // movsxd    rax, dword [rdx + 4*rsi]
  9014  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  9015  	LONG $0x01c68348         // add    rsi, 1
  9016  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9017  	JNE  LBB0_984
  9018  	JMP  LBB0_1553
  9019  
  9020  LBB0_985:
        	// Remainder path of a signed 32-bit integer -> float32 conversion: one
        	// optional vector step followed by a scalar loop. This entry zeroes rdi,
        	// the element index used by the vector step, and falls through.
  9021  	WORD $0xff31 // xor    edi, edi
  9022  
  9023  LBB0_986:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9024  	LONG $0x01c0f641               // test    r8b, 1
  9025  	JE   LBB0_988
        	// Vector step, 32 elements starting at index rdi: each vcvtdq2ps
        	// converts eight int32 values from a 32-byte load to float32; the four
        	// results are stored as 128 bytes at [rcx + 4*rdi].
  9026  	LONG $0x045bfcc5; BYTE $0xba   // vcvtdq2ps    ymm0, yword [rdx + 4*rdi]
  9027  	LONG $0x4c5bfcc5; WORD $0x20ba // vcvtdq2ps    ymm1, yword [rdx + 4*rdi + 32]
  9028  	LONG $0x545bfcc5; WORD $0x40ba // vcvtdq2ps    ymm2, yword [rdx + 4*rdi + 64]
  9029  	LONG $0x5c5bfcc5; WORD $0x60ba // vcvtdq2ps    ymm3, yword [rdx + 4*rdi + 96]
  9030  	LONG $0x0411fcc5; BYTE $0xb9   // vmovups    yword [rcx + 4*rdi], ymm0
  9031  	LONG $0x4c11fcc5; WORD $0x20b9 // vmovups    yword [rcx + 4*rdi + 32], ymm1
  9032  	LONG $0x5411fcc5; WORD $0x40b9 // vmovups    yword [rcx + 4*rdi + 64], ymm2
  9033  	LONG $0x5c11fcc5; WORD $0x60b9 // vmovups    yword [rcx + 4*rdi + 96], ymm3
  9034  
  9035  LBB0_988:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9036  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9037  	JE   LBB0_1553
  9038  
  9039  LBB0_989:
        	// Scalar loop: convert the int32 at [rdx + 4*rsi] to float32 and store
        	// it at [rcx + 4*rsi], advancing rsi until it equals r9. Only the low
        	// lane of xmm0 is stored, so the xmm4 pass-through operand has no
        	// effect on the output.
  9040  	LONG $0x042adac5; BYTE $0xb2 // vcvtsi2ss    xmm0, xmm4, dword [rdx + 4*rsi]
  9041  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
  9042  	LONG $0x01c68348             // add    rsi, 1
  9043  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  9044  	JNE  LBB0_989
  9045  	JMP  LBB0_1553
  9046  
  9047  LBB0_990:
        	// Remainder path of a float64 -> signed 32-bit integer conversion
        	// (truncating): one optional vector step followed by a scalar loop.
        	// This entry zeroes rdi, the element index used by the vector step,
        	// and falls through.
  9048  	WORD $0xff31 // xor    edi, edi
  9049  
  9050  LBB0_991:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9051  	LONG $0x01c0f641               // test    r8b, 1
  9052  	JE   LBB0_993
        	// Vector step, 16 elements starting at index rdi: each vcvttpd2dq
        	// truncates four float64 values from a 32-byte load to four int32 in an
        	// xmm register; the four results are stored as 64 bytes at [rcx + 4*rdi].
  9053  	LONG $0x04e6fdc5; BYTE $0xfa   // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
  9054  	LONG $0x4ce6fdc5; WORD $0x20fa // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
  9055  	LONG $0x54e6fdc5; WORD $0x40fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
  9056  	LONG $0x5ce6fdc5; WORD $0x60fa // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
  9057  	LONG $0x0411f9c5; BYTE $0xb9   // vmovupd    oword [rcx + 4*rdi], xmm0
  9058  	LONG $0x4c11f9c5; WORD $0x10b9 // vmovupd    oword [rcx + 4*rdi + 16], xmm1
  9059  	LONG $0x5411f9c5; WORD $0x20b9 // vmovupd    oword [rcx + 4*rdi + 32], xmm2
  9060  	LONG $0x5c11f9c5; WORD $0x30b9 // vmovupd    oword [rcx + 4*rdi + 48], xmm3
  9061  
  9062  LBB0_993:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9063  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9064  	JE   LBB0_1553
  9065  
  9066  LBB0_994:
        	// Scalar loop: truncate the float64 at [rdx + 8*rsi] to int32 and store
        	// it at [rcx + 4*rsi], advancing rsi until it equals r9.
  9067  	LONG $0x042cfbc5; BYTE $0xf2 // vcvttsd2si    eax, qword [rdx + 8*rsi]
  9068  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  9069  	LONG $0x01c68348             // add    rsi, 1
  9070  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  9071  	JNE  LBB0_994
  9072  	JMP  LBB0_1553
  9073  
  9074  LBB0_995:
        	// Remainder path of a 64-bit -> 32-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9075  	WORD $0xff31 // xor    edi, edi
  9076  
  9077  LBB0_996:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9078  	LONG $0x01c0f641                           // test    r8b, 1
  9079  	JE   LBB0_998
        	// Vector step, 16 elements starting at index rdi: load the even
        	// 16-byte pieces, then vshufps with immediate 136 (0x88) against the odd
        	// pieces picks dwords 0 and 2 of both sources - the low 32 bits of four
        	// consecutive 8-byte elements.
  9080  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  9081  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  9082  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  9083  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  9084  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  9085  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  9086  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  9087  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
        	// Store the four packed results as 64 bytes at [rcx + 4*rdi].
  9088  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  9089  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  9090  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  9091  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  9092  
  9093  LBB0_998:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9094  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9095  	JE   LBB0_1553
  9096  
  9097  LBB0_999:
        	// Scalar loop: copy the low 32 bits of the 8-byte element at
        	// [rdx + 8*rsi] to [rcx + 4*rsi], advancing rsi until it equals r9.
  9098  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  9099  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9100  	LONG $0x01c68348         // add    rsi, 1
  9101  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9102  	JNE  LBB0_999
  9103  	JMP  LBB0_1553
  9104  
  9105  LBB0_1000:
        	// Remainder path of a 16-bit -> 32-bit zero-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9106  	WORD $0xff31 // xor    edi, edi
  9107  
  9108  LBB0_1001:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9109  	LONG $0x01c0f641                           // test    r8b, 1
  9110  	JE   LBB0_1003
        	// Vector step, 32 elements starting at index rdi: each vpmovzxwd
        	// zero-extends eight 16-bit values from a 16-byte load to eight dwords;
        	// the four results are stored as 128 bytes at [rcx + 4*rdi].
  9111  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi]
  9112  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  9113  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  9114  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  9115  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  9116  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  9117  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  9118  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  9119  
  9120  LBB0_1003:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9121  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9122  	JE   LBB0_1553
  9123  
  9124  LBB0_1004:
        	// Scalar loop: zero-extend the 16-bit value at [rdx + 2*rsi] and store
        	// it as 32 bits at [rcx + 4*rsi], advancing rsi until it equals r9.
  9125  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  9126  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9127  	LONG $0x01c68348         // add    rsi, 1
  9128  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9129  	JNE  LBB0_1004
  9130  	JMP  LBB0_1553
  9131  
  9132  LBB0_1005:
        	// Remainder path of a 16-bit -> 32-bit sign-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9133  	WORD $0xff31 // xor    edi, edi
  9134  
  9135  LBB0_1006:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9136  	LONG $0x01c0f641                           // test    r8b, 1
  9137  	JE   LBB0_1008
        	// Vector step, 32 elements starting at index rdi: each vpmovsxwd
        	// sign-extends eight 16-bit values from a 16-byte load to eight dwords;
        	// the four results are stored as 128 bytes at [rcx + 4*rdi].
  9138  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
  9139  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
  9140  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
  9141  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
  9142  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  9143  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  9144  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  9145  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  9146  
  9147  LBB0_1008:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9148  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9149  	JE   LBB0_1553
  9150  
  9151  LBB0_1009:
        	// Scalar loop: sign-extend the 16-bit value at [rdx + 2*rsi] and store
        	// it as 32 bits at [rcx + 4*rsi], advancing rsi until it equals r9.
  9152  	LONG $0x7204bf0f         // movsx    eax, word [rdx + 2*rsi]
  9153  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9154  	LONG $0x01c68348         // add    rsi, 1
  9155  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9156  	JNE  LBB0_1009
  9157  	JMP  LBB0_1553
  9158  
  9159  LBB0_1010:
        	// Remainder path of a 64-bit -> 32-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9160  	WORD $0xff31 // xor    edi, edi
  9161  
  9162  LBB0_1011:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9163  	LONG $0x01c0f641                           // test    r8b, 1
  9164  	JE   LBB0_1013
        	// Vector step, 16 elements starting at index rdi: load the even
        	// 16-byte pieces, then vshufps with immediate 136 (0x88) against the odd
        	// pieces picks dwords 0 and 2 of both sources - the low 32 bits of four
        	// consecutive 8-byte elements.
  9165  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  9166  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  9167  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  9168  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  9169  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  9170  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  9171  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  9172  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
        	// Store the four packed results as 64 bytes at [rcx + 4*rdi].
  9173  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  9174  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  9175  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  9176  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  9177  
  9178  LBB0_1013:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9179  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9180  	JE   LBB0_1553
  9181  
  9182  LBB0_1014:
        	// Scalar loop: copy the low 32 bits of the 8-byte element at
        	// [rdx + 8*rsi] to [rcx + 4*rsi], advancing rsi until it equals r9.
  9183  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  9184  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9185  	LONG $0x01c68348         // add    rsi, 1
  9186  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9187  	JNE  LBB0_1014
  9188  	JMP  LBB0_1553
  9189  
  9190  LBB0_1015:
        	// Remainder path of a float32 -> signed 32-bit integer conversion
        	// (truncating): one optional vector step followed by a scalar loop.
        	// This entry zeroes rdi, the element index used by the vector step,
        	// and falls through.
  9191  	WORD $0xff31 // xor    edi, edi
  9192  
  9193  LBB0_1016:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9194  	LONG $0x01c0f641               // test    r8b, 1
  9195  	JE   LBB0_1018
        	// Vector step, 32 elements starting at index rdi: each vcvttps2dq
        	// truncates eight float32 values from a 32-byte load to int32; the four
        	// results are stored as 128 bytes at [rcx + 4*rdi].
  9196  	LONG $0x045bfec5; BYTE $0xba   // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
  9197  	LONG $0x4c5bfec5; WORD $0x20ba // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
  9198  	LONG $0x545bfec5; WORD $0x40ba // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
  9199  	LONG $0x5c5bfec5; WORD $0x60ba // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
  9200  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
  9201  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
  9202  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
  9203  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
  9204  
  9205  LBB0_1018:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9206  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9207  	JE   LBB0_1553
  9208  
  9209  LBB0_1019:
        	// Scalar loop: truncate the float32 at [rdx + 4*rsi] to int32 and store
        	// it at [rcx + 4*rsi], advancing rsi until it equals r9.
  9210  	LONG $0x042cfac5; BYTE $0xb2 // vcvttss2si    eax, dword [rdx + 4*rsi]
  9211  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  9212  	LONG $0x01c68348             // add    rsi, 1
  9213  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  9214  	JNE  LBB0_1019
  9215  	JMP  LBB0_1553
  9216  
  9217  LBB0_1020:
        	// Remainder path of a 16-bit -> 32-bit zero-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9218  	WORD $0xff31 // xor    edi, edi
  9219  
  9220  LBB0_1021:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9221  	LONG $0x01c0f641                           // test    r8b, 1
  9222  	JE   LBB0_1023
        	// Vector step, 32 elements starting at index rdi: each vpmovzxwd
        	// zero-extends eight 16-bit values from a 16-byte load to eight dwords;
        	// the four results are stored as 128 bytes at [rcx + 4*rdi].
  9223  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi]
  9224  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  9225  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  9226  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  9227  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  9228  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  9229  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  9230  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  9231  
  9232  LBB0_1023:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9233  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9234  	JE   LBB0_1553
  9235  
  9236  LBB0_1024:
        	// Scalar loop: zero-extend the 16-bit value at [rdx + 2*rsi] and store
        	// it as 32 bits at [rcx + 4*rsi], advancing rsi until it equals r9.
  9237  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  9238  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9239  	LONG $0x01c68348         // add    rsi, 1
  9240  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9241  	JNE  LBB0_1024
  9242  	JMP  LBB0_1553
  9243  
  9244  LBB0_1025:
        	// Remainder path of a 16-bit -> 32-bit sign-extending copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9245  	WORD $0xff31 // xor    edi, edi
  9246  
  9247  LBB0_1026:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9248  	LONG $0x01c0f641                           // test    r8b, 1
  9249  	JE   LBB0_1028
        	// Vector step, 32 elements starting at index rdi: each vpmovsxwd
        	// sign-extends eight 16-bit values from a 16-byte load to eight dwords;
        	// the four results are stored as 128 bytes at [rcx + 4*rdi].
  9250  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
  9251  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
  9252  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
  9253  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
  9254  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
  9255  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
  9256  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
  9257  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
  9258  
  9259  LBB0_1028:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9260  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9261  	JE   LBB0_1553
  9262  
  9263  LBB0_1029:
        	// Scalar loop: sign-extend the 16-bit value at [rdx + 2*rsi] and store
        	// it as 32 bits at [rcx + 4*rsi], advancing rsi until it equals r9.
  9264  	LONG $0x7204bf0f         // movsx    eax, word [rdx + 2*rsi]
  9265  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9266  	LONG $0x01c68348         // add    rsi, 1
  9267  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9268  	JNE  LBB0_1029
  9269  	JMP  LBB0_1553
  9270  
  9271  LBB0_1030:
        	// Remainder path of a 64-bit -> 32-bit truncating copy: one optional
        	// vector step followed by a scalar loop. This entry zeroes rdi, the
        	// element index used by the vector step, and falls through.
  9272  	WORD $0xff31 // xor    edi, edi
  9273  
  9274  LBB0_1031:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9275  	LONG $0x01c0f641                           // test    r8b, 1
  9276  	JE   LBB0_1033
        	// Vector step, 16 elements starting at index rdi: load the even
        	// 16-byte pieces, then vshufps with immediate 136 (0x88) against the odd
        	// pieces picks dwords 0 and 2 of both sources - the low 32 bits of four
        	// consecutive 8-byte elements.
  9277  	LONG $0x0410f8c5; BYTE $0xfa               // vmovups    xmm0, oword [rdx + 8*rdi]
  9278  	LONG $0x4c10f8c5; WORD $0x20fa             // vmovups    xmm1, oword [rdx + 8*rdi + 32]
  9279  	LONG $0x5410f8c5; WORD $0x40fa             // vmovups    xmm2, oword [rdx + 8*rdi + 64]
  9280  	LONG $0x5c10f8c5; WORD $0x60fa             // vmovups    xmm3, oword [rdx + 8*rdi + 96]
  9281  	LONG $0x44c6f8c5; WORD $0x10fa; BYTE $0x88 // vshufps    xmm0, xmm0, oword [rdx + 8*rdi + 16], 136
  9282  	LONG $0x4cc6f0c5; WORD $0x30fa; BYTE $0x88 // vshufps    xmm1, xmm1, oword [rdx + 8*rdi + 48], 136
  9283  	LONG $0x54c6e8c5; WORD $0x50fa; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdx + 8*rdi + 80], 136
  9284  	LONG $0x5cc6e0c5; WORD $0x70fa; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdx + 8*rdi + 112], 136
        	// Store the four packed results as 64 bytes at [rcx + 4*rdi].
  9285  	LONG $0x0411f8c5; BYTE $0xb9               // vmovups    oword [rcx + 4*rdi], xmm0
  9286  	LONG $0x4c11f8c5; WORD $0x10b9             // vmovups    oword [rcx + 4*rdi + 16], xmm1
  9287  	LONG $0x5411f8c5; WORD $0x20b9             // vmovups    oword [rcx + 4*rdi + 32], xmm2
  9288  	LONG $0x5c11f8c5; WORD $0x30b9             // vmovups    oword [rcx + 4*rdi + 48], xmm3
  9289  
  9290  LBB0_1033:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9291  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9292  	JE   LBB0_1553
  9293  
  9294  LBB0_1034:
        	// Scalar loop: copy the low 32 bits of the 8-byte element at
        	// [rdx + 8*rsi] to [rcx + 4*rsi], advancing rsi until it equals r9.
  9295  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  9296  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  9297  	LONG $0x01c68348         // add    rsi, 1
  9298  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  9299  	JNE  LBB0_1034
  9300  	JMP  LBB0_1553
  9301  
  9302  LBB0_1035:
        	// Remainder path of a float32 -> unsigned 32-bit integer conversion
        	// (truncating): one optional vector step followed by a scalar loop.
        	// This entry zeroes rdi, the element index used by the vector step,
        	// and falls through.
  9303  	WORD $0xff31 // xor    edi, edi
  9304  
  9305  LBB0_1036:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9306  	LONG $0x01c0f641               // test    r8b, 1
  9307  	JE   LBB0_1038
        	// Vector step, 16 elements starting at index rdi, four floats at a time.
        	// xmm1 = broadcast of the dword at LCDATA1+44 (0x4f000000, i.e. 2^31 as
        	// float32) and xmm4 = broadcast of the dword at LCDATA1+48 (0x80000000);
        	// rbp is loaded with LCDATA1 at function entry.
        	// Per lane: where x < 2^31 the direct vcvttps2dq result is kept,
        	// otherwise (x - 2^31) is converted and bit 31 is flipped with vxorps.
  9308  	LONG $0x0410f8c5; BYTE $0xba   // vmovups    xmm0, oword [rdx + 4*rdi]
  9309  	LONG $0x1879e2c4; WORD $0x2c4d // vbroadcastss    xmm1, dword 44[rbp] /* [rip + .LCPI0_2] */
  9310  	LONG $0xd1c2f8c5; BYTE $0x01   // vcmpltps    xmm2, xmm0, xmm1
  9311  	LONG $0xd95cf8c5               // vsubps    xmm3, xmm0, xmm1
  9312  	LONG $0xdb5bfac5               // vcvttps2dq    xmm3, xmm3
  9313  	LONG $0x1879e2c4; WORD $0x3065 // vbroadcastss    xmm4, dword 48[rbp] /* [rip + .LCPI0_3] */
  9314  	LONG $0xdc57e0c5               // vxorps    xmm3, xmm3, xmm4
  9315  	LONG $0xc05bfac5               // vcvttps2dq    xmm0, xmm0
  9316  	LONG $0x4a61e3c4; WORD $0x20c0 // vblendvps    xmm0, xmm3, xmm0, xmm2
        	// Same sequence for the floats at +16.
  9317  	LONG $0x5410f8c5; WORD $0x10ba // vmovups    xmm2, oword [rdx + 4*rdi + 16]
  9318  	LONG $0xd9c2e8c5; BYTE $0x01   // vcmpltps    xmm3, xmm2, xmm1
  9319  	LONG $0xe95ce8c5               // vsubps    xmm5, xmm2, xmm1
  9320  	LONG $0xed5bfac5               // vcvttps2dq    xmm5, xmm5
  9321  	LONG $0xec57d0c5               // vxorps    xmm5, xmm5, xmm4
  9322  	LONG $0xd25bfac5               // vcvttps2dq    xmm2, xmm2
  9323  	LONG $0x4a51e3c4; WORD $0x30d2 // vblendvps    xmm2, xmm5, xmm2, xmm3
        	// Same sequence for the floats at +32.
  9324  	LONG $0x5c10f8c5; WORD $0x20ba // vmovups    xmm3, oword [rdx + 4*rdi + 32]
  9325  	LONG $0xe9c2e0c5; BYTE $0x01   // vcmpltps    xmm5, xmm3, xmm1
  9326  	LONG $0xf15ce0c5               // vsubps    xmm6, xmm3, xmm1
  9327  	LONG $0xf65bfac5               // vcvttps2dq    xmm6, xmm6
  9328  	LONG $0xf457c8c5               // vxorps    xmm6, xmm6, xmm4
  9329  	LONG $0xdb5bfac5               // vcvttps2dq    xmm3, xmm3
  9330  	LONG $0x4a49e3c4; WORD $0x50db // vblendvps    xmm3, xmm6, xmm3, xmm5
        	// Same sequence for the floats at +48; xmm1 and xmm4 are reused as
        	// temporaries here because the constants are no longer needed.
  9331  	LONG $0x6c10f8c5; WORD $0x30ba // vmovups    xmm5, oword [rdx + 4*rdi + 48]
  9332  	LONG $0xf1c2d0c5; BYTE $0x01   // vcmpltps    xmm6, xmm5, xmm1
  9333  	LONG $0xc95cd0c5               // vsubps    xmm1, xmm5, xmm1
  9334  	LONG $0xc95bfac5               // vcvttps2dq    xmm1, xmm1
  9335  	LONG $0xcc57f0c5               // vxorps    xmm1, xmm1, xmm4
  9336  	LONG $0xe55bfac5               // vcvttps2dq    xmm4, xmm5
  9337  	LONG $0x4a71e3c4; WORD $0x60cc // vblendvps    xmm1, xmm1, xmm4, xmm6
        	// Store the four results as 64 bytes at [rcx + 4*rdi].
  9338  	LONG $0x0411f8c5; BYTE $0xb9   // vmovups    oword [rcx + 4*rdi], xmm0
  9339  	LONG $0x5411f8c5; WORD $0x10b9 // vmovups    oword [rcx + 4*rdi + 16], xmm2
  9340  	LONG $0x5c11f8c5; WORD $0x20b9 // vmovups    oword [rcx + 4*rdi + 32], xmm3
  9341  	LONG $0x4c11f8c5; WORD $0x30b9 // vmovups    oword [rcx + 4*rdi + 48], xmm1
  9342  
  9343  LBB0_1038:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9344  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9345  	JE   LBB0_1553
  9346  
  9347  LBB0_1039:
        	// Scalar loop: truncate the float32 at [rdx + 4*rsi] into the 64-bit
        	// rax, then store only its low 32 bits at [rcx + 4*rsi], advancing rsi
        	// until it equals r9. The 64-bit destination covers inputs in
        	// [2^31, 2^32), which a 32-bit signed conversion could not represent.
  9348  	LONG $0x2cfae1c4; WORD $0xb204 // vcvttss2si    rax, dword [rdx + 4*rsi]
  9349  	WORD $0x0489; BYTE $0xb1       // mov    dword [rcx + 4*rsi], eax
  9350  	LONG $0x01c68348               // add    rsi, 1
  9351  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  9352  	JNE  LBB0_1039
  9353  	JMP  LBB0_1553
  9354  
  9355  LBB0_1040:
        	// Remainder path of an unsigned 32-bit integer -> float64 conversion:
        	// one optional vector step followed by a scalar loop. This entry zeroes
        	// rdi, the element index used by the vector step, and falls through.
  9356  	WORD $0xff31 // xor    edi, edi
  9357  
  9358  LBB0_1041:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9359  	LONG $0x01c0f641                           // test    r8b, 1
  9360  	JE   LBB0_1043
        	// Vector step, 16 elements starting at index rdi: zero-extend four
        	// 32-bit values per load to qwords.
  9361  	LONG $0x357de2c4; WORD $0xba04             // vpmovzxdq    ymm0, oword [rdx + 4*rdi]
  9362  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x10 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 16]
  9363  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x20 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 32]
  9364  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x30 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 48]
        	// ymm4 = broadcast of the qword at LCDATA1+8 (0x4330000000000000, the
        	// bit pattern of 2^52 as float64; rbp is loaded with LCDATA1 at function
        	// entry). OR-ing the integer into that pattern and then subtracting
        	// 2^52 as a double leaves the integer's value as a float64.
  9365  	LONG $0x597de2c4; WORD $0x0865             // vpbroadcastq    ymm4, qword 8[rbp] /* [rip + .LCPI0_5] */
  9366  	LONG $0xc4ebfdc5                           // vpor    ymm0, ymm0, ymm4
  9367  	LONG $0xc45cfdc5                           // vsubpd    ymm0, ymm0, ymm4
  9368  	LONG $0xccebf5c5                           // vpor    ymm1, ymm1, ymm4
  9369  	LONG $0xcc5cf5c5                           // vsubpd    ymm1, ymm1, ymm4
  9370  	LONG $0xd4ebedc5                           // vpor    ymm2, ymm2, ymm4
  9371  	LONG $0xd45cedc5                           // vsubpd    ymm2, ymm2, ymm4
  9372  	LONG $0xdcebe5c5                           // vpor    ymm3, ymm3, ymm4
  9373  	LONG $0xdc5ce5c5                           // vsubpd    ymm3, ymm3, ymm4
        	// Store the four results as 128 bytes at [rcx + 8*rdi].
  9374  	LONG $0x0411fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm0
  9375  	LONG $0x4c11fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm1
  9376  	LONG $0x5411fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm2
  9377  	LONG $0x5c11fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm3
  9378  
  9379  LBB0_1043:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9380  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9381  	JE   LBB0_1553
  9382  
  9383  LBB0_1044:
        	// Scalar loop: the 32-bit load zero-extends into rax, and the 64-bit
        	// vcvtsi2sd therefore treats the value as unsigned; the float64 result
        	// is stored at [rcx + 8*rsi]. rsi advances until it equals r9.
  9384  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
  9385  	LONG $0x2ad3e1c4; BYTE $0xc0 // vcvtsi2sd    xmm0, xmm5, rax
  9386  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
  9387  	LONG $0x01c68348             // add    rsi, 1
  9388  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  9389  	JNE  LBB0_1044
  9390  	JMP  LBB0_1553
  9391  
  9392  LBB0_1045:
        	// Remainder path of an unsigned 16-bit integer -> float64 conversion:
        	// one optional vector step followed by a scalar loop. This entry zeroes
        	// rdi, the element index used by the vector step, and falls through.
  9393  	WORD $0xff31 // xor    edi, edi
  9394  
  9395  LBB0_1046:
        	// Skip the vector step when bit 0 of r8 is clear.
        	// NOTE(review): r8 is set before this excerpt; it looks like a count of
        	// vector chunks whose odd leftover is handled here - confirm.
  9396  	LONG $0x01c0f641                           // test    r8b, 1
  9397  	JE   LBB0_1048
        	// Vector step, 16 elements starting at index rdi: zero-extend four
        	// 16-bit values per 8-byte load to dwords, convert each group of four
        	// dwords to float64, and store the results as 128 bytes at [rcx + 8*rdi].
  9398  	LONG $0x3379e2c4; WORD $0x7a04             // vpmovzxwd    xmm0, qword [rdx + 2*rdi]
  9399  	LONG $0x3379e2c4; WORD $0x7a4c; BYTE $0x08 // vpmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  9400  	LONG $0x3379e2c4; WORD $0x7a54; BYTE $0x10 // vpmovzxwd    xmm2, qword [rdx + 2*rdi + 16]
  9401  	LONG $0x3379e2c4; WORD $0x7a5c; BYTE $0x18 // vpmovzxwd    xmm3, qword [rdx + 2*rdi + 24]
  9402  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
  9403  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
  9404  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
  9405  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
  9406  	LONG $0x0411fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm0
  9407  	LONG $0x4c11fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm1
  9408  	LONG $0x5411fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm2
  9409  	LONG $0x5c11fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm3
  9410  
  9411  LBB0_1048:
        	// No scalar work when rsi == r9.
        	// NOTE(review): rsi and r9 are set before this excerpt; presumably the
        	// next unconverted index and the total element count - confirm.
  9412  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9413  	JE   LBB0_1553
  9414  
  9415  LBB0_1049:
        	// Scalar loop: zero-extend the 16-bit value at [rdx + 2*rsi], convert it
        	// to float64 and store it at [rcx + 8*rsi], advancing rsi until it
        	// equals r9.
  9416  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  9417  	LONG $0xc02adbc5             // vcvtsi2sd    xmm0, xmm4, eax
  9418  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
  9419  	LONG $0x01c68348             // add    rsi, 1
  9420  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  9421  	JNE  LBB0_1049
  9422  	JMP  LBB0_1553
  9423  
   // int16 -> float64 remainder handling.
   // Same shape as the unsigned variant, but the loads sign-extend
   // (vpmovsxwd in the vector step, movsx in the scalar loop).
   // Entering at LBB0_1050 zeroes the vector index rdi; LBB0_1051 keeps it.
   // The vector step (16 elements) runs only when bit 0 of r8 is set; r8 is set
   // up outside this view -- presumably a chunk count, TODO confirm.
   // The scalar loop then converts elements rsi..r9-1 one at a time.
   9424  LBB0_1050:
   9425  	WORD $0xff31 // xor    edi, edi
   9426  
   9427  LBB0_1051:
   9428  	LONG $0x01c0f641                           // test    r8b, 1
   9429  	JE   LBB0_1053
   9430  	LONG $0x2379e2c4; WORD $0x7a04             // vpmovsxwd    xmm0, qword [rdx + 2*rdi]
   9431  	LONG $0x2379e2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
   9432  	LONG $0x2379e2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwd    xmm2, qword [rdx + 2*rdi + 16]
   9433  	LONG $0x2379e2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwd    xmm3, qword [rdx + 2*rdi + 24]
   9434  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
   9435  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
   9436  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
   9437  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
   9438  	LONG $0x0411fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm0
   9439  	LONG $0x4c11fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm1
   9440  	LONG $0x5411fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm2
   9441  	LONG $0x5c11fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm3
   9442  
   9443  LBB0_1053:
   9444  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9445  	JE   LBB0_1553
   9446  
   9447  LBB0_1054:
   9448  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
   9449  	LONG $0xc02adbc5             // vcvtsi2sd    xmm0, xmm4, eax
   9450  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
   9451  	LONG $0x01c68348             // add    rsi, 1
   9452  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9453  	JNE  LBB0_1054
   9454  	JMP  LBB0_1553
  9455  
   // 64-bit integer -> float64 remainder handling.
   // This path has no packed qword->double conversion, so the "vector" step
   // loads 8 xmm registers (2 qwords each), moves each qword to rax with
   // vpextrq / vmovq, converts it with the 64-bit form of vcvtsi2sd, and
   // re-pairs the results with vunpcklpd before storing 16 doubles.
   // Loads and stores are interleaved out of address order, but each store
   // offset matches the offset its source pair was loaded from.
   // Entering at LBB0_1055 zeroes the index rdi; LBB0_1056 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   // The scalar loop then converts elements rsi..r9-1 straight from memory.
   9456  LBB0_1055:
   9457  	WORD $0xff31 // xor    edi, edi
   9458  
   9459  LBB0_1056:
   9460  	LONG $0x01c0f641               // test    r8b, 1
   9461  	JE   LBB0_1058
   9462  	LONG $0x046ffac5; BYTE $0xfa   // vmovdqu    xmm0, oword [rdx + 8*rdi]
   9463  	LONG $0x4c6ffac5; WORD $0x10fa // vmovdqu    xmm1, oword [rdx + 8*rdi + 16]
   9464  	LONG $0x5c6ffac5; WORD $0x20fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
   9465  	LONG $0x546ffac5; WORD $0x30fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 48]
   9466  	LONG $0x16f9e3c4; WORD $0x01c0 // vpextrq    rax, xmm0, 1
   9467  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9468  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
   9469  	LONG $0x2aa3e1c4; BYTE $0xc0   // vcvtsi2sd    xmm0, xmm11, rax
   9470  	LONG $0xc41479c5               // vunpcklpd    xmm8, xmm0, xmm4
   9471  	LONG $0x16f9e3c4; WORD $0x01c8 // vpextrq    rax, xmm1, 1
   9472  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9473  	LONG $0x7ef9e1c4; BYTE $0xc8   // vmovq    rax, xmm1
   9474  	LONG $0x2aa3e1c4; BYTE $0xc8   // vcvtsi2sd    xmm1, xmm11, rax
   9475  	LONG $0xcc14f1c5               // vunpcklpd    xmm1, xmm1, xmm4
   9476  	LONG $0x16f9e3c4; WORD $0x01d0 // vpextrq    rax, xmm2, 1
   9477  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9478  	LONG $0x7ef9e1c4; BYTE $0xd0   // vmovq    rax, xmm2
   9479  	LONG $0x2aa3e1c4; BYTE $0xd0   // vcvtsi2sd    xmm2, xmm11, rax
   9480  	LONG $0xd414e9c5               // vunpcklpd    xmm2, xmm2, xmm4
   9481  	LONG $0x16f9e3c4; WORD $0x01d8 // vpextrq    rax, xmm3, 1
   9482  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9483  	LONG $0x7ef9e1c4; BYTE $0xd8   // vmovq    rax, xmm3
   9484  	LONG $0x2aa3e1c4; BYTE $0xd8   // vcvtsi2sd    xmm3, xmm11, rax
   9485  	LONG $0x6c6ffac5; WORD $0x50fa // vmovdqu    xmm5, oword [rdx + 8*rdi + 80]
   9486  	LONG $0x16f9e3c4; WORD $0x01e8 // vpextrq    rax, xmm5, 1
   9487  	LONG $0x2aa3e1c4; BYTE $0xf0   // vcvtsi2sd    xmm6, xmm11, rax
   9488  	LONG $0x7ef9e1c4; BYTE $0xe8   // vmovq    rax, xmm5
   9489  	LONG $0x2aa3e1c4; BYTE $0xe8   // vcvtsi2sd    xmm5, xmm11, rax
   9490  	LONG $0x7c6ffac5; WORD $0x40fa // vmovdqu    xmm7, oword [rdx + 8*rdi + 64]
   9491  	LONG $0x16f9e3c4; WORD $0x01f8 // vpextrq    rax, xmm7, 1
   9492  	LONG $0x2aa3e1c4; BYTE $0xc0   // vcvtsi2sd    xmm0, xmm11, rax
   9493  	LONG $0xdc14e1c5               // vunpcklpd    xmm3, xmm3, xmm4
   9494  	LONG $0x7ef9e1c4; BYTE $0xf8   // vmovq    rax, xmm7
   9495  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9496  	LONG $0xee14d1c5               // vunpcklpd    xmm5, xmm5, xmm6
   9497  	LONG $0x746ffac5; WORD $0x70fa // vmovdqu    xmm6, oword [rdx + 8*rdi + 112]
   9498  	LONG $0x16f9e3c4; WORD $0x01f0 // vpextrq    rax, xmm6, 1
   9499  	LONG $0xc014d9c5               // vunpcklpd    xmm0, xmm4, xmm0
   9500  	LONG $0x2aa3e1c4; BYTE $0xe0   // vcvtsi2sd    xmm4, xmm11, rax
   9501  	LONG $0x7ef9e1c4; BYTE $0xf0   // vmovq    rax, xmm6
   9502  	LONG $0x2aa3e1c4; BYTE $0xf0   // vcvtsi2sd    xmm6, xmm11, rax
   9503  	LONG $0xe414c9c5               // vunpcklpd    xmm4, xmm6, xmm4
   9504  	LONG $0x746ffac5; WORD $0x60fa // vmovdqu    xmm6, oword [rdx + 8*rdi + 96]
   9505  	LONG $0x16f9e3c4; WORD $0x01f0 // vpextrq    rax, xmm6, 1
   9506  	LONG $0x2aa3e1c4; BYTE $0xf8   // vcvtsi2sd    xmm7, xmm11, rax
   9507  	LONG $0x7ef9e1c4; BYTE $0xf0   // vmovq    rax, xmm6
   9508  	LONG $0x2aa3e1c4; BYTE $0xf0   // vcvtsi2sd    xmm6, xmm11, rax
   9509  	LONG $0xf714c9c5               // vunpcklpd    xmm6, xmm6, xmm7
   9510  	LONG $0x4c11f9c5; WORD $0x10f9 // vmovupd    oword [rcx + 8*rdi + 16], xmm1
   9511  	LONG $0x041179c5; BYTE $0xf9   // vmovupd    oword [rcx + 8*rdi], xmm8
   9512  	LONG $0x5c11f9c5; WORD $0x20f9 // vmovupd    oword [rcx + 8*rdi + 32], xmm3
   9513  	LONG $0x5411f9c5; WORD $0x30f9 // vmovupd    oword [rcx + 8*rdi + 48], xmm2
   9514  	LONG $0x4411f9c5; WORD $0x40f9 // vmovupd    oword [rcx + 8*rdi + 64], xmm0
   9515  	LONG $0x6c11f9c5; WORD $0x50f9 // vmovupd    oword [rcx + 8*rdi + 80], xmm5
   9516  	LONG $0x7411f9c5; WORD $0x60f9 // vmovupd    oword [rcx + 8*rdi + 96], xmm6
   9517  	LONG $0x6411f9c5; WORD $0x70f9 // vmovupd    oword [rcx + 8*rdi + 112], xmm4
   9518  
   9519  LBB0_1058:
   9520  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9521  	JE   LBB0_1553
   9522  
   9523  LBB0_1059:
   9524  	LONG $0x2aa3e1c4; WORD $0xf204 // vcvtsi2sd    xmm0, xmm11, qword [rdx + 8*rsi]
   9525  	LONG $0x0411fbc5; BYTE $0xf1   // vmovsd    qword [rcx + 8*rsi], xmm0
   9526  	LONG $0x01c68348               // add    rsi, 1
   9527  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   9528  	JNE  LBB0_1059
   9529  	JMP  LBB0_1553
  9530  
   // float32 -> float64 remainder handling.
   // The vector step widens 16 floats with four vcvtps2pd (4 floats -> 4 doubles
   // each) and stores 4 ymm registers; it runs only when bit 0 of r8 is set.
   // Entering at LBB0_1060 zeroes the index rdi; LBB0_1061 keeps it.
   // r8 is set up outside this view -- presumably a chunk count, TODO confirm.
   // The scalar loop then widens elements rsi..r9-1 with vcvtss2sd.
   9531  LBB0_1060:
   9532  	WORD $0xff31 // xor    edi, edi
   9533  
   9534  LBB0_1061:
   9535  	LONG $0x01c0f641               // test    r8b, 1
   9536  	JE   LBB0_1063
   9537  	LONG $0x045afcc5; BYTE $0xba   // vcvtps2pd    ymm0, oword [rdx + 4*rdi]
   9538  	LONG $0x4c5afcc5; WORD $0x10ba // vcvtps2pd    ymm1, oword [rdx + 4*rdi + 16]
   9539  	LONG $0x545afcc5; WORD $0x20ba // vcvtps2pd    ymm2, oword [rdx + 4*rdi + 32]
   9540  	LONG $0x5c5afcc5; WORD $0x30ba // vcvtps2pd    ymm3, oword [rdx + 4*rdi + 48]
   9541  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
   9542  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
   9543  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
   9544  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
   9545  
   9546  LBB0_1063:
   9547  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9548  	JE   LBB0_1553
   9549  
   9550  LBB0_1064:
   9551  	LONG $0x0410fac5; BYTE $0xb2 // vmovss    xmm0, dword [rdx + 4*rsi]
   9552  	LONG $0xc05afac5             // vcvtss2sd    xmm0, xmm0, xmm0
   9553  	LONG $0x0411fbc5; BYTE $0xf1 // vmovsd    qword [rcx + 8*rsi], xmm0
   9554  	LONG $0x01c68348             // add    rsi, 1
   9555  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9556  	JNE  LBB0_1064
   9557  	JMP  LBB0_1553
  9558  
   // 32-bit integer -> 16-bit integer truncation, remainder handling.
   // The vector step narrows 32 elements: the vpshufb mask loaded from
   // 128[rbp] moves the low word of every dword into the low 8 bytes of each
   // 128-bit lane, and vpermq 232 then gathers those two qwords into the low
   // xmm half, which is stored as 8 words.
   // rbp is pointed at LCDATA1 at function entry; the mask is the constant
   // block at LCDATA1+0x80.
   // Entering at LBB0_1065 zeroes the index rdi; LBB0_1066 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   // The scalar loop copies the low word of each dword for rsi..r9-1.
   9559  LBB0_1065:
   9560  	WORD $0xff31 // xor    edi, edi
   9561  
   9562  LBB0_1066:
   9563  	LONG $0x01c0f641               // test    r8b, 1
   9564  	JE   LBB0_1068
   9565  	LONG $0x046ffec5; BYTE $0xba   // vmovdqu    ymm0, yword [rdx + 4*rdi]
   9566  	LONG $0x4c6ffec5; WORD $0x20ba // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
   9567  	LONG $0x546ffec5; WORD $0x40ba // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
   9568  	LONG $0x5c6ffec5; WORD $0x60ba // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
   9569  	QUAD $0x00000080a56ffdc5       // vmovdqa    ymm4, yword 128[rbp] /* [rip + .LCPI0_11] */
   9570  	LONG $0x007de2c4; BYTE $0xc4   // vpshufb    ymm0, ymm0, ymm4
   9571  	LONG $0x00fde3c4; WORD $0xe8c0 // vpermq    ymm0, ymm0, 232
   9572  	LONG $0x0075e2c4; BYTE $0xcc   // vpshufb    ymm1, ymm1, ymm4
   9573  	LONG $0x00fde3c4; WORD $0xe8c9 // vpermq    ymm1, ymm1, 232
   9574  	LONG $0x006de2c4; BYTE $0xd4   // vpshufb    ymm2, ymm2, ymm4
   9575  	LONG $0x00fde3c4; WORD $0xe8d2 // vpermq    ymm2, ymm2, 232
   9576  	LONG $0x0065e2c4; BYTE $0xdc   // vpshufb    ymm3, ymm3, ymm4
   9577  	LONG $0x00fde3c4; WORD $0xe8db // vpermq    ymm3, ymm3, 232
   9578  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
   9579  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
   9580  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
   9581  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
   9582  
   9583  LBB0_1068:
   9584  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9585  	JE   LBB0_1553
   9586  
   9587  LBB0_1069:
   9588  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
   9589  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
   9590  	LONG $0x01c68348         // add    rsi, 1
   9591  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   9592  	JNE  LBB0_1069
   9593  	JMP  LBB0_1553
  9594  
   // 32-bit integer -> 16-bit integer truncation, remainder handling.
   // Instruction-for-instruction the same as the LBB0_1065 group: truncation
   // keeps only the low word, so signedness does not change the code.
   // NOTE(review): presumably the generator emitted one copy per source/target
   // signedness combination -- confirm against the dispatch code.
   // Vector step (32 elements, only when bit 0 of r8 is set): vpshufb with the
   // mask at 128[rbp] packs the low words of each lane, vpermq 232 gathers them
   // into the low xmm half. The scalar loop handles elements rsi..r9-1.
   // Entering at LBB0_1070 zeroes the index rdi; LBB0_1071 keeps it.
   9595  LBB0_1070:
   9596  	WORD $0xff31 // xor    edi, edi
   9597  
   9598  LBB0_1071:
   9599  	LONG $0x01c0f641               // test    r8b, 1
   9600  	JE   LBB0_1073
   9601  	LONG $0x046ffec5; BYTE $0xba   // vmovdqu    ymm0, yword [rdx + 4*rdi]
   9602  	LONG $0x4c6ffec5; WORD $0x20ba // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
   9603  	LONG $0x546ffec5; WORD $0x40ba // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
   9604  	LONG $0x5c6ffec5; WORD $0x60ba // vmovdqu    ymm3, yword [rdx + 4*rdi + 96]
   9605  	QUAD $0x00000080a56ffdc5       // vmovdqa    ymm4, yword 128[rbp] /* [rip + .LCPI0_11] */
   9606  	LONG $0x007de2c4; BYTE $0xc4   // vpshufb    ymm0, ymm0, ymm4
   9607  	LONG $0x00fde3c4; WORD $0xe8c0 // vpermq    ymm0, ymm0, 232
   9608  	LONG $0x0075e2c4; BYTE $0xcc   // vpshufb    ymm1, ymm1, ymm4
   9609  	LONG $0x00fde3c4; WORD $0xe8c9 // vpermq    ymm1, ymm1, 232
   9610  	LONG $0x006de2c4; BYTE $0xd4   // vpshufb    ymm2, ymm2, ymm4
   9611  	LONG $0x00fde3c4; WORD $0xe8d2 // vpermq    ymm2, ymm2, 232
   9612  	LONG $0x0065e2c4; BYTE $0xdc   // vpshufb    ymm3, ymm3, ymm4
   9613  	LONG $0x00fde3c4; WORD $0xe8db // vpermq    ymm3, ymm3, 232
   9614  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
   9615  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
   9616  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
   9617  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
   9618  
   9619  LBB0_1073:
   9620  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9621  	JE   LBB0_1553
   9622  
   9623  LBB0_1074:
   9624  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
   9625  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
   9626  	LONG $0x01c68348         // add    rsi, 1
   9627  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   9628  	JNE  LBB0_1074
   9629  	JMP  LBB0_1553
  9630  
   // float64 -> 16-bit integer (unsigned pack) remainder handling.
   // The vector step converts 16 doubles to int32 with truncating vcvttpd2dq,
   // narrows them to words with vpackusdw (unsigned saturation), and uses
   // vpunpcklqdq + vpermq 216 to restore element order before one 32-byte store.
   // NOTE(review): the scalar tail keeps the low 16 bits of vcvttsd2si instead
   // of saturating, so the two paths agree only for inputs that fit in the
   // 16-bit unsigned range.
   // Entering at LBB0_1075 zeroes the index rdi; LBB0_1076 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   9631  LBB0_1075:
   9632  	WORD $0xff31 // xor    edi, edi
   9633  
   9634  LBB0_1076:
   9635  	LONG $0x01c0f641               // test    r8b, 1
   9636  	JE   LBB0_1078
   9637  	LONG $0x04e6fdc5; BYTE $0xfa   // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
   9638  	LONG $0x4ce6fdc5; WORD $0x20fa // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
   9639  	LONG $0x54e6fdc5; WORD $0x40fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
   9640  	LONG $0x5ce6fdc5; WORD $0x60fa // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
   9641  	LONG $0x186de3c4; WORD $0x01d3 // vinsertf128    ymm2, ymm2, xmm3, 1
   9642  	LONG $0x2b6de2c4; BYTE $0xd0   // vpackusdw    ymm2, ymm2, ymm0
   9643  	LONG $0x187de3c4; WORD $0x01c1 // vinsertf128    ymm0, ymm0, xmm1, 1
   9644  	LONG $0x2b7de2c4; BYTE $0xc0   // vpackusdw    ymm0, ymm0, ymm0
   9645  	LONG $0xc26cfdc5               // vpunpcklqdq    ymm0, ymm0, ymm2
   9646  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
   9647  	LONG $0x047ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm0
   9648  
   9649  LBB0_1078:
   9650  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9651  	JE   LBB0_1553
   9652  
   9653  LBB0_1079:
   9654  	LONG $0x042cfbc5; BYTE $0xf2 // vcvttsd2si    eax, qword [rdx + 8*rsi]
   9655  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
   9656  	LONG $0x01c68348             // add    rsi, 1
   9657  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9658  	JNE  LBB0_1079
   9659  	JMP  LBB0_1553
  9660  
   // float64 -> 16-bit integer (signed pack) remainder handling.
   // Same structure as the unsigned variant, but the int32 results of
   // vcvttpd2dq are narrowed with vpackssdw (signed saturation).
   // vpunpcklqdq + vpermq 216 restore element order before one 32-byte store
   // of 16 words.
   // NOTE(review): the scalar tail keeps the low 16 bits of vcvttsd2si instead
   // of saturating, so the two paths agree only for inputs that fit in the
   // 16-bit signed range.
   // Entering at LBB0_1080 zeroes the index rdi; LBB0_1081 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   9661  LBB0_1080:
   9662  	WORD $0xff31 // xor    edi, edi
   9663  
   9664  LBB0_1081:
   9665  	LONG $0x01c0f641               // test    r8b, 1
   9666  	JE   LBB0_1083
   9667  	LONG $0x04e6fdc5; BYTE $0xfa   // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
   9668  	LONG $0x4ce6fdc5; WORD $0x20fa // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
   9669  	LONG $0x54e6fdc5; WORD $0x40fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
   9670  	LONG $0x5ce6fdc5; WORD $0x60fa // vcvttpd2dq    xmm3, yword [rdx + 8*rdi + 96]
   9671  	LONG $0x186de3c4; WORD $0x01d3 // vinsertf128    ymm2, ymm2, xmm3, 1
   9672  	LONG $0xd06bedc5               // vpackssdw    ymm2, ymm2, ymm0
   9673  	LONG $0x187de3c4; WORD $0x01c1 // vinsertf128    ymm0, ymm0, xmm1, 1
   9674  	LONG $0xc06bfdc5               // vpackssdw    ymm0, ymm0, ymm0
   9675  	LONG $0xc26cfdc5               // vpunpcklqdq    ymm0, ymm0, ymm2
   9676  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
   9677  	LONG $0x047ffec5; BYTE $0x79   // vmovdqu    yword [rcx + 2*rdi], ymm0
   9678  
   9679  LBB0_1083:
   9680  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9681  	JE   LBB0_1553
   9682  
   9683  LBB0_1084:
   9684  	LONG $0x042cfbc5; BYTE $0xf2 // vcvttsd2si    eax, qword [rdx + 8*rsi]
   9685  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
   9686  	LONG $0x01c68348             // add    rsi, 1
   9687  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9688  	JNE  LBB0_1084
   9689  	JMP  LBB0_1553
  9690  
   // 64-bit integer -> 16-bit integer truncation, remainder handling.
   // The vector step narrows 16 elements: vpblendw with mask 17 against a zeroed
   // register keeps only word 0 of each qword (the low 16 bits) and clears the
   // rest. Two rounds of vpackusdw then squeeze those values down to words;
   // they already fit in 16 bits, so the unsigned saturation never changes
   // them. vpunpcklqdq + vpermq 216 restore element order.
   // Entering at LBB0_1085 zeroes the index rdi; LBB0_1086 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   // The scalar loop copies the low word of each qword for rsi..r9-1.
   9691  LBB0_1085:
   9692  	WORD $0xff31 // xor    edi, edi
   9693  
   9694  LBB0_1086:
   9695  	LONG $0x01c0f641                           // test    r8b, 1
   9696  	JE   LBB0_1088
   9697  	LONG $0xc0eff9c5                           // vpxor    xmm0, xmm0, xmm0
   9698  	LONG $0x0e79e3c4; WORD $0xfa0c; BYTE $0x11 // vpblendw    xmm1, xmm0, oword [rdx + 8*rdi], 17
   9699  	QUAD $0x1110fa540e79e3c4                   // vpblendw    xmm2, xmm0, oword [rdx + 8*rdi + 16], 17
   9700  	QUAD $0x1120fa5c0e79e3c4                   // vpblendw    xmm3, xmm0, oword [rdx + 8*rdi + 32], 17
   9701  	QUAD $0x1130fa640e79e3c4                   // vpblendw    xmm4, xmm0, oword [rdx + 8*rdi + 48], 17
   9702  	QUAD $0x1140fa6c0e79e3c4                   // vpblendw    xmm5, xmm0, oword [rdx + 8*rdi + 64], 17
   9703  	QUAD $0x1150fa740e79e3c4                   // vpblendw    xmm6, xmm0, oword [rdx + 8*rdi + 80], 17
   9704  	QUAD $0x1160fa7c0e79e3c4                   // vpblendw    xmm7, xmm0, oword [rdx + 8*rdi + 96], 17
   9705  	QUAD $0x1170fa440e79e3c4                   // vpblendw    xmm0, xmm0, oword [rdx + 8*rdi + 112], 17
   9706  	LONG $0x384de3c4; WORD $0x01c0             // vinserti128    ymm0, ymm6, xmm0, 1
   9707  	LONG $0x3855e3c4; WORD $0x01ef             // vinserti128    ymm5, ymm5, xmm7, 1
   9708  	LONG $0x2b55e2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm5, ymm0
   9709  	LONG $0x2b7de2c4; BYTE $0xc0               // vpackusdw    ymm0, ymm0, ymm0
   9710  	LONG $0x386de3c4; WORD $0x01d4             // vinserti128    ymm2, ymm2, xmm4, 1
   9711  	LONG $0x3875e3c4; WORD $0x01cb             // vinserti128    ymm1, ymm1, xmm3, 1
   9712  	LONG $0x2b75e2c4; BYTE $0xca               // vpackusdw    ymm1, ymm1, ymm2
   9713  	LONG $0x2b75e2c4; BYTE $0xc8               // vpackusdw    ymm1, ymm1, ymm0
   9714  	LONG $0xc06cf5c5                           // vpunpcklqdq    ymm0, ymm1, ymm0
   9715  	LONG $0x00fde3c4; WORD $0xd8c0             // vpermq    ymm0, ymm0, 216
   9716  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
   9717  
   9718  LBB0_1088:
   9719  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9720  	JE   LBB0_1553
   9721  
   9722  LBB0_1089:
   9723  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
   9724  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
   9725  	LONG $0x01c68348         // add    rsi, 1
   9726  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   9727  	JNE  LBB0_1089
   9728  	JMP  LBB0_1553
  9729  
   // float32 -> 16-bit integer (unsigned pack) remainder handling.
   // The vector step converts 32 floats: each vcvttps2dq truncates 8 floats to
   // int32, vextracti128 splits off the upper half, and vpackusdw narrows both
   // halves to 8 words with unsigned saturation.
   // NOTE(review): the scalar tail keeps the low 16 bits of vcvttss2si instead
   // of saturating, so the two paths agree only for inputs that fit in the
   // 16-bit unsigned range.
   // Entering at LBB0_1090 zeroes the index rdi; LBB0_1091 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   9730  LBB0_1090:
   9731  	WORD $0xff31 // xor    edi, edi
   9732  
   9733  LBB0_1091:
   9734  	LONG $0x01c0f641               // test    r8b, 1
   9735  	JE   LBB0_1093
   9736  	LONG $0x045bfec5; BYTE $0xba   // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
   9737  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   9738  	LONG $0x2b79e2c4; BYTE $0xc1   // vpackusdw    xmm0, xmm0, xmm1
   9739  	LONG $0x4c5bfec5; WORD $0x20ba // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
   9740  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
   9741  	LONG $0x2b71e2c4; BYTE $0xca   // vpackusdw    xmm1, xmm1, xmm2
   9742  	LONG $0x545bfec5; WORD $0x40ba // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
   9743  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
   9744  	LONG $0x2b69e2c4; BYTE $0xd3   // vpackusdw    xmm2, xmm2, xmm3
   9745  	LONG $0x5c5bfec5; WORD $0x60ba // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
   9746  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
   9747  	LONG $0x2b61e2c4; BYTE $0xdc   // vpackusdw    xmm3, xmm3, xmm4
   9748  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
   9749  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
   9750  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
   9751  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
   9752  
   9753  LBB0_1093:
   9754  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9755  	JE   LBB0_1553
   9756  
   9757  LBB0_1094:
   9758  	LONG $0x042cfac5; BYTE $0xb2 // vcvttss2si    eax, dword [rdx + 4*rsi]
   9759  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
   9760  	LONG $0x01c68348             // add    rsi, 1
   9761  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9762  	JNE  LBB0_1094
   9763  	JMP  LBB0_1553
  9764  
   // float32 -> 16-bit integer (signed pack) remainder handling.
   // Same structure as the unsigned variant, but the int32 results of
   // vcvttps2dq are narrowed with vpackssdw (signed saturation).
   // The vector step covers 32 elements.
   // NOTE(review): the scalar tail keeps the low 16 bits of vcvttss2si instead
   // of saturating, so the two paths agree only for inputs that fit in the
   // 16-bit signed range.
   // Entering at LBB0_1095 zeroes the index rdi; LBB0_1096 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   9765  LBB0_1095:
   9766  	WORD $0xff31 // xor    edi, edi
   9767  
   9768  LBB0_1096:
   9769  	LONG $0x01c0f641               // test    r8b, 1
   9770  	JE   LBB0_1098
   9771  	LONG $0x045bfec5; BYTE $0xba   // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
   9772  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   9773  	LONG $0xc16bf9c5               // vpackssdw    xmm0, xmm0, xmm1
   9774  	LONG $0x4c5bfec5; WORD $0x20ba // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
   9775  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
   9776  	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
   9777  	LONG $0x545bfec5; WORD $0x40ba // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
   9778  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
   9779  	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
   9780  	LONG $0x5c5bfec5; WORD $0x60ba // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
   9781  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
   9782  	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
   9783  	LONG $0x047ffac5; BYTE $0x79   // vmovdqu    oword [rcx + 2*rdi], xmm0
   9784  	LONG $0x4c7ffac5; WORD $0x1079 // vmovdqu    oword [rcx + 2*rdi + 16], xmm1
   9785  	LONG $0x547ffac5; WORD $0x2079 // vmovdqu    oword [rcx + 2*rdi + 32], xmm2
   9786  	LONG $0x5c7ffac5; WORD $0x3079 // vmovdqu    oword [rcx + 2*rdi + 48], xmm3
   9787  
   9788  LBB0_1098:
   9789  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9790  	JE   LBB0_1553
   9791  
   9792  LBB0_1099:
   9793  	LONG $0x042cfac5; BYTE $0xb2 // vcvttss2si    eax, dword [rdx + 4*rsi]
   9794  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
   9795  	LONG $0x01c68348             // add    rsi, 1
   9796  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9797  	JNE  LBB0_1099
   9798  	JMP  LBB0_1553
  9799  
   // Unsigned 32-bit -> 64-bit integer widening, remainder handling.
   // The vector step zero-extends 16 dwords to qwords with four vpmovzxdq and
   // stores 4 ymm registers; it runs only when bit 0 of r8 is set.
   // Entering at LBB0_1100 zeroes the index rdi; LBB0_1101 keeps it.
   // r8 is set up outside this view -- presumably a chunk count, TODO confirm.
   // The scalar loop relies on the 32-bit `mov eax` clearing the upper half of
   // rax before the full qword is stored.
   9800  LBB0_1100:
   9801  	WORD $0xff31 // xor    edi, edi
   9802  
   9803  LBB0_1101:
   9804  	LONG $0x01c0f641                           // test    r8b, 1
   9805  	JE   LBB0_1103
   9806  	LONG $0x357de2c4; WORD $0xba04             // vpmovzxdq    ymm0, oword [rdx + 4*rdi]
   9807  	LONG $0x357de2c4; WORD $0xba4c; BYTE $0x10 // vpmovzxdq    ymm1, oword [rdx + 4*rdi + 16]
   9808  	LONG $0x357de2c4; WORD $0xba54; BYTE $0x20 // vpmovzxdq    ymm2, oword [rdx + 4*rdi + 32]
   9809  	LONG $0x357de2c4; WORD $0xba5c; BYTE $0x30 // vpmovzxdq    ymm3, oword [rdx + 4*rdi + 48]
   9810  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
   9811  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
   9812  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
   9813  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
   9814  
   9815  LBB0_1103:
   9816  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9817  	JE   LBB0_1553
   9818  
   9819  LBB0_1104:
   9820  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   9821  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
   9822  	LONG $0x01c68348         // add    rsi, 1
   9823  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   9824  	JNE  LBB0_1104
   9825  	JMP  LBB0_1553
  9826  
   // Unsigned 32-bit integer -> float32 remainder handling.
   // AVX2 has no packed unsigned-dword-to-float conversion, so the vector step
   // (32 elements) builds each float from the two 16-bit halves of the input:
   //   lo = low 16 bits blended under 0x4b000000  (float 2^23 + lo)
   //   hi = high 16 bits (after vpsrld 16) blended under 0x53000000
   //        (float 2^39 + hi * 2^16)
   //   result = lo + (hi - 0x53000080), where 0x53000080 is 2^39 + 2^23.
   // The three constants are the dwords at 52/56/60[rbp]; rbp is pointed at
   // LCDATA1 at function entry.
   // Entering at LBB0_1105 zeroes the index rdi; LBB0_1106 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   // The scalar loop zero-extends the dword into rax and uses the 64-bit form
   // of vcvtsi2ss, so the value is treated as unsigned.
   9827  LBB0_1105:
   9828  	WORD $0xff31 // xor    edi, edi
   9829  
   9830  LBB0_1106:
   9831  	LONG $0x01c0f641               // test    r8b, 1
   9832  	JE   LBB0_1108
   9833  	LONG $0x046ffec5; BYTE $0xba   // vmovdqu    ymm0, yword [rdx + 4*rdi]
   9834  	LONG $0x4c6ffec5; WORD $0x20ba // vmovdqu    ymm1, yword [rdx + 4*rdi + 32]
   9835  	LONG $0x546ffec5; WORD $0x40ba // vmovdqu    ymm2, yword [rdx + 4*rdi + 64]
   9836  	LONG $0x587de2c4; WORD $0x345d // vpbroadcastd    ymm3, dword 52[rbp] /* [rip + .LCPI0_13] */
   9837  	LONG $0x646ffec5; WORD $0x60ba // vmovdqu    ymm4, yword [rdx + 4*rdi + 96]
   9838  	LONG $0x0e7de3c4; WORD $0xaaeb // vpblendw    ymm5, ymm0, ymm3, 170
   9839  	LONG $0x587de2c4; WORD $0x3875 // vpbroadcastd    ymm6, dword 56[rbp] /* [rip + .LCPI0_14] */
   9840  	LONG $0xd072fdc5; BYTE $0x10   // vpsrld    ymm0, ymm0, 16
   9841  	LONG $0x0e7de3c4; WORD $0xaac6 // vpblendw    ymm0, ymm0, ymm6, 170
   9842  	LONG $0x187de2c4; WORD $0x3c7d // vbroadcastss    ymm7, dword 60[rbp] /* [rip + .LCPI0_15] */
   9843  	LONG $0xc75cfcc5               // vsubps    ymm0, ymm0, ymm7
   9844  	LONG $0xc058d4c5               // vaddps    ymm0, ymm5, ymm0
   9845  	LONG $0x0e75e3c4; WORD $0xaaeb // vpblendw    ymm5, ymm1, ymm3, 170
   9846  	LONG $0xd172f5c5; BYTE $0x10   // vpsrld    ymm1, ymm1, 16
   9847  	LONG $0x0e75e3c4; WORD $0xaace // vpblendw    ymm1, ymm1, ymm6, 170
   9848  	LONG $0xcf5cf4c5               // vsubps    ymm1, ymm1, ymm7
   9849  	LONG $0xc958d4c5               // vaddps    ymm1, ymm5, ymm1
   9850  	LONG $0x0e6de3c4; WORD $0xaaeb // vpblendw    ymm5, ymm2, ymm3, 170
   9851  	LONG $0xd272edc5; BYTE $0x10   // vpsrld    ymm2, ymm2, 16
   9852  	LONG $0x0e6de3c4; WORD $0xaad6 // vpblendw    ymm2, ymm2, ymm6, 170
   9853  	LONG $0xd75cecc5               // vsubps    ymm2, ymm2, ymm7
   9854  	LONG $0xd258d4c5               // vaddps    ymm2, ymm5, ymm2
   9855  	LONG $0x0e5de3c4; WORD $0xaadb // vpblendw    ymm3, ymm4, ymm3, 170
   9856  	LONG $0xd472ddc5; BYTE $0x10   // vpsrld    ymm4, ymm4, 16
   9857  	LONG $0x0e5de3c4; WORD $0xaae6 // vpblendw    ymm4, ymm4, ymm6, 170
   9858  	LONG $0xe75cdcc5               // vsubps    ymm4, ymm4, ymm7
   9859  	LONG $0xdc58e4c5               // vaddps    ymm3, ymm3, ymm4
   9860  	LONG $0x0411fcc5; BYTE $0xb9   // vmovups    yword [rcx + 4*rdi], ymm0
   9861  	LONG $0x4c11fcc5; WORD $0x20b9 // vmovups    yword [rcx + 4*rdi + 32], ymm1
   9862  	LONG $0x5411fcc5; WORD $0x40b9 // vmovups    yword [rcx + 4*rdi + 64], ymm2
   9863  	LONG $0x5c11fcc5; WORD $0x60b9 // vmovups    yword [rcx + 4*rdi + 96], ymm3
   9864  
   9865  LBB0_1108:
   9866  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9867  	JE   LBB0_1553
   9868  
   9869  LBB0_1109:
   9870  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
   9871  	LONG $0x2abae1c4; BYTE $0xc0 // vcvtsi2ss    xmm0, xmm8, rax
   9872  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
   9873  	LONG $0x01c68348             // add    rsi, 1
   9874  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9875  	JNE  LBB0_1109
   9876  	JMP  LBB0_1553
  9877  
   // float64 -> 64-bit integer remainder handling.
   // There is no packed double->qword conversion on this path, so the "vector"
   // step converts 16 elements one at a time with truncating vcvttsd2si, moves
   // each result into an xmm register with vmovq, pairs neighbours with
   // vpunpcklqdq, and stores 8 xmm registers.
   // rax and rbx are both used as integer scratch registers here.
   // Loads and stores are interleaved out of address order, but each stored
   // pair lands at the offset its two source doubles were read from.
   // Entering at LBB0_1110 zeroes the index rdi; LBB0_1111 keeps it.
   // The step runs only when bit 0 of r8 is set (r8 is set up outside this view
   // -- presumably a chunk count, TODO confirm).
   // The scalar loop converts elements rsi..r9-1 the same way.
   9878  LBB0_1110:
   9879  	WORD $0xff31 // xor    edi, edi
   9880  
   9881  LBB0_1111:
   9882  	LONG $0x01c0f641                           // test    r8b, 1
   9883  	JE   LBB0_1113
   9884  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x08 // vcvttsd2si    rax, qword [rdx + 8*rdi + 8]
   9885  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
   9886  	LONG $0x2cfbe1c4; WORD $0xfa04             // vcvttsd2si    rax, qword [rdx + 8*rdi]
   9887  	LONG $0x6ef9e1c4; BYTE $0xc8               // vmovq    xmm1, rax
   9888  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
   9889  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x18 // vcvttsd2si    rax, qword [rdx + 8*rdi + 24]
   9890  	LONG $0x6ef9e1c4; BYTE $0xc8               // vmovq    xmm1, rax
   9891  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x10 // vcvttsd2si    rax, qword [rdx + 8*rdi + 16]
   9892  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
   9893  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x38 // vcvttsd2si    rax, qword [rdx + 8*rdi + 56]
   9894  	LONG $0xc96ce9c5                           // vpunpcklqdq    xmm1, xmm2, xmm1
   9895  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x30 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 48]
   9896  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
   9897  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x28 // vcvttsd2si    rax, qword [rdx + 8*rdi + 40]
   9898  	LONG $0x6ef9e1c4; BYTE $0xdb               // vmovq    xmm3, rbx
   9899  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x20 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 32]
   9900  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
   9901  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
   9902  	LONG $0x6ef9e1c4; BYTE $0xe3               // vmovq    xmm4, rbx
   9903  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
   9904  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x58 // vcvttsd2si    rax, qword [rdx + 8*rdi + 88]
   9905  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
   9906  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x50 // vcvttsd2si    rax, qword [rdx + 8*rdi + 80]
   9907  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
   9908  	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
   9909  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x48 // vcvttsd2si    rax, qword [rdx + 8*rdi + 72]
   9910  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
   9911  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x40 // vcvttsd2si    rax, qword [rdx + 8*rdi + 64]
   9912  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
   9913  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x78 // vcvttsd2si    rax, qword [rdx + 8*rdi + 120]
   9914  	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
   9915  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x70 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 112]
   9916  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
   9917  	LONG $0x2cfbe1c4; WORD $0xfa44; BYTE $0x68 // vcvttsd2si    rax, qword [rdx + 8*rdi + 104]
   9918  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
   9919  	LONG $0x2cfbe1c4; WORD $0xfa5c; BYTE $0x60 // vcvttsd2si    rbx, qword [rdx + 8*rdi + 96]
   9920  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
   9921  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
   9922  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
   9923  	LONG $0xc06cc1c5                           // vpunpcklqdq    xmm0, xmm7, xmm0
   9924  	LONG $0x4c7ffac5; WORD $0x10f9             // vmovdqu    oword [rcx + 8*rdi + 16], xmm1
   9925  	LONG $0x047f7ac5; BYTE $0xf9               // vmovdqu    oword [rcx + 8*rdi], xmm8
   9926  	LONG $0x5c7ffac5; WORD $0x20f9             // vmovdqu    oword [rcx + 8*rdi + 32], xmm3
   9927  	LONG $0x547ffac5; WORD $0x30f9             // vmovdqu    oword [rcx + 8*rdi + 48], xmm2
   9928  	LONG $0x6c7ffac5; WORD $0x40f9             // vmovdqu    oword [rcx + 8*rdi + 64], xmm5
   9929  	LONG $0x647ffac5; WORD $0x50f9             // vmovdqu    oword [rcx + 8*rdi + 80], xmm4
   9930  	LONG $0x447ffac5; WORD $0x60f9             // vmovdqu    oword [rcx + 8*rdi + 96], xmm0
   9931  	LONG $0x747ffac5; WORD $0x70f9             // vmovdqu    oword [rcx + 8*rdi + 112], xmm6
   9932  
   9933  LBB0_1113:
   9934  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9935  	JE   LBB0_1553
   9936  
   9937  LBB0_1114:
   9938  	LONG $0x2cfbe1c4; WORD $0xf204 // vcvttsd2si    rax, qword [rdx + 8*rsi]
   9939  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
   9940  	LONG $0x01c68348               // add    rsi, 1
   9941  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   9942  	JNE  LBB0_1114
   9943  	JMP  LBB0_1553
  9944  
   // float64 -> float32 remainder handling.
   // The vector step narrows 16 doubles with four vcvtpd2ps (4 doubles -> 4
   // floats each) and stores 4 xmm registers; it runs only when bit 0 of r8 is
   // set.
   // Entering at LBB0_1115 zeroes the index rdi; LBB0_1116 keeps it.
   // r8 is set up outside this view -- presumably a chunk count, TODO confirm.
   // The scalar loop then narrows elements rsi..r9-1 with vcvtsd2ss.
   9945  LBB0_1115:
   9946  	WORD $0xff31 // xor    edi, edi
   9947  
   9948  LBB0_1116:
   9949  	LONG $0x01c0f641               // test    r8b, 1
   9950  	JE   LBB0_1118
   9951  	LONG $0x045afdc5; BYTE $0xfa   // vcvtpd2ps    xmm0, yword [rdx + 8*rdi]
   9952  	LONG $0x4c5afdc5; WORD $0x20fa // vcvtpd2ps    xmm1, yword [rdx + 8*rdi + 32]
   9953  	LONG $0x545afdc5; WORD $0x40fa // vcvtpd2ps    xmm2, yword [rdx + 8*rdi + 64]
   9954  	LONG $0x5c5afdc5; WORD $0x60fa // vcvtpd2ps    xmm3, yword [rdx + 8*rdi + 96]
   9955  	LONG $0x0411f9c5; BYTE $0xb9   // vmovupd    oword [rcx + 4*rdi], xmm0
   9956  	LONG $0x4c11f9c5; WORD $0x10b9 // vmovupd    oword [rcx + 4*rdi + 16], xmm1
   9957  	LONG $0x5411f9c5; WORD $0x20b9 // vmovupd    oword [rcx + 4*rdi + 32], xmm2
   9958  	LONG $0x5c11f9c5; WORD $0x30b9 // vmovupd    oword [rcx + 4*rdi + 48], xmm3
   9959  
   9960  LBB0_1118:
   9961  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
   9962  	JE   LBB0_1553
   9963  
   9964  LBB0_1119:
   9965  	LONG $0x0410fbc5; BYTE $0xf2 // vmovsd    xmm0, qword [rdx + 8*rsi]
   9966  	LONG $0xc05afbc5             // vcvtsd2ss    xmm0, xmm0, xmm0
   9967  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
   9968  	LONG $0x01c68348             // add    rsi, 1
   9969  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   9970  	JNE  LBB0_1119
   9971  	JMP  LBB0_1553
  9972  
  9973  LBB0_1120: // Zero-extended 16-bit -> float32 path, entry that starts the vector pass at element offset 0 (rdi = 0).
  9974  	WORD $0xff31 // xor    edi, edi
  9975  
  9976  LBB0_1121: // If bit 0 of r8 is set, convert one chunk of 32 elements: zero-extend words to dwords (vpmovzxwd), convert to float32 (vcvtdq2ps), store at [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
  9977  	LONG $0x01c0f641                           // test    r8b, 1
  9978  	JE   LBB0_1123 // low bit clear: skip the vector chunk
  9979  	LONG $0x337de2c4; WORD $0x7a04             // vpmovzxwd    ymm0, oword [rdx + 2*rdi]
  9980  	LONG $0x337de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovzxwd    ymm1, oword [rdx + 2*rdi + 16]
  9981  	LONG $0x337de2c4; WORD $0x7a54; BYTE $0x20 // vpmovzxwd    ymm2, oword [rdx + 2*rdi + 32]
  9982  	LONG $0x337de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovzxwd    ymm3, oword [rdx + 2*rdi + 48]
  9983  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
  9984  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
  9985  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
  9986  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
  9987  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
  9988  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
  9989  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
  9990  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
  9991  
  9992  LBB0_1123: // Scalar tail: done if index rsi already equals bound r9.
  9993  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9994  	JE   LBB0_1553 // nothing left -> shared exit
  9995  
  9996  LBB0_1124: // One element per iteration: zero-extend the word at [rdx + 2*rsi], convert to float32, store at [rcx + 4*rsi].
  9997  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  9998  	LONG $0xc02adac5             // vcvtsi2ss    xmm0, xmm4, eax
  9999  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
 10000  	LONG $0x01c68348             // add    rsi, 1
 10001  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 10002  	JNE  LBB0_1124 // keep going until rsi reaches r9
 10003  	JMP  LBB0_1553
 10004  
 10005  LBB0_1125: // Sign-extended 16-bit -> 64-bit integer path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10006  	WORD $0xff31 // xor    edi, edi
 10007  
 10008  LBB0_1126: // If bit 0 of r8 is set, widen one chunk of 16 elements: vpmovsxwq sign-extends 4 words to 4 qwords per register, stored at [rcx + 8*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10009  	LONG $0x01c0f641                           // test    r8b, 1
 10010  	JE   LBB0_1128 // low bit clear: skip the vector chunk
 10011  	LONG $0x247de2c4; WORD $0x7a04             // vpmovsxwq    ymm0, qword [rdx + 2*rdi]
 10012  	LONG $0x247de2c4; WORD $0x7a4c; BYTE $0x08 // vpmovsxwq    ymm1, qword [rdx + 2*rdi + 8]
 10013  	LONG $0x247de2c4; WORD $0x7a54; BYTE $0x10 // vpmovsxwq    ymm2, qword [rdx + 2*rdi + 16]
 10014  	LONG $0x247de2c4; WORD $0x7a5c; BYTE $0x18 // vpmovsxwq    ymm3, qword [rdx + 2*rdi + 24]
 10015  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
 10016  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
 10017  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
 10018  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
 10019  
 10020  LBB0_1128: // Scalar tail: done if index rsi already equals bound r9.
 10021  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10022  	JE   LBB0_1553 // nothing left -> shared exit
 10023  
 10024  LBB0_1129: // One element per iteration: sign-extend the word at [rdx + 2*rsi] to 64 bits and store at [rcx + 8*rsi].
 10025  	LONG $0x04bf0f48; BYTE $0x72 // movsx    rax, word [rdx + 2*rsi]
 10026  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 10027  	LONG $0x01c68348             // add    rsi, 1
 10028  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 10029  	JNE  LBB0_1129 // keep going until rsi reaches r9
 10030  	JMP  LBB0_1553
 10031  
 10032  LBB0_1130: // Sign-extended 16-bit -> float32 path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10033  	WORD $0xff31 // xor    edi, edi
 10034  
 10035  LBB0_1131: // If bit 0 of r8 is set, convert one chunk of 32 elements: sign-extend words to dwords (vpmovsxwd), convert to float32 (vcvtdq2ps), store at [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10036  	LONG $0x01c0f641                           // test    r8b, 1
 10037  	JE   LBB0_1133 // low bit clear: skip the vector chunk
 10038  	LONG $0x237de2c4; WORD $0x7a04             // vpmovsxwd    ymm0, oword [rdx + 2*rdi]
 10039  	LONG $0x237de2c4; WORD $0x7a4c; BYTE $0x10 // vpmovsxwd    ymm1, oword [rdx + 2*rdi + 16]
 10040  	LONG $0x237de2c4; WORD $0x7a54; BYTE $0x20 // vpmovsxwd    ymm2, oword [rdx + 2*rdi + 32]
 10041  	LONG $0x237de2c4; WORD $0x7a5c; BYTE $0x30 // vpmovsxwd    ymm3, oword [rdx + 2*rdi + 48]
 10042  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
 10043  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
 10044  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
 10045  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
 10046  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
 10047  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
 10048  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
 10049  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
 10050  
 10051  LBB0_1133: // Scalar tail: done if index rsi already equals bound r9.
 10052  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10053  	JE   LBB0_1553 // nothing left -> shared exit
 10054  
 10055  LBB0_1134: // One element per iteration: sign-extend the word at [rdx + 2*rsi], convert to float32, store at [rcx + 4*rsi].
 10056  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
 10057  	LONG $0xc02adac5             // vcvtsi2ss    xmm0, xmm4, eax
 10058  	LONG $0x0411fac5; BYTE $0xb1 // vmovss    dword [rcx + 4*rsi], xmm0
 10059  	LONG $0x01c68348             // add    rsi, 1
 10060  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 10061  	JNE  LBB0_1134 // keep going until rsi reaches r9
 10062  	JMP  LBB0_1553
 10063  
 10064  LBB0_1135: // Signed 64-bit integer -> float32 path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10065  	WORD $0xff31 // xor    edi, edi
 10066  
 10067  LBB0_1136: // If bit 0 of r8 is set, convert one chunk of 16 elements. There is no packed instruction used here: each 128-bit load yields two qwords that are moved to rax (vmovq / vpextrq), converted one at a time with vcvtsi2ss, and reassembled four-per-register with vinsertps before the 16-byte stores to [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10068  	LONG $0x01c0f641               // test    r8b, 1
 10069  	JE   LBB0_1138 // low bit clear: skip the vector chunk
 10070  	LONG $0x046ffac5; BYTE $0xfa   // vmovdqu    xmm0, oword [rdx + 8*rdi]
 10071  	LONG $0x16f9e3c4; WORD $0x01c0 // vpextrq    rax, xmm0, 1
 10072  	LONG $0x4c6ffac5; WORD $0x10fa // vmovdqu    xmm1, oword [rdx + 8*rdi + 16]
 10073  	LONG $0x2abae1c4; BYTE $0xd0   // vcvtsi2ss    xmm2, xmm8, rax
 10074  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
 10075  	LONG $0x2abae1c4; BYTE $0xc0   // vcvtsi2ss    xmm0, xmm8, rax
 10076  	LONG $0x7ef9e1c4; BYTE $0xc8   // vmovq    rax, xmm1
 10077  	LONG $0x2abae1c4; BYTE $0xd8   // vcvtsi2ss    xmm3, xmm8, rax
 10078  	LONG $0x16f9e3c4; WORD $0x01c8 // vpextrq    rax, xmm1, 1
 10079  	LONG $0x2abae1c4; BYTE $0xc8   // vcvtsi2ss    xmm1, xmm8, rax
 10080  	LONG $0x646ffac5; WORD $0x20fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 32]
 10081  	LONG $0x6c6ffac5; WORD $0x30fa // vmovdqu    xmm5, oword [rdx + 8*rdi + 48]
 10082  	LONG $0x16f9e3c4; WORD $0x01e0 // vpextrq    rax, xmm4, 1
 10083  	LONG $0x2179e3c4; WORD $0x10c2 // vinsertps    xmm0, xmm0, xmm2, 16
 10084  	LONG $0x2abae1c4; BYTE $0xd0   // vcvtsi2ss    xmm2, xmm8, rax
 10085  	LONG $0x7ef9e1c4; BYTE $0xe0   // vmovq    rax, xmm4
 10086  	LONG $0x2abae1c4; BYTE $0xe0   // vcvtsi2ss    xmm4, xmm8, rax
 10087  	LONG $0x7ef9e1c4; BYTE $0xe8   // vmovq    rax, xmm5
 10088  	LONG $0x2abae1c4; BYTE $0xf0   // vcvtsi2ss    xmm6, xmm8, rax
 10089  	LONG $0x2179e3c4; WORD $0x20c3 // vinsertps    xmm0, xmm0, xmm3, 32
 10090  	LONG $0x2179e3c4; WORD $0x30c1 // vinsertps    xmm0, xmm0, xmm1, 48
 10091  	LONG $0x16f9e3c4; WORD $0x01e8 // vpextrq    rax, xmm5, 1
 10092  	LONG $0x2159e3c4; WORD $0x10ca // vinsertps    xmm1, xmm4, xmm2, 16
 10093  	LONG $0x2abae1c4; BYTE $0xd0   // vcvtsi2ss    xmm2, xmm8, rax
 10094  	LONG $0x2171e3c4; WORD $0x20ce // vinsertps    xmm1, xmm1, xmm6, 32
 10095  	LONG $0x5c6ffac5; WORD $0x40fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
 10096  	LONG $0x16f9e3c4; WORD $0x01d8 // vpextrq    rax, xmm3, 1
 10097  	LONG $0x2abae1c4; BYTE $0xe0   // vcvtsi2ss    xmm4, xmm8, rax
 10098  	LONG $0x7ef9e1c4; BYTE $0xd8   // vmovq    rax, xmm3
 10099  	LONG $0x2abae1c4; BYTE $0xd8   // vcvtsi2ss    xmm3, xmm8, rax
 10100  	LONG $0x6c6ffac5; WORD $0x50fa // vmovdqu    xmm5, oword [rdx + 8*rdi + 80]
 10101  	LONG $0x7ef9e1c4; BYTE $0xe8   // vmovq    rax, xmm5
 10102  	LONG $0x2abae1c4; BYTE $0xf0   // vcvtsi2ss    xmm6, xmm8, rax
 10103  	LONG $0x2171e3c4; WORD $0x30ca // vinsertps    xmm1, xmm1, xmm2, 48
 10104  	LONG $0x2161e3c4; WORD $0x10d4 // vinsertps    xmm2, xmm3, xmm4, 16
 10105  	LONG $0x16f9e3c4; WORD $0x01e8 // vpextrq    rax, xmm5, 1
 10106  	LONG $0x2169e3c4; WORD $0x20d6 // vinsertps    xmm2, xmm2, xmm6, 32
 10107  	LONG $0x2abae1c4; BYTE $0xd8   // vcvtsi2ss    xmm3, xmm8, rax
 10108  	LONG $0x2169e3c4; WORD $0x30d3 // vinsertps    xmm2, xmm2, xmm3, 48
 10109  	LONG $0x5c6ffac5; WORD $0x60fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 96]
 10110  	LONG $0x16f9e3c4; WORD $0x01d8 // vpextrq    rax, xmm3, 1
 10111  	LONG $0x2abae1c4; BYTE $0xe0   // vcvtsi2ss    xmm4, xmm8, rax
 10112  	LONG $0x7ef9e1c4; BYTE $0xd8   // vmovq    rax, xmm3
 10113  	LONG $0x2abae1c4; BYTE $0xd8   // vcvtsi2ss    xmm3, xmm8, rax
 10114  	LONG $0x6c6ffac5; WORD $0x70fa // vmovdqu    xmm5, oword [rdx + 8*rdi + 112]
 10115  	LONG $0x7ef9e1c4; BYTE $0xe8   // vmovq    rax, xmm5
 10116  	LONG $0x2abae1c4; BYTE $0xf0   // vcvtsi2ss    xmm6, xmm8, rax
 10117  	LONG $0x2161e3c4; WORD $0x10dc // vinsertps    xmm3, xmm3, xmm4, 16
 10118  	LONG $0x2161e3c4; WORD $0x20de // vinsertps    xmm3, xmm3, xmm6, 32
 10119  	LONG $0x16f9e3c4; WORD $0x01e8 // vpextrq    rax, xmm5, 1
 10120  	LONG $0x2abae1c4; BYTE $0xe0   // vcvtsi2ss    xmm4, xmm8, rax
 10121  	LONG $0x2161e3c4; WORD $0x30dc // vinsertps    xmm3, xmm3, xmm4, 48
 10122  	LONG $0x0411f8c5; BYTE $0xb9   // vmovups    oword [rcx + 4*rdi], xmm0
 10123  	LONG $0x4c11f8c5; WORD $0x10b9 // vmovups    oword [rcx + 4*rdi + 16], xmm1
 10124  	LONG $0x5411f8c5; WORD $0x20b9 // vmovups    oword [rcx + 4*rdi + 32], xmm2
 10125  	LONG $0x5c11f8c5; WORD $0x30b9 // vmovups    oword [rcx + 4*rdi + 48], xmm3
 10126  
 10127  LBB0_1138: // Scalar tail: done if index rsi already equals bound r9.
 10128  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10129  	JE   LBB0_1553 // nothing left -> shared exit
 10130  
 10131  LBB0_1139: // One element per iteration: convert the signed qword at [rdx + 8*rsi] to float32 and store at [rcx + 4*rsi].
 10132  	LONG $0x2abae1c4; WORD $0xf204 // vcvtsi2ss    xmm0, xmm8, qword [rdx + 8*rsi]
 10133  	LONG $0x0411fac5; BYTE $0xb1   // vmovss    dword [rcx + 4*rsi], xmm0
 10134  	LONG $0x01c68348               // add    rsi, 1
 10135  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 10136  	JNE  LBB0_1139 // keep going until rsi reaches r9
 10137  	JMP  LBB0_1553
 10138  
 10139  LBB0_1140: // float32 -> signed 64-bit integer path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10140  	WORD $0xff31 // xor    edi, edi
 10141  
 10142  LBB0_1141: // If bit 0 of r8 is set, convert one chunk of 16 elements. Each float32 is truncated individually with vcvttss2si into rax or rbx, moved into an xmm register, and paired with vpunpcklqdq so that every 16-byte store to [rcx + 8*rdi] holds two consecutive results. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10143  	LONG $0x01c0f641                           // test    r8b, 1
 10144  	JE   LBB0_1143 // low bit clear: skip the vector chunk
 10145  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x04 // vcvttss2si    rax, dword [rdx + 4*rdi + 4]
 10146  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
 10147  	LONG $0x2cfae1c4; WORD $0xba04             // vcvttss2si    rax, dword [rdx + 4*rdi]
 10148  	LONG $0x6ef9e1c4; BYTE $0xc8               // vmovq    xmm1, rax
 10149  	LONG $0xc06c71c5                           // vpunpcklqdq    xmm8, xmm1, xmm0
 10150  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x0c // vcvttss2si    rax, dword [rdx + 4*rdi + 12]
 10151  	LONG $0x6ef9e1c4; BYTE $0xc8               // vmovq    xmm1, rax
 10152  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x08 // vcvttss2si    rax, dword [rdx + 4*rdi + 8]
 10153  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
 10154  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x1c // vcvttss2si    rax, dword [rdx + 4*rdi + 28]
 10155  	LONG $0xc96ce9c5                           // vpunpcklqdq    xmm1, xmm2, xmm1
 10156  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x18 // vcvttss2si    rbx, dword [rdx + 4*rdi + 24]
 10157  	LONG $0x6ef9e1c4; BYTE $0xd0               // vmovq    xmm2, rax
 10158  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x14 // vcvttss2si    rax, dword [rdx + 4*rdi + 20]
 10159  	LONG $0x6ef9e1c4; BYTE $0xdb               // vmovq    xmm3, rbx
 10160  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x10 // vcvttss2si    rbx, dword [rdx + 4*rdi + 16]
 10161  	LONG $0xd26ce1c5                           // vpunpcklqdq    xmm2, xmm3, xmm2
 10162  	LONG $0x6ef9e1c4; BYTE $0xd8               // vmovq    xmm3, rax
 10163  	LONG $0x6ef9e1c4; BYTE $0xe3               // vmovq    xmm4, rbx
 10164  	LONG $0xdb6cd9c5                           // vpunpcklqdq    xmm3, xmm4, xmm3
 10165  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x2c // vcvttss2si    rax, dword [rdx + 4*rdi + 44]
 10166  	LONG $0x6ef9e1c4; BYTE $0xe0               // vmovq    xmm4, rax
 10167  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x28 // vcvttss2si    rax, dword [rdx + 4*rdi + 40]
 10168  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
 10169  	LONG $0xe46cd1c5                           // vpunpcklqdq    xmm4, xmm5, xmm4
 10170  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x24 // vcvttss2si    rax, dword [rdx + 4*rdi + 36]
 10171  	LONG $0x6ef9e1c4; BYTE $0xe8               // vmovq    xmm5, rax
 10172  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x20 // vcvttss2si    rax, dword [rdx + 4*rdi + 32]
 10173  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
 10174  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x3c // vcvttss2si    rax, dword [rdx + 4*rdi + 60]
 10175  	LONG $0xed6cc9c5                           // vpunpcklqdq    xmm5, xmm6, xmm5
 10176  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x38 // vcvttss2si    rbx, dword [rdx + 4*rdi + 56]
 10177  	LONG $0x6ef9e1c4; BYTE $0xf0               // vmovq    xmm6, rax
 10178  	LONG $0x2cfae1c4; WORD $0xba44; BYTE $0x34 // vcvttss2si    rax, dword [rdx + 4*rdi + 52]
 10179  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
 10180  	LONG $0x2cfae1c4; WORD $0xba5c; BYTE $0x30 // vcvttss2si    rbx, dword [rdx + 4*rdi + 48]
 10181  	LONG $0x6ef9e1c4; BYTE $0xc0               // vmovq    xmm0, rax
 10182  	LONG $0xf66cc1c5                           // vpunpcklqdq    xmm6, xmm7, xmm6
 10183  	LONG $0x6ef9e1c4; BYTE $0xfb               // vmovq    xmm7, rbx
 10184  	LONG $0xc06cc1c5                           // vpunpcklqdq    xmm0, xmm7, xmm0
 10185  	LONG $0x4c7ffac5; WORD $0x10f9             // vmovdqu    oword [rcx + 8*rdi + 16], xmm1
 10186  	LONG $0x047f7ac5; BYTE $0xf9               // vmovdqu    oword [rcx + 8*rdi], xmm8
 10187  	LONG $0x5c7ffac5; WORD $0x20f9             // vmovdqu    oword [rcx + 8*rdi + 32], xmm3
 10188  	LONG $0x547ffac5; WORD $0x30f9             // vmovdqu    oword [rcx + 8*rdi + 48], xmm2
 10189  	LONG $0x6c7ffac5; WORD $0x40f9             // vmovdqu    oword [rcx + 8*rdi + 64], xmm5
 10190  	LONG $0x647ffac5; WORD $0x50f9             // vmovdqu    oword [rcx + 8*rdi + 80], xmm4
 10191  	LONG $0x447ffac5; WORD $0x60f9             // vmovdqu    oword [rcx + 8*rdi + 96], xmm0
 10192  	LONG $0x747ffac5; WORD $0x70f9             // vmovdqu    oword [rcx + 8*rdi + 112], xmm6
 10193  
 10194  LBB0_1143: // Scalar tail: done if index rsi already equals bound r9.
 10195  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10196  	JE   LBB0_1553 // nothing left -> shared exit
 10197  
 10198  LBB0_1144: // One element per iteration: truncate the float32 at [rdx + 4*rsi] to a signed 64-bit integer and store at [rcx + 8*rsi].
 10199  	LONG $0x2cfae1c4; WORD $0xb204 // vcvttss2si    rax, dword [rdx + 4*rsi]
 10200  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
 10201  	LONG $0x01c68348               // add    rsi, 1
 10202  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 10203  	JNE  LBB0_1144 // loop until rsi reaches r9, then fall through into the exit label that follows
 10204  
 10205  LBB0_1553: // Shared exit, jumped to from the conversion paths in this function: clear the upper halves of the YMM registers, then return.
 10206  	VZEROUPPER
 10207  	RET
 10208  
 10209  LBB0_1145: // Unconverted bulk copy using scale-2 indexing (presumably a cast between same-width 16-bit types - confirm). Round rdi down to a multiple of 4 and negate it so the loop below can count up to zero; rax = 0 is the running element offset.
 10210  	LONG $0xfce78348         // and    rdi, -4
 10211  	WORD $0xf748; BYTE $0xdf // neg    rdi
 10212  	WORD $0xc031             // xor    eax, eax
 10213  
 10214  LBB0_1146: // Unrolled copy loop: moves 256 bytes (eight 32-byte loads/stores) from [rdx + 2*rax] to [rcx + 2*rax] per iteration.
 10215  	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
 10216  	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
 10217  	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
 10218  	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
 10219  	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
 10220  	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
 10221  	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
 10222  	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
 10223  	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
 10224  	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
 10225  	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
 10226  	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
 10227  	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
 10228  	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
 10229  	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
 10230  	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
 10231  	LONG $0x80e88348                     // sub    rax, -128
 10232  	LONG $0x04c78348                     // add    rdi, 4
 10233  	JNE  LBB0_1146 // repeat until the negated counter in rdi reaches zero
 10234  
 10235  LBB0_1147: // Leftover 64-byte chunks: skipped when r8 is zero. Otherwise rax becomes a byte offset (2*rax) biased by +32, and r8 is negated to count up to zero.
 10236  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 10237  	JE   LBB0_1150
 10238  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 10239  	LONG $0x20c08348         // add    rax, 32
 10240  	WORD $0xf749; BYTE $0xd8 // neg    r8
 10241  
 10242  LBB0_1149: // Copies 64 bytes per iteration (the two 32-byte halves at rax-32 and rax).
 10243  	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
 10244  	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
 10245  	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
 10246  	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
 10247  	LONG $0x40c08348               // add    rax, 64
 10248  	WORD $0xff49; BYTE $0xc0       // inc    r8
 10249  	JNE  LBB0_1149 // repeat until the negated counter in r8 reaches zero
 10250  
 10251  LBB0_1150: // Exit if rsi equals r9; otherwise continue at LBB0_1151, which lies outside this part of the file (presumably the scalar tail - confirm).
 10252  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10253  	JE   LBB0_1553
 10254  	JMP  LBB0_1151
 10255  
 10256  LBB0_1155: // Unconverted bulk copy using scale-2 indexing (presumably a cast between same-width 16-bit types - confirm). Round rdi down to a multiple of 4 and negate it so the loop below can count up to zero; rax = 0 is the running element offset.
 10257  	LONG $0xfce78348         // and    rdi, -4
 10258  	WORD $0xf748; BYTE $0xdf // neg    rdi
 10259  	WORD $0xc031             // xor    eax, eax
 10260  
 10261  LBB0_1156: // Unrolled copy loop: moves 256 bytes (eight 32-byte loads/stores) from [rdx + 2*rax] to [rcx + 2*rax] per iteration.
 10262  	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
 10263  	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
 10264  	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
 10265  	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
 10266  	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
 10267  	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
 10268  	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
 10269  	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
 10270  	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
 10271  	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
 10272  	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
 10273  	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
 10274  	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
 10275  	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
 10276  	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
 10277  	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
 10278  	LONG $0x80e88348                     // sub    rax, -128
 10279  	LONG $0x04c78348                     // add    rdi, 4
 10280  	JNE  LBB0_1156 // repeat until the negated counter in rdi reaches zero
 10281  
 10282  LBB0_1157: // Leftover 64-byte chunks: skipped when r8 is zero. Otherwise rax becomes a byte offset (2*rax) biased by +32, and r8 is negated to count up to zero.
 10283  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 10284  	JE   LBB0_1160
 10285  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 10286  	LONG $0x20c08348         // add    rax, 32
 10287  	WORD $0xf749; BYTE $0xd8 // neg    r8
 10288  
 10289  LBB0_1159: // Copies 64 bytes per iteration (the two 32-byte halves at rax-32 and rax).
 10290  	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
 10291  	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
 10292  	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
 10293  	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
 10294  	LONG $0x40c08348               // add    rax, 64
 10295  	WORD $0xff49; BYTE $0xc0       // inc    r8
 10296  	JNE  LBB0_1159 // repeat until the negated counter in r8 reaches zero
 10297  
 10298  LBB0_1160: // Exit if rsi equals r9; otherwise continue at LBB0_1161, which lies outside this part of the file (presumably the scalar tail - confirm).
 10299  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10300  	JE   LBB0_1553
 10301  	JMP  LBB0_1161
 10302  
 10303  LBB0_1165: // Unconverted bulk copy using scale-2 indexing (presumably a cast between same-width 16-bit types - confirm). Round rdi down to a multiple of 4 and negate it so the loop below can count up to zero; rax = 0 is the running element offset.
 10304  	LONG $0xfce78348         // and    rdi, -4
 10305  	WORD $0xf748; BYTE $0xdf // neg    rdi
 10306  	WORD $0xc031             // xor    eax, eax
 10307  
 10308  LBB0_1166: // Unrolled copy loop: moves 256 bytes (eight 32-byte loads/stores) from [rdx + 2*rax] to [rcx + 2*rax] per iteration.
 10309  	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
 10310  	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
 10311  	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
 10312  	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
 10313  	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
 10314  	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
 10315  	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
 10316  	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
 10317  	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
 10318  	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
 10319  	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
 10320  	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
 10321  	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
 10322  	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
 10323  	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
 10324  	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
 10325  	LONG $0x80e88348                     // sub    rax, -128
 10326  	LONG $0x04c78348                     // add    rdi, 4
 10327  	JNE  LBB0_1166 // repeat until the negated counter in rdi reaches zero
 10328  
 10329  LBB0_1167: // Leftover 64-byte chunks: skipped when r8 is zero. Otherwise rax becomes a byte offset (2*rax) biased by +32, and r8 is negated to count up to zero.
 10330  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 10331  	JE   LBB0_1170
 10332  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 10333  	LONG $0x20c08348         // add    rax, 32
 10334  	WORD $0xf749; BYTE $0xd8 // neg    r8
 10335  
 10336  LBB0_1169: // Copies 64 bytes per iteration (the two 32-byte halves at rax-32 and rax).
 10337  	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
 10338  	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
 10339  	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
 10340  	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
 10341  	LONG $0x40c08348               // add    rax, 64
 10342  	WORD $0xff49; BYTE $0xc0       // inc    r8
 10343  	JNE  LBB0_1169 // repeat until the negated counter in r8 reaches zero
 10344  
 10345  LBB0_1170: // Exit if rsi equals r9; otherwise continue at LBB0_1171, which lies outside this part of the file (presumably the scalar tail - confirm).
 10346  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10347  	JE   LBB0_1553
 10348  	JMP  LBB0_1171
 10349  
 10350  LBB0_1175: // Unconverted bulk copy using scale-2 indexing (presumably a cast between same-width 16-bit types - confirm). Round rdi down to a multiple of 4 and negate it so the loop below can count up to zero; rax = 0 is the running element offset.
 10351  	LONG $0xfce78348         // and    rdi, -4
 10352  	WORD $0xf748; BYTE $0xdf // neg    rdi
 10353  	WORD $0xc031             // xor    eax, eax
 10354  
 10355  LBB0_1176: // Unrolled copy loop: moves 256 bytes (eight 32-byte loads/stores) from [rdx + 2*rax] to [rcx + 2*rax] per iteration.
 10356  	LONG $0x0410fcc5; BYTE $0x42         // vmovups    ymm0, yword [rdx + 2*rax]
 10357  	LONG $0x4c10fcc5; WORD $0x2042       // vmovups    ymm1, yword [rdx + 2*rax + 32]
 10358  	LONG $0x0411fcc5; BYTE $0x41         // vmovups    yword [rcx + 2*rax], ymm0
 10359  	LONG $0x4c11fcc5; WORD $0x2041       // vmovups    yword [rcx + 2*rax + 32], ymm1
 10360  	LONG $0x4410fcc5; WORD $0x4042       // vmovups    ymm0, yword [rdx + 2*rax + 64]
 10361  	LONG $0x4c10fcc5; WORD $0x6042       // vmovups    ymm1, yword [rdx + 2*rax + 96]
 10362  	LONG $0x4411fcc5; WORD $0x4041       // vmovups    yword [rcx + 2*rax + 64], ymm0
 10363  	LONG $0x4c11fcc5; WORD $0x6041       // vmovups    yword [rcx + 2*rax + 96], ymm1
 10364  	QUAD $0x000080428410fcc5; BYTE $0x00 // vmovups    ymm0, yword [rdx + 2*rax + 128]
 10365  	QUAD $0x0000a0428c10fcc5; BYTE $0x00 // vmovups    ymm1, yword [rdx + 2*rax + 160]
 10366  	QUAD $0x000080418411fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 128], ymm0
 10367  	QUAD $0x0000a0418c11fcc5; BYTE $0x00 // vmovups    yword [rcx + 2*rax + 160], ymm1
 10368  	QUAD $0x0000c0428410fdc5; BYTE $0x00 // vmovupd    ymm0, yword [rdx + 2*rax + 192]
 10369  	QUAD $0x0000e0428c10fdc5; BYTE $0x00 // vmovupd    ymm1, yword [rdx + 2*rax + 224]
 10370  	QUAD $0x0000c0418411fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 192], ymm0
 10371  	QUAD $0x0000e0418c11fdc5; BYTE $0x00 // vmovupd    yword [rcx + 2*rax + 224], ymm1
 10372  	LONG $0x80e88348                     // sub    rax, -128
 10373  	LONG $0x04c78348                     // add    rdi, 4
 10374  	JNE  LBB0_1176 // repeat until the negated counter in rdi reaches zero
 10375  
 10376  LBB0_1177: // Leftover 64-byte chunks: skipped when r8 is zero. Otherwise rax becomes a byte offset (2*rax) biased by +32, and r8 is negated to count up to zero.
 10377  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 10378  	JE   LBB0_1180
 10379  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 10380  	LONG $0x20c08348         // add    rax, 32
 10381  	WORD $0xf749; BYTE $0xd8 // neg    r8
 10382  
 10383  LBB0_1179: // Copies 64 bytes per iteration (the two 32-byte halves at rax-32 and rax).
 10384  	LONG $0x4410fdc5; WORD $0xe002 // vmovupd    ymm0, yword [rdx + rax - 32]
 10385  	LONG $0x0c10fdc5; BYTE $0x02   // vmovupd    ymm1, yword [rdx + rax]
 10386  	LONG $0x4411fdc5; WORD $0xe001 // vmovupd    yword [rcx + rax - 32], ymm0
 10387  	LONG $0x0c11fdc5; BYTE $0x01   // vmovupd    yword [rcx + rax], ymm1
 10388  	LONG $0x40c08348               // add    rax, 64
 10389  	WORD $0xff49; BYTE $0xc0       // inc    r8
 10390  	JNE  LBB0_1179 // repeat until the negated counter in r8 reaches zero
 10391  
 10392  LBB0_1180: // Exit if rsi equals r9; otherwise continue at LBB0_1181, which lies outside this part of the file (presumably the scalar tail - confirm).
 10393  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10394  	JE   LBB0_1553
 10395  	JMP  LBB0_1181
 10396  
 10397  LBB0_1185: // Unconverted copy using scale-4 indexing (presumably a cast between same-width 32-bit types - confirm); this entry starts at element offset 0 (rdi = 0).
 10398  	WORD $0xff31 // xor    edi, edi
 10399  
 10400  LBB0_1186: // If bit 0 of r8 is set, copy one 128-byte chunk (four 32-byte loads/stores) from [rdx + 4*rdi] to [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10401  	LONG $0x01c0f641               // test    r8b, 1
 10402  	JE   LBB0_1188 // low bit clear: skip the vector chunk
 10403  	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
 10404  	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
 10405  	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
 10406  	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
 10407  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
 10408  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
 10409  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
 10410  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
 10411  
 10412  LBB0_1188: // Exit if rsi equals r9; otherwise continue at LBB0_1189, which lies outside this part of the file (presumably the scalar tail - confirm).
 10413  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10414  	JE   LBB0_1553
 10415  	JMP  LBB0_1189
 10416  
 10417  LBB0_1193: // Sign-extended 8-bit -> 32-bit integer path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10418  	WORD $0xff31 // xor    edi, edi
 10419  
 10420  LBB0_1194: // If bit 0 of r8 is set, widen one chunk of 32 elements: vpmovsxbd sign-extends 8 bytes to 8 dwords per register, stored at [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10421  	LONG $0x01c0f641                           // test    r8b, 1
 10422  	JE   LBB0_1196 // low bit clear: skip the vector chunk
 10423  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi]
 10424  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
 10425  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
 10426  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
 10427  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
 10428  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
 10429  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
 10430  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
 10431  
 10432  LBB0_1196: // Exit if rsi equals r9; otherwise continue at LBB0_1197, which lies outside this part of the file (presumably the scalar tail - confirm).
 10433  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10434  	JE   LBB0_1553
 10435  	JMP  LBB0_1197
 10436  
 10437  LBB0_1201: // Zero-extended 8-bit -> 32-bit integer path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10438  	WORD $0xff31 // xor    edi, edi
 10439  
 10440  LBB0_1202: // If bit 0 of r8 is set, widen one chunk of 32 elements: vpmovzxbd zero-extends 8 bytes to 8 dwords per register, stored at [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10441  	LONG $0x01c0f641                           // test    r8b, 1
 10442  	JE   LBB0_1204 // low bit clear: skip the vector chunk
 10443  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi]
 10444  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
 10445  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
 10446  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
 10447  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
 10448  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
 10449  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
 10450  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
 10451  
 10452  LBB0_1204: // Exit if rsi equals r9; otherwise continue at LBB0_1205, which lies outside this part of the file (presumably the scalar tail - confirm).
 10453  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10454  	JE   LBB0_1553
 10455  	JMP  LBB0_1205
 10456  
 10457  LBB0_1209: // Unconverted copy using scale-4 indexing (presumably a cast between same-width 32-bit types - confirm); this entry starts at element offset 0 (rdi = 0).
 10458  	WORD $0xff31 // xor    edi, edi
 10459  
 10460  LBB0_1210: // If bit 0 of r8 is set, copy one 128-byte chunk (four 32-byte loads/stores) from [rdx + 4*rdi] to [rcx + 4*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10461  	LONG $0x01c0f641               // test    r8b, 1
 10462  	JE   LBB0_1212 // low bit clear: skip the vector chunk
 10463  	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
 10464  	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
 10465  	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
 10466  	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
 10467  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
 10468  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
 10469  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
 10470  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
 10471  
 10472  LBB0_1212: // Exit if rsi equals r9; otherwise continue at LBB0_1213, which lies outside this part of the file (presumably the scalar tail - confirm).
 10473  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10474  	JE   LBB0_1553
 10475  	JMP  LBB0_1213
 10476  
 10477  LBB0_1217: // Unconverted copy using scale-8 indexing (presumably a cast between same-width 64-bit types - confirm); this entry starts at element offset 0 (rdi = 0).
 10478  	WORD $0xff31 // xor    edi, edi
 10479  
 10480  LBB0_1218: // If bit 0 of r8 is set, copy one 128-byte chunk (four 32-byte loads/stores) from [rdx + 8*rdi] to [rcx + 8*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10481  	LONG $0x01c0f641               // test    r8b, 1
 10482  	JE   LBB0_1220 // low bit clear: skip the vector chunk
 10483  	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
 10484  	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
 10485  	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
 10486  	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
 10487  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
 10488  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 10489  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 10490  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 10491  
 10492  LBB0_1220: // Exit if rsi equals r9; otherwise continue at LBB0_1221, which lies outside this part of the file (presumably the scalar tail - confirm).
 10493  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10494  	JE   LBB0_1553
 10495  	JMP  LBB0_1221
 10496  
 10497  LBB0_1225: // Sign-extended 8-bit -> float64 path, entry that starts the vector pass at element offset 0 (rdi = 0).
 10498  	WORD $0xff31 // xor    edi, edi
 10499  
 10500  LBB0_1226: // If bit 0 of r8 is set, convert one chunk of 16 elements: sign-extend 4 bytes to 4 dwords (vpmovsxbd), convert those to 4 doubles (vcvtdq2pd), store at [rcx + 8*rdi]. NOTE(review): r8 is prepared before this chunk of the file; presumably a count of pending vector chunks - confirm.
 10501  	LONG $0x01c0f641                           // test    r8b, 1
 10502  	JE   LBB0_1228 // low bit clear: skip the vector chunk
 10503  	LONG $0x2179e2c4; WORD $0x3a04             // vpmovsxbd    xmm0, dword [rdx + rdi]
 10504  	LONG $0x2179e2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbd    xmm1, dword [rdx + rdi + 4]
 10505  	LONG $0x2179e2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbd    xmm2, dword [rdx + rdi + 8]
 10506  	LONG $0x2179e2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbd    xmm3, dword [rdx + rdi + 12]
 10507  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
 10508  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
 10509  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
 10510  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
 10511  	LONG $0x0411fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm0
 10512  	LONG $0x4c11fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 10513  	LONG $0x5411fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 10514  	LONG $0x5c11fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 10515  
 10516  LBB0_1228: // Exit if rsi equals r9; otherwise continue at LBB0_1229, which lies outside this part of the file (presumably the scalar tail - confirm).
 10517  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10518  	JE   LBB0_1553
 10519  	JMP  LBB0_1229
 10520  
 10521  LBB0_1233:
 10522  	WORD $0xff31 // xor    edi, edi
 10523  
 10524  LBB0_1234:
 10525  	LONG $0x01c0f641                           // test    r8b, 1
 10526  	JE   LBB0_1236
 10527  	LONG $0x3179e2c4; WORD $0x3a04             // vpmovzxbd    xmm0, dword [rdx + rdi]
 10528  	LONG $0x3179e2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbd    xmm1, dword [rdx + rdi + 4]
 10529  	LONG $0x3179e2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbd    xmm2, dword [rdx + rdi + 8]
 10530  	LONG $0x3179e2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbd    xmm3, dword [rdx + rdi + 12]
 10531  	LONG $0xc0e6fec5                           // vcvtdq2pd    ymm0, xmm0
 10532  	LONG $0xc9e6fec5                           // vcvtdq2pd    ymm1, xmm1
 10533  	LONG $0xd2e6fec5                           // vcvtdq2pd    ymm2, xmm2
 10534  	LONG $0xdbe6fec5                           // vcvtdq2pd    ymm3, xmm3
 10535  	LONG $0x0411fdc5; BYTE $0xf9               // vmovupd    yword [rcx + 8*rdi], ymm0
 10536  	LONG $0x4c11fdc5; WORD $0x20f9             // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 10537  	LONG $0x5411fdc5; WORD $0x40f9             // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 10538  	LONG $0x5c11fdc5; WORD $0x60f9             // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 10539  
 10540  LBB0_1236:
 10541  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10542  	JE   LBB0_1553
 10543  	JMP  LBB0_1237
 10544  
 10545  LBB0_1241:
 10546  	WORD $0xff31 // xor    edi, edi
 10547  
 10548  LBB0_1242:
 10549  	LONG $0x01c0f641               // test    r8b, 1
 10550  	JE   LBB0_1244
 10551  	LONG $0x456ff9c5; BYTE $0x70   // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */
 10552  	LONG $0x0c6ffac5; BYTE $0xba   // vmovdqu    xmm1, oword [rdx + 4*rdi]
 10553  	LONG $0x546ffac5; WORD $0x10ba // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
 10554  	LONG $0x5c6ffac5; WORD $0x20ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
 10555  	LONG $0x646ffac5; WORD $0x30ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
 10556  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10557  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 10558  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2
 10559  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 10560  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10561  	LONG $0xd262e1c5               // vpunpckldq    xmm2, xmm3, xmm2
 10562  	LONG $0x5c6ffac5; WORD $0x50ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
 10563  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10564  	LONG $0x646ffac5; WORD $0x40ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
 10565  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 10566  	LONG $0xdb62d9c5               // vpunpckldq    xmm3, xmm4, xmm3
 10567  	LONG $0x646ffac5; WORD $0x70ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
 10568  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 10569  	LONG $0x6c6ffac5; WORD $0x60ba // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
 10570  	LONG $0x0051e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm5, xmm0
 10571  	LONG $0xc462f9c5               // vpunpckldq    xmm0, xmm0, xmm4
 10572  	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
 10573  	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
 10574  	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0
 10575  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
 10576  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 10577  
 10578  LBB0_1244:
 10579  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10580  	JE   LBB0_1553
 10581  	JMP  LBB0_1245
 10582  
 10583  LBB0_1249:
 10584  	WORD $0xff31 // xor    edi, edi
 10585  
 10586  LBB0_1250:
 10587  	LONG $0x01c0f641               // test    r8b, 1
 10588  	JE   LBB0_1252
 10589  	LONG $0x04e6fdc5; BYTE $0xfa   // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]
 10590  	LONG $0xc06bf9c5               // vpackssdw    xmm0, xmm0, xmm0
 10591  	LONG $0xc063f9c5               // vpacksswb    xmm0, xmm0, xmm0
 10592  	LONG $0x4ce6fdc5; WORD $0x20fa // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
 10593  	LONG $0xc96bf1c5               // vpackssdw    xmm1, xmm1, xmm1
 10594  	LONG $0x54e6fdc5; WORD $0x40fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
 10595  	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
 10596  	LONG $0xc162f9c5               // vpunpckldq    xmm0, xmm0, xmm1
 10597  	LONG $0xca6be9c5               // vpackssdw    xmm1, xmm2, xmm2
 10598  	LONG $0xc963f1c5               // vpacksswb    xmm1, xmm1, xmm1
 10599  	LONG $0x54e6fdc5; WORD $0x60fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 96]
 10600  	LONG $0xd26be9c5               // vpackssdw    xmm2, xmm2, xmm2
 10601  	LONG $0xd263e9c5               // vpacksswb    xmm2, xmm2, xmm2
 10602  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2
 10603  	LONG $0xc16cf9c5               // vpunpcklqdq    xmm0, xmm0, xmm1
 10604  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 10605  
 10606  LBB0_1252:
 10607  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10608  	JE   LBB0_1553
 10609  	JMP  LBB0_1253
 10610  
 10611  LBB0_1257:
 10612  	WORD $0xff31 // xor    edi, edi
 10613  
 10614  LBB0_1258:
 10615  	LONG $0x01c0f641               // test    r8b, 1
 10616  	JE   LBB0_1260
 10617  	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
 10618  	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
 10619  	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
 10620  	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
 10621  	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
 10622  	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
 10623  	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
 10624  	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3
 10625  
 10626  LBB0_1260:
 10627  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10628  	JE   LBB0_1553
 10629  	JMP  LBB0_1261
 10630  
 10631  LBB0_1265:
 10632  	WORD $0xff31 // xor    edi, edi
 10633  
 10634  LBB0_1266:
 10635  	LONG $0x01c0f641               // test    r8b, 1
 10636  	JE   LBB0_1268
 10637  	LONG $0x456ff9c5; BYTE $0x40   // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */
 10638  	LONG $0x0c6ffac5; BYTE $0xfa   // vmovdqu    xmm1, oword [rdx + 8*rdi]
 10639  	LONG $0x546ffac5; WORD $0x10fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
 10640  	LONG $0x5c6ffac5; WORD $0x20fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
 10641  	LONG $0x646ffac5; WORD $0x30fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
 10642  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10643  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 10644  	LONG $0xca61f1c5               // vpunpcklwd    xmm1, xmm1, xmm2
 10645  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 10646  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10647  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2
 10648  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2
 10649  	LONG $0x546ffac5; WORD $0x50fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
 10650  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10651  	LONG $0x5c6ffac5; WORD $0x40fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
 10652  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10653  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2
 10654  	LONG $0x5c6ffac5; WORD $0x70fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
 10655  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10656  	LONG $0x646ffac5; WORD $0x60fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
 10657  	LONG $0x0059e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm4, xmm0
 10658  	LONG $0xc361f9c5               // vpunpcklwd    xmm0, xmm0, xmm3
 10659  	LONG $0xc062e9c5               // vpunpckldq    xmm0, xmm2, xmm0
 10660  	LONG $0xc06cf1c5               // vpunpcklqdq    xmm0, xmm1, xmm0
 10661  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 10662  
 10663  LBB0_1268:
 10664  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10665  	JE   LBB0_1553
 10666  	JMP  LBB0_1269
 10667  
 10668  LBB0_1273:
 10669  	WORD $0xff31 // xor    edi, edi
 10670  
 10671  LBB0_1274:
 10672  	LONG $0x01c0f641               // test    r8b, 1
 10673  	JE   LBB0_1276
 10674  	QUAD $0x000000a0856ffdc5       // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */
 10675  	LONG $0x0cdbfdc5; BYTE $0x7a   // vpand    ymm1, ymm0, yword [rdx + 2*rdi]
 10676  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 10677  	LONG $0xca67f1c5               // vpackuswb    xmm1, xmm1, xmm2
 10678  	LONG $0x54dbfdc5; WORD $0x207a // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
 10679  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 10680  	LONG $0xd367e9c5               // vpackuswb    xmm2, xmm2, xmm3
 10681  	LONG $0x5cdbfdc5; WORD $0x407a // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
 10682  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 10683  	LONG $0xdc67e1c5               // vpackuswb    xmm3, xmm3, xmm4
 10684  	LONG $0x44dbfdc5; WORD $0x607a // vpand    ymm0, ymm0, yword [rdx + 2*rdi + 96]
 10685  	LONG $0x397de3c4; WORD $0x01c4 // vextracti128    xmm4, ymm0, 1
 10686  	LONG $0xc467f9c5               // vpackuswb    xmm0, xmm0, xmm4
 10687  	LONG $0x0c7ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm1
 10688  	LONG $0x547ffac5; WORD $0x1039 // vmovdqu    oword [rcx + rdi + 16], xmm2
 10689  	LONG $0x5c7ffac5; WORD $0x2039 // vmovdqu    oword [rcx + rdi + 32], xmm3
 10690  	LONG $0x447ffac5; WORD $0x3039 // vmovdqu    oword [rcx + rdi + 48], xmm0
 10691  
 10692  LBB0_1276:
 10693  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10694  	JE   LBB0_1553
 10695  	JMP  LBB0_1277
 10696  
 10697  LBB0_1281:
 10698  	WORD $0xff31 // xor    edi, edi
 10699  
 10700  LBB0_1282:
 10701  	LONG $0x01c0f641               // test    r8b, 1
 10702  	JE   LBB0_1284
 10703  	QUAD $0x000000a0856ffdc5       // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */
 10704  	LONG $0x0cdbfdc5; BYTE $0x7a   // vpand    ymm1, ymm0, yword [rdx + 2*rdi]
 10705  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 10706  	LONG $0xca67f1c5               // vpackuswb    xmm1, xmm1, xmm2
 10707  	LONG $0x54dbfdc5; WORD $0x207a // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
 10708  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 10709  	LONG $0xd367e9c5               // vpackuswb    xmm2, xmm2, xmm3
 10710  	LONG $0x5cdbfdc5; WORD $0x407a // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
 10711  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 10712  	LONG $0xdc67e1c5               // vpackuswb    xmm3, xmm3, xmm4
 10713  	LONG $0x44dbfdc5; WORD $0x607a // vpand    ymm0, ymm0, yword [rdx + 2*rdi + 96]
 10714  	LONG $0x397de3c4; WORD $0x01c4 // vextracti128    xmm4, ymm0, 1
 10715  	LONG $0xc467f9c5               // vpackuswb    xmm0, xmm0, xmm4
 10716  	LONG $0x0c7ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm1
 10717  	LONG $0x547ffac5; WORD $0x1039 // vmovdqu    oword [rcx + rdi + 16], xmm2
 10718  	LONG $0x5c7ffac5; WORD $0x2039 // vmovdqu    oword [rcx + rdi + 32], xmm3
 10719  	LONG $0x447ffac5; WORD $0x3039 // vmovdqu    oword [rcx + rdi + 48], xmm0
 10720  
 10721  LBB0_1284:
 10722  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10723  	JE   LBB0_1553
 10724  	JMP  LBB0_1285
 10725  
 10726  LBB0_1289:
 10727  	WORD $0xff31 // xor    edi, edi
 10728  
 10729  LBB0_1290:
 10730  	LONG $0x01c0f641               // test    r8b, 1
 10731  	JE   LBB0_1292
 10732  	LONG $0x456ff9c5; BYTE $0x40   // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */
 10733  	LONG $0x0c6ffac5; BYTE $0xfa   // vmovdqu    xmm1, oword [rdx + 8*rdi]
 10734  	LONG $0x546ffac5; WORD $0x10fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
 10735  	LONG $0x5c6ffac5; WORD $0x20fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
 10736  	LONG $0x646ffac5; WORD $0x30fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
 10737  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10738  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 10739  	LONG $0xca61f1c5               // vpunpcklwd    xmm1, xmm1, xmm2
 10740  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 10741  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10742  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2
 10743  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2
 10744  	LONG $0x546ffac5; WORD $0x50fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
 10745  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10746  	LONG $0x5c6ffac5; WORD $0x40fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
 10747  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10748  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2
 10749  	LONG $0x5c6ffac5; WORD $0x70fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
 10750  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10751  	LONG $0x646ffac5; WORD $0x60fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
 10752  	LONG $0x0059e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm4, xmm0
 10753  	LONG $0xc361f9c5               // vpunpcklwd    xmm0, xmm0, xmm3
 10754  	LONG $0xc062e9c5               // vpunpckldq    xmm0, xmm2, xmm0
 10755  	LONG $0xc06cf1c5               // vpunpcklqdq    xmm0, xmm1, xmm0
 10756  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 10757  
 10758  LBB0_1292:
 10759  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10760  	JE   LBB0_1553
 10761  	JMP  LBB0_1293
 10762  
 10763  LBB0_1297:
 10764  	WORD $0xff31 // xor    edi, edi
 10765  
 10766  LBB0_1298:
 10767  	LONG $0x01c0f641               // test    r8b, 1
 10768  	JE   LBB0_1300
 10769  	LONG $0x045bfec5; BYTE $0xba   // vcvttps2dq    ymm0, yword [rdx + 4*rdi]
 10770  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
 10771  	LONG $0xc16bf9c5               // vpackssdw    xmm0, xmm0, xmm1
 10772  	LONG $0x4c5bfec5; WORD $0x20ba // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
 10773  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 10774  	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2
 10775  	LONG $0x545bfec5; WORD $0x40ba // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
 10776  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 10777  	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3
 10778  	LONG $0x5c5bfec5; WORD $0x60ba // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
 10779  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 10780  	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4
 10781  	LONG $0x386de3c4; WORD $0x01d3 // vinserti128    ymm2, ymm2, xmm3, 1
 10782  	LONG $0xd063edc5               // vpacksswb    ymm2, ymm2, ymm0
 10783  	LONG $0x387de3c4; WORD $0x01c1 // vinserti128    ymm0, ymm0, xmm1, 1
 10784  	LONG $0xc063fdc5               // vpacksswb    ymm0, ymm0, ymm0
 10785  	LONG $0xc26cfdc5               // vpunpcklqdq    ymm0, ymm0, ymm2
 10786  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
 10787  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 10788  
 10789  LBB0_1300:
 10790  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10791  	JE   LBB0_1553
 10792  	JMP  LBB0_1301
 10793  
 10794  LBB0_1305:
 10795  	WORD $0xff31 // xor    edi, edi
 10796  
 10797  LBB0_1306:
 10798  	LONG $0x01c0f641               // test    r8b, 1
 10799  	JE   LBB0_1308
 10800  	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
 10801  	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
 10802  	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
 10803  	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
 10804  	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
 10805  	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
 10806  	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
 10807  	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3
 10808  
 10809  LBB0_1308:
 10810  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10811  	JE   LBB0_1553
 10812  	JMP  LBB0_1309
 10813  
 10814  LBB0_1313:
 10815  	WORD $0xff31 // xor    edi, edi
 10816  
 10817  LBB0_1314:
 10818  	LONG $0x01c0f641               // test    r8b, 1
 10819  	JE   LBB0_1316
 10820  	LONG $0x456ff9c5; BYTE $0x70   // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */
 10821  	LONG $0x0c6ffac5; BYTE $0xba   // vmovdqu    xmm1, oword [rdx + 4*rdi]
 10822  	LONG $0x546ffac5; WORD $0x10ba // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
 10823  	LONG $0x5c6ffac5; WORD $0x20ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
 10824  	LONG $0x646ffac5; WORD $0x30ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
 10825  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 10826  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 10827  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2
 10828  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 10829  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10830  	LONG $0xd262e1c5               // vpunpckldq    xmm2, xmm3, xmm2
 10831  	LONG $0x5c6ffac5; WORD $0x50ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
 10832  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 10833  	LONG $0x646ffac5; WORD $0x40ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
 10834  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 10835  	LONG $0xdb62d9c5               // vpunpckldq    xmm3, xmm4, xmm3
 10836  	LONG $0x646ffac5; WORD $0x70ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
 10837  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 10838  	LONG $0x6c6ffac5; WORD $0x60ba // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
 10839  	LONG $0x0051e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm5, xmm0
 10840  	LONG $0xc462f9c5               // vpunpckldq    xmm0, xmm0, xmm4
 10841  	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
 10842  	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
 10843  	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0
 10844  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216
 10845  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 10846  
 10847  LBB0_1316:
 10848  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10849  	JE   LBB0_1553
 10850  	JMP  LBB0_1317
 10851  
 10852  LBB0_1321:
 10853  	WORD $0xff31 // xor    edi, edi
 10854  
 10855  LBB0_1322:
 10856  	LONG $0x01c0f641                           // test    r8b, 1
 10857  	JE   LBB0_1324
 10858  	LONG $0x227de2c4; WORD $0x3a04             // vpmovsxbq    ymm0, dword [rdx + rdi]
 10859  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbq    ymm1, dword [rdx + rdi + 4]
 10860  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbq    ymm2, dword [rdx + rdi + 8]
 10861  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbq    ymm3, dword [rdx + rdi + 12]
 10862  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
 10863  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
 10864  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
 10865  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
 10866  
 10867  LBB0_1324:
 10868  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10869  	JE   LBB0_1553
 10870  	JMP  LBB0_1325
 10871  
 10872  LBB0_1329:
 10873  	WORD $0xff31 // xor    edi, edi
 10874  
 10875  LBB0_1330:
 10876  	LONG $0x01c0f641               // test    r8b, 1
 10877  	JE   LBB0_1332
 10878  	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
 10879  	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
 10880  	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
 10881  	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
 10882  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
 10883  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 10884  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 10885  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 10886  
 10887  LBB0_1332:
 10888  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10889  	JE   LBB0_1553
 10890  	JMP  LBB0_1333
 10891  
 10892  LBB0_1337:
 10893  	WORD $0xff31 // xor    edi, edi
 10894  
 10895  LBB0_1338:
 10896  	LONG $0x01c0f641               // test    r8b, 1
 10897  	JE   LBB0_1340
 10898  	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
 10899  	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
 10900  	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
 10901  	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
 10902  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
 10903  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 10904  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 10905  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 10906  
 10907  LBB0_1340:
 10908  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10909  	JE   LBB0_1553
 10910  	JMP  LBB0_1341
 10911  
 10912  LBB0_1345:
 10913  	WORD $0xff31 // xor    edi, edi
 10914  
 10915  LBB0_1346:
 10916  	LONG $0x01c0f641                           // test    r8b, 1
 10917  	JE   LBB0_1348
 10918  	LONG $0x327de2c4; WORD $0x3a04             // vpmovzxbq    ymm0, dword [rdx + rdi]
 10919  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbq    ymm1, dword [rdx + rdi + 4]
 10920  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbq    ymm2, dword [rdx + rdi + 8]
 10921  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbq    ymm3, dword [rdx + rdi + 12]
 10922  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
 10923  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
 10924  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
 10925  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
 10926  
 10927  LBB0_1348:
 10928  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10929  	JE   LBB0_1553
 10930  	JMP  LBB0_1349
 10931  
 10932  LBB0_1353:
 10933  	WORD $0xff31 // xor    edi, edi
 10934  
 10935  LBB0_1354:
 10936  	LONG $0x01c0f641                           // test    r8b, 1
 10937  	JE   LBB0_1356
 10938  	LONG $0x207de2c4; WORD $0x3a04             // vpmovsxbw    ymm0, oword [rdx + rdi]
 10939  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovsxbw    ymm1, oword [rdx + rdi + 16]
 10940  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x20 // vpmovsxbw    ymm2, oword [rdx + rdi + 32]
 10941  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovsxbw    ymm3, oword [rdx + rdi + 48]
 10942  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
 10943  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
 10944  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
 10945  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
 10946  
 10947  LBB0_1356:
 10948  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10949  	JE   LBB0_1553
 10950  	JMP  LBB0_1357
 10951  
 10952  LBB0_1361:
 10953  	WORD $0xff31 // xor    edi, edi
 10954  
 10955  LBB0_1362:
 10956  	LONG $0x01c0f641                           // test    r8b, 1
 10957  	JE   LBB0_1364
 10958  	LONG $0x207de2c4; WORD $0x3a04             // vpmovsxbw    ymm0, oword [rdx + rdi]
 10959  	LONG $0x207de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovsxbw    ymm1, oword [rdx + rdi + 16]
 10960  	LONG $0x207de2c4; WORD $0x3a54; BYTE $0x20 // vpmovsxbw    ymm2, oword [rdx + rdi + 32]
 10961  	LONG $0x207de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovsxbw    ymm3, oword [rdx + rdi + 48]
 10962  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
 10963  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
 10964  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
 10965  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
 10966  
 10967  LBB0_1364:
 10968  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10969  	JE   LBB0_1553
 10970  	JMP  LBB0_1365
 10971  
 10972  LBB0_1369:
 10973  	WORD $0xff31 // xor    edi, edi
 10974  
 10975  LBB0_1370:
 10976  	LONG $0x01c0f641                           // test    r8b, 1
 10977  	JE   LBB0_1372
 10978  	LONG $0x307de2c4; WORD $0x3a04             // vpmovzxbw    ymm0, oword [rdx + rdi]
 10979  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovzxbw    ymm1, oword [rdx + rdi + 16]
 10980  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x20 // vpmovzxbw    ymm2, oword [rdx + rdi + 32]
 10981  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovzxbw    ymm3, oword [rdx + rdi + 48]
 10982  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
 10983  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
 10984  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
 10985  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
 10986  
 10987  LBB0_1372:
 10988  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 10989  	JE   LBB0_1553
 10990  	JMP  LBB0_1373
 10991  
 10992  LBB0_1377:
 10993  	WORD $0xff31 // xor    edi, edi
 10994  
 10995  LBB0_1378:
 10996  	LONG $0x01c0f641                           // test    r8b, 1
 10997  	JE   LBB0_1380
 10998  	LONG $0x307de2c4; WORD $0x3a04             // vpmovzxbw    ymm0, oword [rdx + rdi]
 10999  	LONG $0x307de2c4; WORD $0x3a4c; BYTE $0x10 // vpmovzxbw    ymm1, oword [rdx + rdi + 16]
 11000  	LONG $0x307de2c4; WORD $0x3a54; BYTE $0x20 // vpmovzxbw    ymm2, oword [rdx + rdi + 32]
 11001  	LONG $0x307de2c4; WORD $0x3a5c; BYTE $0x30 // vpmovzxbw    ymm3, oword [rdx + rdi + 48]
 11002  	LONG $0x047ffec5; BYTE $0x79               // vmovdqu    yword [rcx + 2*rdi], ymm0
 11003  	LONG $0x4c7ffec5; WORD $0x2079             // vmovdqu    yword [rcx + 2*rdi + 32], ymm1
 11004  	LONG $0x547ffec5; WORD $0x4079             // vmovdqu    yword [rcx + 2*rdi + 64], ymm2
 11005  	LONG $0x5c7ffec5; WORD $0x6079             // vmovdqu    yword [rcx + 2*rdi + 96], ymm3
 11006  
 11007  LBB0_1380:
 11008  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 11009  	JE   LBB0_1553
 11010  	JMP  LBB0_1381
 11011  
 11012  LBB0_1385:
 11013  	WORD $0xff31 // xor    edi, edi
 11014  
 11015  LBB0_1386:
 11016  	LONG $0x01c0f641                           // test    r8b, 1
 11017  	JE   LBB0_1388
 11018  	LONG $0x227de2c4; WORD $0x3a04             // vpmovsxbq    ymm0, dword [rdx + rdi]
 11019  	LONG $0x227de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovsxbq    ymm1, dword [rdx + rdi + 4]
 11020  	LONG $0x227de2c4; WORD $0x3a54; BYTE $0x08 // vpmovsxbq    ymm2, dword [rdx + rdi + 8]
 11021  	LONG $0x227de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovsxbq    ymm3, dword [rdx + rdi + 12]
 11022  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
 11023  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
 11024  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
 11025  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
 11026  
 11027  LBB0_1388:
 11028  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 11029  	JE   LBB0_1553
 11030  	JMP  LBB0_1389
 11031  
 11032  LBB0_1393:
 11033  	WORD $0xff31 // xor    edi, edi
 11034  
 11035  LBB0_1394:
 11036  	LONG $0x01c0f641                           // test    r8b, 1
 11037  	JE   LBB0_1396
 11038  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi]
 11039  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
 11040  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
 11041  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
 11042  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0
 11043  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
 11044  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
 11045  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
 11046  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
 11047  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
 11048  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
 11049  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
 11050  
 11051  LBB0_1396:
 11052  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 11053  	JE   LBB0_1553
 11054  	JMP  LBB0_1397
 11055  
 11056  LBB0_1401:
 11057  	WORD $0xff31 // xor    edi, edi
 11058  
 11059  LBB0_1402:
 11060  	LONG $0x01c0f641               // test    r8b, 1
 11061  	JE   LBB0_1404
 11062  	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
 11063  	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
 11064  	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
 11065  	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
 11066  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
 11067  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 11068  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 11069  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 11070  
 11071  LBB0_1404:
 11072  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 11073  	JE   LBB0_1553
 11074  	JMP  LBB0_1405
 11075  
 11076  LBB0_1409:
 11077  	WORD $0xff31 // xor    edi, edi
 11078  
 11079  LBB0_1410:
 11080  	LONG $0x01c0f641               // test    r8b, 1
 11081  	JE   LBB0_1412
 11082  	LONG $0x0410fdc5; BYTE $0xfa   // vmovupd    ymm0, yword [rdx + 8*rdi]
 11083  	LONG $0x4c10fdc5; WORD $0x20fa // vmovupd    ymm1, yword [rdx + 8*rdi + 32]
 11084  	LONG $0x5410fdc5; WORD $0x40fa // vmovupd    ymm2, yword [rdx + 8*rdi + 64]
 11085  	LONG $0x5c10fdc5; WORD $0x60fa // vmovupd    ymm3, yword [rdx + 8*rdi + 96]
 11086  	LONG $0x0411fdc5; BYTE $0xf9   // vmovupd    yword [rcx + 8*rdi], ymm0
 11087  	LONG $0x4c11fdc5; WORD $0x20f9 // vmovupd    yword [rcx + 8*rdi + 32], ymm1
 11088  	LONG $0x5411fdc5; WORD $0x40f9 // vmovupd    yword [rcx + 8*rdi + 64], ymm2
 11089  	LONG $0x5c11fdc5; WORD $0x60f9 // vmovupd    yword [rcx + 8*rdi + 96], ymm3
 11090  
 11091  LBB0_1412:
 11092  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 11093  	JE   LBB0_1553
 11094  	JMP  LBB0_1413
 11095  
 11096  LBB0_1417: // zero the element index, then fall through to LBB0_1418
 11097  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11098  // Optional vector step: copies 128 bytes (four 32-byte loads/stores) from [rdx + 4*rdi] to [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11099  LBB0_1418:
 11100  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11101  	JE   LBB0_1420 // bit 0 clear: skip the vector step
 11102  	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
 11103  	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
 11104  	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
 11105  	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
 11106  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
 11107  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
 11108  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
 11109  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
 11110  // Tail check reached from both the taken and the skipped path above.
 11111  LBB0_1420:
 11112  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11113  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11114  	JMP  LBB0_1421 // not equal: continue at LBB0_1421 (defined outside this chunk)
 11115  
 11116  LBB0_1425: // zero the element index, then fall through to LBB0_1426
 11117  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11118  // Optional vector step: zero-extends 16 source bytes at [rdx + rdi] to 16 qwords stored at [rcx + 8*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11119  LBB0_1426:
 11120  	LONG $0x01c0f641                           // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11121  	JE   LBB0_1428 // bit 0 clear: skip the vector step
 11122  	LONG $0x327de2c4; WORD $0x3a04             // vpmovzxbq    ymm0, dword [rdx + rdi]  (4 bytes -> 4 qwords)
 11123  	LONG $0x327de2c4; WORD $0x3a4c; BYTE $0x04 // vpmovzxbq    ymm1, dword [rdx + rdi + 4]
 11124  	LONG $0x327de2c4; WORD $0x3a54; BYTE $0x08 // vpmovzxbq    ymm2, dword [rdx + rdi + 8]
 11125  	LONG $0x327de2c4; WORD $0x3a5c; BYTE $0x0c // vpmovzxbq    ymm3, dword [rdx + rdi + 12]
 11126  	LONG $0x047ffec5; BYTE $0xf9               // vmovdqu    yword [rcx + 8*rdi], ymm0
 11127  	LONG $0x4c7ffec5; WORD $0x20f9             // vmovdqu    yword [rcx + 8*rdi + 32], ymm1
 11128  	LONG $0x547ffec5; WORD $0x40f9             // vmovdqu    yword [rcx + 8*rdi + 64], ymm2
 11129  	LONG $0x5c7ffec5; WORD $0x60f9             // vmovdqu    yword [rcx + 8*rdi + 96], ymm3
 11130  // Tail check reached from both the taken and the skipped path above.
 11131  LBB0_1428:
 11132  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11133  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11134  	JMP  LBB0_1429 // not equal: continue at LBB0_1429 (defined outside this chunk)
 11135  
 11136  LBB0_1433: // zero the element index, then fall through to LBB0_1434
 11137  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11138  // Optional vector step: zero-extends 32 source bytes at [rdx + rdi] to dwords, converts them to float32 and stores 128 bytes at [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11139  LBB0_1434:
 11140  	LONG $0x01c0f641                           // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11141  	JE   LBB0_1436 // bit 0 clear: skip the vector step
 11142  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi]  (8 bytes -> 8 dwords)
 11143  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
 11144  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
 11145  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
 11146  	LONG $0xc05bfcc5                           // vcvtdq2ps    ymm0, ymm0  (int32 -> float32, in place)
 11147  	LONG $0xc95bfcc5                           // vcvtdq2ps    ymm1, ymm1
 11148  	LONG $0xd25bfcc5                           // vcvtdq2ps    ymm2, ymm2
 11149  	LONG $0xdb5bfcc5                           // vcvtdq2ps    ymm3, ymm3
 11150  	LONG $0x0411fcc5; BYTE $0xb9               // vmovups    yword [rcx + 4*rdi], ymm0
 11151  	LONG $0x4c11fcc5; WORD $0x20b9             // vmovups    yword [rcx + 4*rdi + 32], ymm1
 11152  	LONG $0x5411fcc5; WORD $0x40b9             // vmovups    yword [rcx + 4*rdi + 64], ymm2
 11153  	LONG $0x5c11fcc5; WORD $0x60b9             // vmovups    yword [rcx + 4*rdi + 96], ymm3
 11154  // Tail check reached from both the taken and the skipped path above.
 11155  LBB0_1436:
 11156  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11157  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11158  	JMP  LBB0_1437 // not equal: continue at LBB0_1437 (defined outside this chunk)
 11159  
 11160  LBB0_1441: // zero the element index, then fall through to LBB0_1442
 11161  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11162  // Optional vector step: keeps the low byte of each of 32 dwords at [rdx + 4*rdi] and stores the 32 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11163  LBB0_1442:
 11164  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11165  	JE   LBB0_1444 // bit 0 clear: skip the vector step
 11166  	LONG $0x456ff9c5; BYTE $0x70   // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */  (shuffle mask: LCDATA1+0x70 lists byte indexes 0,4,8,12 in its low four bytes)
 11167  	LONG $0x0c6ffac5; BYTE $0xba   // vmovdqu    xmm1, oword [rdx + 4*rdi]
 11168  	LONG $0x546ffac5; WORD $0x10ba // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
 11169  	LONG $0x5c6ffac5; WORD $0x20ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
 11170  	LONG $0x646ffac5; WORD $0x30ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
 11171  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0  (low dword = low bytes of the 4 source dwords)
 11172  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 11173  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2  (low qword = bytes for elements 0..7)
 11174  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 11175  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11176  	LONG $0xd262e1c5               // vpunpckldq    xmm2, xmm3, xmm2  (low qword = bytes for elements 8..15)
 11177  	LONG $0x5c6ffac5; WORD $0x50ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
 11178  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11179  	LONG $0x646ffac5; WORD $0x40ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
 11180  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 11181  	LONG $0xdb62d9c5               // vpunpckldq    xmm3, xmm4, xmm3  (low qword = bytes for elements 16..23)
 11182  	LONG $0x646ffac5; WORD $0x70ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
 11183  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 11184  	LONG $0x6c6ffac5; WORD $0x60ba // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
 11185  	LONG $0x0051e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm5, xmm0  (last use of the mask; xmm0 is overwritten)
 11186  	LONG $0xc462f9c5               // vpunpckldq    xmm0, xmm0, xmm4  (low qword = bytes for elements 24..31)
 11187  	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
 11188  	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
 11189  	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0  (qwords now hold elements 0..7, 16..23, 8..15, 24..31)
 11190  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216  (0xd8 picks qwords 0,2,1,3, restoring element order)
 11191  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 11192  // Tail check reached from both the taken and the skipped path above.
 11193  LBB0_1444:
 11194  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11195  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11196  	JMP  LBB0_1445 // not equal: continue at LBB0_1445 (defined outside this chunk)
 11197  
 11198  LBB0_1449: // zero the element index, then fall through to LBB0_1450
 11199  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11200  // Optional vector step: converts 16 float64 values at [rdx + 8*rdi] to int32 with truncation, packs them down to bytes with unsigned saturation, and stores 16 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11201  LBB0_1450:
 11202  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11203  	JE   LBB0_1452 // bit 0 clear: skip the vector step
 11204  	LONG $0x04e6fdc5; BYTE $0xfa   // vcvttpd2dq    xmm0, yword [rdx + 8*rdi]  (4 float64 -> 4 int32, truncating)
 11205  	LONG $0x2b79e2c4; BYTE $0xc0   // vpackusdw    xmm0, xmm0, xmm0  (int32 -> 16-bit, unsigned saturation)
 11206  	LONG $0xc067f9c5               // vpackuswb    xmm0, xmm0, xmm0  (16-bit -> 8-bit, unsigned saturation; low dword = elements 0..3)
 11207  	LONG $0x4ce6fdc5; WORD $0x20fa // vcvttpd2dq    xmm1, yword [rdx + 8*rdi + 32]
 11208  	LONG $0x2b71e2c4; BYTE $0xc9   // vpackusdw    xmm1, xmm1, xmm1
 11209  	LONG $0x54e6fdc5; WORD $0x40fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 64]
 11210  	LONG $0xc967f1c5               // vpackuswb    xmm1, xmm1, xmm1
 11211  	LONG $0xc162f9c5               // vpunpckldq    xmm0, xmm0, xmm1  (low qword = elements 0..7)
 11212  	LONG $0x2b69e2c4; BYTE $0xca   // vpackusdw    xmm1, xmm2, xmm2
 11213  	LONG $0xc967f1c5               // vpackuswb    xmm1, xmm1, xmm1
 11214  	LONG $0x54e6fdc5; WORD $0x60fa // vcvttpd2dq    xmm2, yword [rdx + 8*rdi + 96]
 11215  	LONG $0x2b69e2c4; BYTE $0xd2   // vpackusdw    xmm2, xmm2, xmm2
 11216  	LONG $0xd267e9c5               // vpackuswb    xmm2, xmm2, xmm2
 11217  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2  (low qword = elements 8..15)
 11218  	LONG $0xc16cf9c5               // vpunpcklqdq    xmm0, xmm0, xmm1  (all 16 result bytes in order)
 11219  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 11220  // Tail check reached from both the taken and the skipped path above.
 11221  LBB0_1452:
 11222  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11223  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11224  	JMP  LBB0_1453 // not equal: continue at LBB0_1453 (defined outside this chunk)
 11225  
 11226  LBB0_1457: // zero the element index, then fall through to LBB0_1458
 11227  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11228  // Optional vector step: copies 128 bytes (four 32-byte loads/stores) from [rdx + rdi] to [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11229  LBB0_1458:
 11230  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11231  	JE   LBB0_1460 // bit 0 clear: skip the vector step
 11232  	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
 11233  	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
 11234  	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
 11235  	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
 11236  	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
 11237  	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
 11238  	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
 11239  	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3
 11240  // Tail check reached from both the taken and the skipped path above.
 11241  LBB0_1460:
 11242  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11243  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11244  	JMP  LBB0_1461 // not equal: continue at LBB0_1461 (defined outside this chunk)
 11245  
 11246  LBB0_1465: // zero the element index, then fall through to LBB0_1466
 11247  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11248  // Optional vector step: keeps the low byte of each of 16 qwords at [rdx + 8*rdi] and stores the 16 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11249  LBB0_1466:
 11250  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11251  	JE   LBB0_1468 // bit 0 clear: skip the vector step
 11252  	LONG $0x456ff9c5; BYTE $0x40   // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */  (shuffle mask: LCDATA1+0x40 lists byte indexes 0,8 in its low two bytes)
 11253  	LONG $0x0c6ffac5; BYTE $0xfa   // vmovdqu    xmm1, oword [rdx + 8*rdi]
 11254  	LONG $0x546ffac5; WORD $0x10fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
 11255  	LONG $0x5c6ffac5; WORD $0x20fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
 11256  	LONG $0x646ffac5; WORD $0x30fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
 11257  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0  (low word = low bytes of the 2 source qwords)
 11258  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 11259  	LONG $0xca61f1c5               // vpunpcklwd    xmm1, xmm1, xmm2  (low dword = elements 0..3)
 11260  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 11261  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11262  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2  (low dword = elements 4..7)
 11263  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2  (low qword = elements 0..7)
 11264  	LONG $0x546ffac5; WORD $0x50fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
 11265  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 11266  	LONG $0x5c6ffac5; WORD $0x40fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
 11267  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11268  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2  (low dword = elements 8..11)
 11269  	LONG $0x5c6ffac5; WORD $0x70fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
 11270  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11271  	LONG $0x646ffac5; WORD $0x60fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
 11272  	LONG $0x0059e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm4, xmm0  (last use of the mask; xmm0 is overwritten)
 11273  	LONG $0xc361f9c5               // vpunpcklwd    xmm0, xmm0, xmm3  (low dword = elements 12..15)
 11274  	LONG $0xc062e9c5               // vpunpckldq    xmm0, xmm2, xmm0  (low qword = elements 8..15)
 11275  	LONG $0xc06cf1c5               // vpunpcklqdq    xmm0, xmm1, xmm0  (all 16 result bytes in order)
 11276  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 11277  // Tail check reached from both the taken and the skipped path above.
 11278  LBB0_1468:
 11279  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11280  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11281  	JMP  LBB0_1469 // not equal: continue at LBB0_1469 (defined outside this chunk)
 11282  
 11283  LBB0_1473: // zero the element index, then fall through to LBB0_1474
 11284  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11285  // Optional vector step: keeps the low byte of each of 64 16-bit words at [rdx + 2*rdi] and stores the 64 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11286  LBB0_1474:
 11287  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11288  	JE   LBB0_1476 // bit 0 clear: skip the vector step
 11289  	QUAD $0x000000a0856ffdc5       // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */  (LCDATA1+0xa0: 0x00ff in every 16-bit word)
 11290  	LONG $0x0cdbfdc5; BYTE $0x7a   // vpand    ymm1, ymm0, yword [rdx + 2*rdi]  (clear the high byte of each word so the pack below cannot saturate)
 11291  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 11292  	LONG $0xca67f1c5               // vpackuswb    xmm1, xmm1, xmm2  (16 words -> 16 bytes, elements 0..15)
 11293  	LONG $0x54dbfdc5; WORD $0x207a // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
 11294  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 11295  	LONG $0xd367e9c5               // vpackuswb    xmm2, xmm2, xmm3
 11296  	LONG $0x5cdbfdc5; WORD $0x407a // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
 11297  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 11298  	LONG $0xdc67e1c5               // vpackuswb    xmm3, xmm3, xmm4
 11299  	LONG $0x44dbfdc5; WORD $0x607a // vpand    ymm0, ymm0, yword [rdx + 2*rdi + 96]  (last use of the mask; ymm0 is overwritten)
 11300  	LONG $0x397de3c4; WORD $0x01c4 // vextracti128    xmm4, ymm0, 1
 11301  	LONG $0xc467f9c5               // vpackuswb    xmm0, xmm0, xmm4
 11302  	LONG $0x0c7ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm1
 11303  	LONG $0x547ffac5; WORD $0x1039 // vmovdqu    oword [rcx + rdi + 16], xmm2
 11304  	LONG $0x5c7ffac5; WORD $0x2039 // vmovdqu    oword [rcx + rdi + 32], xmm3
 11305  	LONG $0x447ffac5; WORD $0x3039 // vmovdqu    oword [rcx + rdi + 48], xmm0
 11306  // Tail check reached from both the taken and the skipped path above.
 11307  LBB0_1476:
 11308  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11309  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11310  	JMP  LBB0_1477 // not equal: continue at LBB0_1477 (defined outside this chunk)
 11311  
 11312  LBB0_1481: // zero the element index, then fall through to LBB0_1482
 11313  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11314  // Optional vector step: keeps the low byte of each of 64 16-bit words at [rdx + 2*rdi] and stores the 64 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11315  LBB0_1482:
 11316  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11317  	JE   LBB0_1484 // bit 0 clear: skip the vector step
 11318  	QUAD $0x000000a0856ffdc5       // vmovdqa    ymm0, yword 160[rbp] /* [rip + .LCPI0_16] */  (LCDATA1+0xa0: 0x00ff in every 16-bit word)
 11319  	LONG $0x0cdbfdc5; BYTE $0x7a   // vpand    ymm1, ymm0, yword [rdx + 2*rdi]  (clear the high byte of each word so the pack below cannot saturate)
 11320  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 11321  	LONG $0xca67f1c5               // vpackuswb    xmm1, xmm1, xmm2  (16 words -> 16 bytes, elements 0..15)
 11322  	LONG $0x54dbfdc5; WORD $0x207a // vpand    ymm2, ymm0, yword [rdx + 2*rdi + 32]
 11323  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 11324  	LONG $0xd367e9c5               // vpackuswb    xmm2, xmm2, xmm3
 11325  	LONG $0x5cdbfdc5; WORD $0x407a // vpand    ymm3, ymm0, yword [rdx + 2*rdi + 64]
 11326  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 11327  	LONG $0xdc67e1c5               // vpackuswb    xmm3, xmm3, xmm4
 11328  	LONG $0x44dbfdc5; WORD $0x607a // vpand    ymm0, ymm0, yword [rdx + 2*rdi + 96]  (last use of the mask; ymm0 is overwritten)
 11329  	LONG $0x397de3c4; WORD $0x01c4 // vextracti128    xmm4, ymm0, 1
 11330  	LONG $0xc467f9c5               // vpackuswb    xmm0, xmm0, xmm4
 11331  	LONG $0x0c7ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm1
 11332  	LONG $0x547ffac5; WORD $0x1039 // vmovdqu    oword [rcx + rdi + 16], xmm2
 11333  	LONG $0x5c7ffac5; WORD $0x2039 // vmovdqu    oword [rcx + rdi + 32], xmm3
 11334  	LONG $0x447ffac5; WORD $0x3039 // vmovdqu    oword [rcx + rdi + 48], xmm0
 11335  // Tail check reached from both the taken and the skipped path above.
 11336  LBB0_1484:
 11337  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11338  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11339  	JMP  LBB0_1485 // not equal: continue at LBB0_1485 (defined outside this chunk)
 11340  
 11341  LBB0_1489: // zero the element index, then fall through to LBB0_1490
 11342  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11343  // Optional vector step: keeps the low byte of each of 16 qwords at [rdx + 8*rdi] and stores the 16 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11344  LBB0_1490:
 11345  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11346  	JE   LBB0_1492 // bit 0 clear: skip the vector step
 11347  	LONG $0x456ff9c5; BYTE $0x40   // vmovdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_4] */  (shuffle mask: LCDATA1+0x40 lists byte indexes 0,8 in its low two bytes)
 11348  	LONG $0x0c6ffac5; BYTE $0xfa   // vmovdqu    xmm1, oword [rdx + 8*rdi]
 11349  	LONG $0x546ffac5; WORD $0x10fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 16]
 11350  	LONG $0x5c6ffac5; WORD $0x20fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 32]
 11351  	LONG $0x646ffac5; WORD $0x30fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 48]
 11352  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0  (low word = low bytes of the 2 source qwords)
 11353  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 11354  	LONG $0xca61f1c5               // vpunpcklwd    xmm1, xmm1, xmm2  (low dword = elements 0..3)
 11355  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 11356  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11357  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2  (low dword = elements 4..7)
 11358  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2  (low qword = elements 0..7)
 11359  	LONG $0x546ffac5; WORD $0x50fa // vmovdqu    xmm2, oword [rdx + 8*rdi + 80]
 11360  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0
 11361  	LONG $0x5c6ffac5; WORD $0x40fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 64]
 11362  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11363  	LONG $0xd261e1c5               // vpunpcklwd    xmm2, xmm3, xmm2  (low dword = elements 8..11)
 11364  	LONG $0x5c6ffac5; WORD $0x70fa // vmovdqu    xmm3, oword [rdx + 8*rdi + 112]
 11365  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11366  	LONG $0x646ffac5; WORD $0x60fa // vmovdqu    xmm4, oword [rdx + 8*rdi + 96]
 11367  	LONG $0x0059e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm4, xmm0  (last use of the mask; xmm0 is overwritten)
 11368  	LONG $0xc361f9c5               // vpunpcklwd    xmm0, xmm0, xmm3  (low dword = elements 12..15)
 11369  	LONG $0xc062e9c5               // vpunpckldq    xmm0, xmm2, xmm0  (low qword = elements 8..15)
 11370  	LONG $0xc06cf1c5               // vpunpcklqdq    xmm0, xmm1, xmm0  (all 16 result bytes in order)
 11371  	LONG $0x047ffac5; BYTE $0x39   // vmovdqu    oword [rcx + rdi], xmm0
 11372  // Tail check reached from both the taken and the skipped path above.
 11373  LBB0_1492:
 11374  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11375  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11376  	JMP  LBB0_1493 // not equal: continue at LBB0_1493 (defined outside this chunk)
 11377  
 11378  LBB0_1497: // zero the element index, then fall through to LBB0_1498
 11379  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11380  // Optional vector step: converts 32 float32 values at [rdx + 4*rdi] to int32 with truncation, packs to 16-bit with signed saturation and then to 8-bit with unsigned saturation, and stores 32 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11381  LBB0_1498:
 11382  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11383  	JE   LBB0_1500 // bit 0 clear: skip the vector step
 11384  	LONG $0x045bfec5; BYTE $0xba   // vcvttps2dq    ymm0, yword [rdx + 4*rdi]  (8 float32 -> 8 int32, truncating)
 11385  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
 11386  	LONG $0xc16bf9c5               // vpackssdw    xmm0, xmm0, xmm1  (xmm0 = elements 0..7 as 16-bit, signed saturation)
 11387  	LONG $0x4c5bfec5; WORD $0x20ba // vcvttps2dq    ymm1, yword [rdx + 4*rdi + 32]
 11388  	LONG $0x397de3c4; WORD $0x01ca // vextracti128    xmm2, ymm1, 1
 11389  	LONG $0xca6bf1c5               // vpackssdw    xmm1, xmm1, xmm2  (xmm1 = elements 8..15)
 11390  	LONG $0x545bfec5; WORD $0x40ba // vcvttps2dq    ymm2, yword [rdx + 4*rdi + 64]
 11391  	LONG $0x397de3c4; WORD $0x01d3 // vextracti128    xmm3, ymm2, 1
 11392  	LONG $0xd36be9c5               // vpackssdw    xmm2, xmm2, xmm3  (xmm2 = elements 16..23)
 11393  	LONG $0x5c5bfec5; WORD $0x60ba // vcvttps2dq    ymm3, yword [rdx + 4*rdi + 96]
 11394  	LONG $0x397de3c4; WORD $0x01dc // vextracti128    xmm4, ymm3, 1
 11395  	LONG $0xdc6be1c5               // vpackssdw    xmm3, xmm3, xmm4  (xmm3 = elements 24..31)
 11396  	LONG $0x386de3c4; WORD $0x01d3 // vinserti128    ymm2, ymm2, xmm3, 1
 11397  	LONG $0xd067edc5               // vpackuswb    ymm2, ymm2, ymm0  (ymm0 only fills the upper qword of each lane, which the vpunpcklqdq below does not read)
 11398  	LONG $0x387de3c4; WORD $0x01c1 // vinserti128    ymm0, ymm0, xmm1, 1
 11399  	LONG $0xc067fdc5               // vpackuswb    ymm0, ymm0, ymm0  (16-bit -> 8-bit, unsigned saturation)
 11400  	LONG $0xc26cfdc5               // vpunpcklqdq    ymm0, ymm0, ymm2  (qwords now hold elements 0..7, 16..23, 8..15, 24..31)
 11401  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216  (0xd8 picks qwords 0,2,1,3, restoring element order)
 11402  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 11403  // Tail check reached from both the taken and the skipped path above.
 11404  LBB0_1500:
 11405  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11406  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11407  	JMP  LBB0_1501 // not equal: continue at LBB0_1501 (defined outside this chunk)
 11408  
 11409  LBB0_1505: // zero the element index, then fall through to LBB0_1506
 11410  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11411  // Optional vector step: copies 128 bytes (four 32-byte loads/stores) from [rdx + rdi] to [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11412  LBB0_1506:
 11413  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11414  	JE   LBB0_1508 // bit 0 clear: skip the vector step
 11415  	LONG $0x0410fdc5; BYTE $0x3a   // vmovupd    ymm0, yword [rdx + rdi]
 11416  	LONG $0x4c10fdc5; WORD $0x203a // vmovupd    ymm1, yword [rdx + rdi + 32]
 11417  	LONG $0x5410fdc5; WORD $0x403a // vmovupd    ymm2, yword [rdx + rdi + 64]
 11418  	LONG $0x5c10fdc5; WORD $0x603a // vmovupd    ymm3, yword [rdx + rdi + 96]
 11419  	LONG $0x0411fdc5; BYTE $0x39   // vmovupd    yword [rcx + rdi], ymm0
 11420  	LONG $0x4c11fdc5; WORD $0x2039 // vmovupd    yword [rcx + rdi + 32], ymm1
 11421  	LONG $0x5411fdc5; WORD $0x4039 // vmovupd    yword [rcx + rdi + 64], ymm2
 11422  	LONG $0x5c11fdc5; WORD $0x6039 // vmovupd    yword [rcx + rdi + 96], ymm3
 11423  // Tail check reached from both the taken and the skipped path above.
 11424  LBB0_1508:
 11425  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11426  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11427  	JMP  LBB0_1509 // not equal: continue at LBB0_1509 (defined outside this chunk)
 11428  
 11429  LBB0_1513: // zero the element index, then fall through to LBB0_1514
 11430  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11431  // Optional vector step: keeps the low byte of each of 32 dwords at [rdx + 4*rdi] and stores the 32 bytes at [rcx + rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11432  LBB0_1514:
 11433  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11434  	JE   LBB0_1516 // bit 0 clear: skip the vector step
 11435  	LONG $0x456ff9c5; BYTE $0x70   // vmovdqa    xmm0, oword 112[rbp] /* [rip + .LCPI0_12] */  (shuffle mask: LCDATA1+0x70 lists byte indexes 0,4,8,12 in its low four bytes)
 11436  	LONG $0x0c6ffac5; BYTE $0xba   // vmovdqu    xmm1, oword [rdx + 4*rdi]
 11437  	LONG $0x546ffac5; WORD $0x10ba // vmovdqu    xmm2, oword [rdx + 4*rdi + 16]
 11438  	LONG $0x5c6ffac5; WORD $0x20ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 32]
 11439  	LONG $0x646ffac5; WORD $0x30ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 48]
 11440  	LONG $0x0069e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm2, xmm0  (low dword = low bytes of the 4 source dwords)
 11441  	LONG $0x0071e2c4; BYTE $0xc8   // vpshufb    xmm1, xmm1, xmm0
 11442  	LONG $0xca62f1c5               // vpunpckldq    xmm1, xmm1, xmm2  (low qword = bytes for elements 0..7)
 11443  	LONG $0x0059e2c4; BYTE $0xd0   // vpshufb    xmm2, xmm4, xmm0
 11444  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11445  	LONG $0xd262e1c5               // vpunpckldq    xmm2, xmm3, xmm2  (low qword = bytes for elements 8..15)
 11446  	LONG $0x5c6ffac5; WORD $0x50ba // vmovdqu    xmm3, oword [rdx + 4*rdi + 80]
 11447  	LONG $0x0061e2c4; BYTE $0xd8   // vpshufb    xmm3, xmm3, xmm0
 11448  	LONG $0x646ffac5; WORD $0x40ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 64]
 11449  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 11450  	LONG $0xdb62d9c5               // vpunpckldq    xmm3, xmm4, xmm3  (low qword = bytes for elements 16..23)
 11451  	LONG $0x646ffac5; WORD $0x70ba // vmovdqu    xmm4, oword [rdx + 4*rdi + 112]
 11452  	LONG $0x0059e2c4; BYTE $0xe0   // vpshufb    xmm4, xmm4, xmm0
 11453  	LONG $0x6c6ffac5; WORD $0x60ba // vmovdqu    xmm5, oword [rdx + 4*rdi + 96]
 11454  	LONG $0x0051e2c4; BYTE $0xc0   // vpshufb    xmm0, xmm5, xmm0  (last use of the mask; xmm0 is overwritten)
 11455  	LONG $0xc462f9c5               // vpunpckldq    xmm0, xmm0, xmm4  (low qword = bytes for elements 24..31)
 11456  	LONG $0x3865e3c4; WORD $0x01c0 // vinserti128    ymm0, ymm3, xmm0, 1
 11457  	LONG $0x3875e3c4; WORD $0x01ca // vinserti128    ymm1, ymm1, xmm2, 1
 11458  	LONG $0xc06cf5c5               // vpunpcklqdq    ymm0, ymm1, ymm0  (qwords now hold elements 0..7, 16..23, 8..15, 24..31)
 11459  	LONG $0x00fde3c4; WORD $0xd8c0 // vpermq    ymm0, ymm0, 216  (0xd8 picks qwords 0,2,1,3, restoring element order)
 11460  	LONG $0x047ffec5; BYTE $0x39   // vmovdqu    yword [rcx + rdi], ymm0
 11461  // Tail check reached from both the taken and the skipped path above.
 11462  LBB0_1516:
 11463  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11464  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11465  	JMP  LBB0_1517 // not equal: continue at LBB0_1517 (defined outside this chunk)
 11466  
 11467  LBB0_1521: // zero the element index, then fall through to LBB0_1522
 11468  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11469  // Optional vector step: copies 128 bytes (four 32-byte loads/stores) from [rdx + 4*rdi] to [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11470  LBB0_1522:
 11471  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11472  	JE   LBB0_1524 // bit 0 clear: skip the vector step
 11473  	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
 11474  	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
 11475  	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
 11476  	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
 11477  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
 11478  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
 11479  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
 11480  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
 11481  // Tail check reached from both the taken and the skipped path above.
 11482  LBB0_1524:
 11483  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11484  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11485  	JMP  LBB0_1525 // not equal: continue at LBB0_1525 (defined outside this chunk)
 11486  
 11487  LBB0_1529: // zero the element index, then fall through to LBB0_1530
 11488  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11489  // Optional vector step: sign-extends 32 source bytes at [rdx + rdi] to 32 dwords stored at [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11490  LBB0_1530:
 11491  	LONG $0x01c0f641                           // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11492  	JE   LBB0_1532 // bit 0 clear: skip the vector step
 11493  	LONG $0x217de2c4; WORD $0x3a04             // vpmovsxbd    ymm0, qword [rdx + rdi]  (8 signed bytes -> 8 dwords)
 11494  	LONG $0x217de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovsxbd    ymm1, qword [rdx + rdi + 8]
 11495  	LONG $0x217de2c4; WORD $0x3a54; BYTE $0x10 // vpmovsxbd    ymm2, qword [rdx + rdi + 16]
 11496  	LONG $0x217de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovsxbd    ymm3, qword [rdx + rdi + 24]
 11497  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
 11498  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
 11499  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
 11500  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
 11501  // Tail check reached from both the taken and the skipped path above.
 11502  LBB0_1532:
 11503  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11504  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11505  	JMP  LBB0_1533 // not equal: continue at LBB0_1533 (defined outside this chunk)
 11506  
 11507  LBB0_1537: // zero the element index, then fall through to LBB0_1538
 11508  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11509  // Optional vector step: zero-extends 32 source bytes at [rdx + rdi] to 32 dwords stored at [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11510  LBB0_1538:
 11511  	LONG $0x01c0f641                           // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11512  	JE   LBB0_1540 // bit 0 clear: skip the vector step
 11513  	LONG $0x317de2c4; WORD $0x3a04             // vpmovzxbd    ymm0, qword [rdx + rdi]  (8 unsigned bytes -> 8 dwords)
 11514  	LONG $0x317de2c4; WORD $0x3a4c; BYTE $0x08 // vpmovzxbd    ymm1, qword [rdx + rdi + 8]
 11515  	LONG $0x317de2c4; WORD $0x3a54; BYTE $0x10 // vpmovzxbd    ymm2, qword [rdx + rdi + 16]
 11516  	LONG $0x317de2c4; WORD $0x3a5c; BYTE $0x18 // vpmovzxbd    ymm3, qword [rdx + rdi + 24]
 11517  	LONG $0x047ffec5; BYTE $0xb9               // vmovdqu    yword [rcx + 4*rdi], ymm0
 11518  	LONG $0x4c7ffec5; WORD $0x20b9             // vmovdqu    yword [rcx + 4*rdi + 32], ymm1
 11519  	LONG $0x547ffec5; WORD $0x40b9             // vmovdqu    yword [rcx + 4*rdi + 64], ymm2
 11520  	LONG $0x5c7ffec5; WORD $0x60b9             // vmovdqu    yword [rcx + 4*rdi + 96], ymm3
 11521  // Tail check reached from both the taken and the skipped path above.
 11522  LBB0_1540:
 11523  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11524  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11525  	JMP  LBB0_1541 // not equal: continue at LBB0_1541 (defined outside this chunk)
 11526  
 11527  LBB0_1545: // zero the element index, then fall through to LBB0_1546
 11528  	WORD $0xff31 // xor    edi, edi  (rdi = 0; a 32-bit write also clears the upper 32 bits)
 11529  // Optional vector step: copies 128 bytes (four 32-byte loads/stores) from [rdx + 4*rdi] to [rcx + 4*rdi]. rdx/rcx were loaded from the in/out arguments at function entry; presumably still those bases here.
 11530  LBB0_1546:
 11531  	LONG $0x01c0f641               // test    r8b, 1  (NOTE(review): r8 is prepared outside this chunk; presumably bit 0 flags one leftover vector step -- confirm)
 11532  	JE   LBB0_1548 // bit 0 clear: skip the vector step
 11533  	LONG $0x0410fdc5; BYTE $0xba   // vmovupd    ymm0, yword [rdx + 4*rdi]
 11534  	LONG $0x4c10fdc5; WORD $0x20ba // vmovupd    ymm1, yword [rdx + 4*rdi + 32]
 11535  	LONG $0x5410fdc5; WORD $0x40ba // vmovupd    ymm2, yword [rdx + 4*rdi + 64]
 11536  	LONG $0x5c10fdc5; WORD $0x60ba // vmovupd    ymm3, yword [rdx + 4*rdi + 96]
 11537  	LONG $0x0411fdc5; BYTE $0xb9   // vmovupd    yword [rcx + 4*rdi], ymm0
 11538  	LONG $0x4c11fdc5; WORD $0x20b9 // vmovupd    yword [rcx + 4*rdi + 32], ymm1
 11539  	LONG $0x5411fdc5; WORD $0x40b9 // vmovupd    yword [rcx + 4*rdi + 64], ymm2
 11540  	LONG $0x5c11fdc5; WORD $0x60b9 // vmovupd    yword [rcx + 4*rdi + 96], ymm3
 11541  // Tail check reached from both the taken and the skipped path above.
 11542  LBB0_1548:
 11543  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9  (NOTE(review): both set outside this chunk; presumably elements vectorized vs. total -- confirm)
 11544  	JE   LBB0_1553 // equal: branch to LBB0_1553 (defined outside this chunk)
 11545  	JMP  LBB0_1549 // not equal: continue at LBB0_1549 (defined outside this chunk)