github.com/apache/arrow/go/v14@v14.0.1/arrow/compute/internal/kernels/cast_numeric_sse4_amd64.s (about)

     1  //go:build go1.18 && !noasm && !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x43e0000000000000 // float64 bit pattern of 2^63
     5  DATA LCDATA1<>+0x008(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
     6  DATA LCDATA1<>+0x010(SB)/8, $0x0000000000000400 // integer 0x400 (1024); consumer not visible in this chunk
     7  DATA LCDATA1<>+0x018(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
     8  DATA LCDATA1<>+0x020(SB)/8, $0x4f0000004f000000 // 4 x float32 2^31 (this and the next qword)
     9  DATA LCDATA1<>+0x028(SB)/8, $0x4f0000004f000000
    10  DATA LCDATA1<>+0x030(SB)/8, $0x8000000080000000 // 4 x 0x80000000: bit 31 of every 32-bit lane
    11  DATA LCDATA1<>+0x038(SB)/8, $0x8000000080000000
    12  DATA LCDATA1<>+0x040(SB)/8, $0x0000000000000800 // integer 0x800 (2048); consumer not visible in this chunk
    13  DATA LCDATA1<>+0x048(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
    14  DATA LCDATA1<>+0x050(SB)/8, $0x4330000000000000 // 2 x float64 2^52
    15  DATA LCDATA1<>+0x058(SB)/8, $0x4330000000000000
    16  DATA LCDATA1<>+0x060(SB)/8, $0x4530000000000000 // 2 x float64 2^84
    17  DATA LCDATA1<>+0x068(SB)/8, $0x4530000000000000
    18  DATA LCDATA1<>+0x070(SB)/8, $0x4530000000100000 // 2 x float64 (2^84 + 2^52)
    19  DATA LCDATA1<>+0x078(SB)/8, $0x4530000000100000
    20  DATA LCDATA1<>+0x080(SB)/8, $0x4530000043300000 // dwords 0x43300000, 0x45300000: the high words of float64 2^52 and 2^84
    21  DATA LCDATA1<>+0x088(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
    22  DATA LCDATA1<>+0x090(SB)/8, $0x4330000000000000 // float64 pair {2^52, 2^84}
    23  DATA LCDATA1<>+0x098(SB)/8, $0x4530000000000000
    24  DATA LCDATA1<>+0x0a0(SB)/8, $0x0000000000000001 // 2 x 64-bit integer 1
    25  DATA LCDATA1<>+0x0a8(SB)/8, $0x0000000000000001
    26  DATA LCDATA1<>+0x0b0(SB)/8, $0x0d0c090805040100 // byte indices 0,1,4,5,8,9,12,13: low 16 bits of each 32-bit lane (presumably a byte-shuffle mask -- confirm at use site)
    27  DATA LCDATA1<>+0x0b8(SB)/8, $0x0f0e0d0c0d0c0908 // byte indices 8,9,12,13,12,13,14,15 (upper half of the same 16-byte pattern)
    28  DATA LCDATA1<>+0x0c0(SB)/8, $0x000000000c080400 // byte indices 0,4,8,12: low byte of each 32-bit lane (presumably a byte-shuffle mask -- confirm at use site)
    29  DATA LCDATA1<>+0x0c8(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
    30  DATA LCDATA1<>+0x0d0(SB)/8, $0x4b0000004b000000 // 4 x float32 2^23
    31  DATA LCDATA1<>+0x0d8(SB)/8, $0x4b0000004b000000
    32  DATA LCDATA1<>+0x0e0(SB)/8, $0x5300000053000000 // 4 x float32 2^39
    33  DATA LCDATA1<>+0x0e8(SB)/8, $0x5300000053000000
    34  DATA LCDATA1<>+0x0f0(SB)/8, $0x5300008053000080 // 4 x float32 (2^39 + 2^23)
    35  DATA LCDATA1<>+0x0f8(SB)/8, $0x5300008053000080
    36  DATA LCDATA1<>+0x100(SB)/8, $0x0e0c0a0806040200 // byte indices 0,2,4,...,14: low byte of each 16-bit lane (presumably a byte-shuffle mask -- confirm at use site)
    37  DATA LCDATA1<>+0x108(SB)/8, $0x0000000000000000 // zero upper half of the 16-byte slot
    38  DATA LCDATA1<>+0x110(SB)/8, $0x000000005f000000 // low dword is float32 2^63; high dword is zero
    39  GLOBL LCDATA1<>(SB), 8, $280 // file-local symbol, flag 8 (RODATA), 280 = 0x118 bytes covering every DATA slot above
    40  
    41  TEXT ·_cast_type_numeric_sse4(SB), $0-40
    42  
    43  	MOVQ itype+0(FP), DI
    44  	MOVQ otype+8(FP), SI
    45  	MOVQ in+16(FP), DX
    46  	MOVQ out+24(FP), CX
    47  	MOVQ len+32(FP), R8
    48  	LEAQ LCDATA1<>(SB), BP
    49  
    50  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    51  	JG   LBB0_13
    52  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
    53  	JLE  LBB0_25
    54  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
    55  	JE   LBB0_45
    56  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
    57  	JE   LBB0_53
    58  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    59  	JNE  LBB0_1526
    60  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
    61  	JG   LBB0_93
    62  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
    63  	JLE  LBB0_163
    64  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
    65  	JE   LBB0_263
    66  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
    67  	JE   LBB0_266
    68  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
    69  	JNE  LBB0_1526
    70  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
    71  	JLE  LBB0_1526
    72  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
    73  	LONG $0x08f88341         // cmp    r8d, 8
    74  	JB   LBB0_12
    75  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
    76  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
    77  	JBE  LBB0_761
    78  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
    79  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
    80  	JBE  LBB0_761
    81  
    82  LBB0_12:
    83  	WORD $0xf631 // xor    esi, esi
    84  
    85  LBB0_1104:
    86  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
    87  	WORD $0xf749; BYTE $0xd0 // not    r8
    88  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
    89  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
    90  	LONG $0x03e78348         // and    rdi, 3
    91  	JE   LBB0_1106
    92  
    93  LBB0_1105:
    94  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
    95  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
    96  	LONG $0x01c68348         // add    rsi, 1
    97  	LONG $0xffc78348         // add    rdi, -1
    98  	JNE  LBB0_1105
    99  
   100  LBB0_1106:
   101  	LONG $0x03f88349 // cmp    r8, 3
   102  	JB   LBB0_1526
   103  
   104  LBB0_1107:
   105  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   106  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   107  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
   108  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
   109  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
   110  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
   111  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
   112  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
   113  	LONG $0x04c68348         // add    rsi, 4
   114  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   115  	JNE  LBB0_1107
   116  	JMP  LBB0_1526
   117  
   118  LBB0_13:
   119  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   120  	JLE  LBB0_35
   121  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   122  	JE   LBB0_61
   123  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   124  	JE   LBB0_69
   125  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   126  	JNE  LBB0_1526
   127  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   128  	JG   LBB0_100
   129  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   130  	JLE  LBB0_168
   131  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   132  	JE   LBB0_269
   133  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   134  	JE   LBB0_272
   135  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   136  	JNE  LBB0_1526
   137  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   138  	JLE  LBB0_1526
   139  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   140  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   141  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   142  	LONG $0x03e08341         // and    r8d, 3
   143  	LONG $0x03ff8348         // cmp    rdi, 3
   144  	JAE  LBB0_446
   145  	WORD $0xff31             // xor    edi, edi
   146  	JMP  LBB0_448
   147  
   148  LBB0_25:
   149  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   150  	JE   LBB0_77
   151  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   152  	JNE  LBB0_1526
   153  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   154  	JG   LBB0_107
   155  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   156  	JLE  LBB0_173
   157  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   158  	JE   LBB0_275
   159  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   160  	JE   LBB0_278
   161  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   162  	JNE  LBB0_1526
   163  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   164  	JLE  LBB0_1526
   165  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   166  	LONG $0x08f88341         // cmp    r8d, 8
   167  	JB   LBB0_34
   168  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   169  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   170  	JBE  LBB0_763
   171  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   172  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   173  	JBE  LBB0_763
   174  
   175  LBB0_34:
   176  	WORD $0xf631 // xor    esi, esi
   177  
   178  LBB0_1482:
   179  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   180  	WORD $0xf749; BYTE $0xd0 // not    r8
   181  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   182  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
   183  	LONG $0x03e08348         // and    rax, 3
   184  	JE   LBB0_1484
   185  
   186  LBB0_1483:
   187  	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
   188  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
   189  	LONG $0x01c68348         // add    rsi, 1
   190  	LONG $0xffc08348         // add    rax, -1
   191  	JNE  LBB0_1483
   192  
   193  LBB0_1484:
   194  	LONG $0x03f88349 // cmp    r8, 3
   195  	JB   LBB0_1526
   196  
   197  LBB0_1485:
   198  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
   199  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
   200  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
   201  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
   202  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
   203  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
   204  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
   205  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
   206  	LONG $0x04c68348             // add    rsi, 4
   207  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   208  	JNE  LBB0_1485
   209  	JMP  LBB0_1526
   210  
   211  LBB0_35:
   212  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
   213  	JE   LBB0_85
   214  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   215  	JNE  LBB0_1526
   216  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   217  	JG   LBB0_114
   218  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   219  	JLE  LBB0_178
   220  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   221  	JE   LBB0_281
   222  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   223  	JE   LBB0_284
   224  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   225  	JNE  LBB0_1526
   226  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   227  	JLE  LBB0_1526
   228  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   229  	LONG $0x04f88341         // cmp    r8d, 4
   230  	JAE  LBB0_454
   231  	WORD $0xf631             // xor    esi, esi
   232  	JMP  LBB0_948
   233  
   234  LBB0_45:
   235  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   236  	JG   LBB0_121
   237  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   238  	JLE  LBB0_183
   239  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   240  	JE   LBB0_287
   241  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   242  	JE   LBB0_290
   243  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   244  	JNE  LBB0_1526
   245  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   246  	JLE  LBB0_1526
   247  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   248  	LONG $0x08f88341         // cmp    r8d, 8
   249  	JAE  LBB0_457
   250  	WORD $0xf631             // xor    esi, esi
   251  	JMP  LBB0_953
   252  
   253  LBB0_53:
   254  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   255  	JG   LBB0_128
   256  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   257  	JLE  LBB0_188
   258  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   259  	JE   LBB0_293
   260  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   261  	JE   LBB0_296
   262  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   263  	JNE  LBB0_1526
   264  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   265  	JLE  LBB0_1526
   266  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   267  	LONG $0x08f88341         // cmp    r8d, 8
   268  	JAE  LBB0_460
   269  	WORD $0xf631             // xor    esi, esi
   270  	JMP  LBB0_958
   271  
   272  LBB0_61:
   273  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   274  	JG   LBB0_135
   275  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   276  	JLE  LBB0_193
   277  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   278  	JE   LBB0_299
   279  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   280  	JE   LBB0_302
   281  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   282  	JNE  LBB0_1526
   283  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   284  	JLE  LBB0_1526
   285  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   286  	LONG $0x04f88341         // cmp    r8d, 4
   287  	JAE  LBB0_463
   288  	WORD $0xf631             // xor    esi, esi
   289  	JMP  LBB0_963
   290  
   291  LBB0_69:
   292  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   293  	JG   LBB0_142
   294  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   295  	JLE  LBB0_198
   296  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   297  	JE   LBB0_305
   298  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   299  	JE   LBB0_308
   300  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   301  	JNE  LBB0_1526
   302  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   303  	JLE  LBB0_1526
   304  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   305  	LONG $0x08f88341         // cmp    r8d, 8
   306  	JAE  LBB0_466
   307  	WORD $0xf631             // xor    esi, esi
   308  	JMP  LBB0_968
   309  
   310  LBB0_77:
   311  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   312  	JG   LBB0_149
   313  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   314  	JLE  LBB0_203
   315  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   316  	JE   LBB0_311
   317  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   318  	JE   LBB0_314
   319  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   320  	JNE  LBB0_1526
   321  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   322  	JLE  LBB0_1526
   323  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   324  	LONG $0x08f88341         // cmp    r8d, 8
   325  	JB   LBB0_84
   326  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   327  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   328  	JBE  LBB0_766
   329  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   330  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   331  	JBE  LBB0_766
   332  
   333  LBB0_84:
   334  	WORD $0xf631 // xor    esi, esi
   335  
   336  LBB0_1490:
   337  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   338  	WORD $0xf749; BYTE $0xd0 // not    r8
   339  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   340  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
   341  	LONG $0x03e08348         // and    rax, 3
   342  	JE   LBB0_1492
   343  
   344  LBB0_1491:
   345  	LONG $0x323cb60f         // movzx    edi, byte [rdx + rsi]
   346  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
   347  	LONG $0x01c68348         // add    rsi, 1
   348  	LONG $0xffc08348         // add    rax, -1
   349  	JNE  LBB0_1491
   350  
   351  LBB0_1492:
   352  	LONG $0x03f88349 // cmp    r8, 3
   353  	JB   LBB0_1526
   354  
   355  LBB0_1493:
   356  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   357  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
   358  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   359  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
   360  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   361  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
   362  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   363  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
   364  	LONG $0x04c68348             // add    rsi, 4
   365  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   366  	JNE  LBB0_1493
   367  	JMP  LBB0_1526
   368  
   369  LBB0_85:
   370  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   371  	JG   LBB0_156
   372  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   373  	JLE  LBB0_208
   374  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
   375  	JE   LBB0_317
   376  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
   377  	JE   LBB0_320
   378  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
   379  	JNE  LBB0_1526
   380  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   381  	JLE  LBB0_1526
   382  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   383  	LONG $0x08f88341         // cmp    r8d, 8
   384  	JB   LBB0_92
   385  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
   386  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   387  	JBE  LBB0_769
   388  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
   389  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   390  	JBE  LBB0_769
   391  
   392  LBB0_92:
   393  	WORD $0xf631 // xor    esi, esi
   394  
   395  LBB0_1114:
   396  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   397  	WORD $0xf749; BYTE $0xd0 // not    r8
   398  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   399  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   400  	LONG $0x03e78348         // and    rdi, 3
   401  	JE   LBB0_1116
   402  
   403  LBB0_1115:
   404  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   405  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   406  	LONG $0x01c68348         // add    rsi, 1
   407  	LONG $0xffc78348         // add    rdi, -1
   408  	JNE  LBB0_1115
   409  
   410  LBB0_1116:
   411  	LONG $0x03f88349 // cmp    r8, 3
   412  	JB   LBB0_1526
   413  
   414  LBB0_1117:
   415  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   416  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
   417  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
   418  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
   419  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
   420  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
   421  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
   422  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
   423  	LONG $0x04c68348         // add    rsi, 4
   424  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
   425  	JNE  LBB0_1117
   426  	JMP  LBB0_1526
   427  
   428  LBB0_93:
   429  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   430  	JLE  LBB0_213
   431  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   432  	JE   LBB0_323
   433  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   434  	JE   LBB0_326
   435  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   436  	JNE  LBB0_1526
   437  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   438  	JLE  LBB0_1526
   439  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   440  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   441  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   442  	LONG $0x03e08341         // and    r8d, 3
   443  	LONG $0x03ff8348         // cmp    rdi, 3
   444  	JAE  LBB0_475
   445  	WORD $0xff31             // xor    edi, edi
   446  	JMP  LBB0_477
   447  
   448  LBB0_100:
   449  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   450  	JLE  LBB0_218
   451  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   452  	JE   LBB0_329
   453  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   454  	JE   LBB0_332
   455  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   456  	JNE  LBB0_1526
   457  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   458  	JLE  LBB0_1526
   459  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   460  	LONG $0x04f88341         // cmp    r8d, 4
   461  	JB   LBB0_106
   462  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   463  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   464  	JBE  LBB0_771
   465  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
   466  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   467  	JBE  LBB0_771
   468  
   469  LBB0_106:
   470  	WORD $0xf631 // xor    esi, esi
   471  
   472  LBB0_1124:
   473  	WORD $0x8944; BYTE $0xcf // mov    edi, r9d
   474  	WORD $0xf729             // sub    edi, esi
   475  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   476  	WORD $0xf749; BYTE $0xd0 // not    r8
   477  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   478  	LONG $0x07e78348         // and    rdi, 7
   479  	JE   LBB0_1126
   480  
   481  LBB0_1125:
   482  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
   483  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
   484  	LONG $0x01c68348 // add    rsi, 1
   485  	LONG $0xffc78348 // add    rdi, -1
   486  	JNE  LBB0_1125
   487  
   488  LBB0_1126:
   489  	LONG $0x07f88349 // cmp    r8, 7
   490  	JB   LBB0_1526
   491  
   492  LBB0_1127:
   493  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
   494  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
   495  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
   496  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
   497  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
   498  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
   499  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
   500  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
   501  	LONG $0xf2448b48; BYTE $0x20 // mov    rax, qword [rdx + 8*rsi + 32]
   502  	LONG $0xf1448948; BYTE $0x20 // mov    qword [rcx + 8*rsi + 32], rax
   503  	LONG $0xf2448b48; BYTE $0x28 // mov    rax, qword [rdx + 8*rsi + 40]
   504  	LONG $0xf1448948; BYTE $0x28 // mov    qword [rcx + 8*rsi + 40], rax
   505  	LONG $0xf2448b48; BYTE $0x30 // mov    rax, qword [rdx + 8*rsi + 48]
   506  	LONG $0xf1448948; BYTE $0x30 // mov    qword [rcx + 8*rsi + 48], rax
   507  	LONG $0xf2448b48; BYTE $0x38 // mov    rax, qword [rdx + 8*rsi + 56]
   508  	LONG $0xf1448948; BYTE $0x38 // mov    qword [rcx + 8*rsi + 56], rax
   509  	LONG $0x08c68348             // add    rsi, 8
   510  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   511  	JNE  LBB0_1127
   512  	JMP  LBB0_1526
   513  
   514  LBB0_107:
   515  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   516  	JLE  LBB0_223
   517  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   518  	JE   LBB0_335
   519  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   520  	JE   LBB0_338
   521  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   522  	JNE  LBB0_1526
   523  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   524  	JLE  LBB0_1526
   525  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   526  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   527  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   528  	LONG $0x03e08341         // and    r8d, 3
   529  	LONG $0x03ff8348         // cmp    rdi, 3
   530  	JAE  LBB0_483
   531  	WORD $0xff31             // xor    edi, edi
   532  	JMP  LBB0_485
   533  
   534  LBB0_114:
   535  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   536  	JLE  LBB0_228
   537  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   538  	JE   LBB0_341
   539  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   540  	JE   LBB0_344
   541  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   542  	JNE  LBB0_1526
   543  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   544  	JLE  LBB0_1526
   545  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   546  	LONG $0x04f88341         // cmp    r8d, 4
   547  	JAE  LBB0_488
   548  	WORD $0xf631             // xor    esi, esi
   549  	JMP  LBB0_973
   550  
   551  LBB0_121:
   552  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   553  	JLE  LBB0_233
   554  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   555  	JE   LBB0_347
   556  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   557  	JE   LBB0_350
   558  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   559  	JNE  LBB0_1526
   560  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   561  	JLE  LBB0_1526
   562  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   563  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   564  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   565  	LONG $0x03e08341         // and    r8d, 3
   566  	LONG $0x03ff8348         // cmp    rdi, 3
   567  	JAE  LBB0_491
   568  	WORD $0xff31             // xor    edi, edi
   569  	JMP  LBB0_493
   570  
   571  LBB0_128:
   572  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   573  	JLE  LBB0_238
   574  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   575  	JE   LBB0_353
   576  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   577  	JE   LBB0_356
   578  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   579  	JNE  LBB0_1526
   580  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   581  	JLE  LBB0_1526
   582  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   583  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   584  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   585  	LONG $0x03e08341         // and    r8d, 3
   586  	LONG $0x03ff8348         // cmp    rdi, 3
   587  	JAE  LBB0_496
   588  	WORD $0xff31             // xor    edi, edi
   589  	JMP  LBB0_498
   590  
   591  LBB0_135:
   592  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   593  	JLE  LBB0_243
   594  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   595  	JE   LBB0_359
   596  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   597  	JE   LBB0_362
   598  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   599  	JNE  LBB0_1526
   600  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   601  	JLE  LBB0_1526
   602  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   603  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   604  	WORD $0xf089             // mov    eax, esi
   605  	WORD $0xe083; BYTE $0x03 // and    eax, 3
   606  	LONG $0x03ff8348         // cmp    rdi, 3
   607  	JAE  LBB0_501
   608  	WORD $0xff31             // xor    edi, edi
   609  	JMP  LBB0_503
   610  
   611  LBB0_142:
   612  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   613  	JLE  LBB0_248
   614  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   615  	JE   LBB0_365
   616  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   617  	JE   LBB0_368
   618  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   619  	JNE  LBB0_1526
   620  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   621  	JLE  LBB0_1526
   622  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   623  	LONG $0x04f88341         // cmp    r8d, 4
   624  	JAE  LBB0_506
   625  	WORD $0xf631             // xor    esi, esi
   626  	JMP  LBB0_979
   627  
   628  LBB0_149:
   629  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   630  	JLE  LBB0_253
   631  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   632  	JE   LBB0_371
   633  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   634  	JE   LBB0_374
   635  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   636  	JNE  LBB0_1526
   637  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   638  	JLE  LBB0_1526
   639  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   640  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   641  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
   642  	LONG $0x03e08341         // and    r8d, 3
   643  	LONG $0x03ff8348         // cmp    rdi, 3
   644  	JAE  LBB0_509
   645  	WORD $0xff31             // xor    edi, edi
   646  	JMP  LBB0_511
   647  
   648  LBB0_156:
   649  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
   650  	JLE  LBB0_258
   651  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
   652  	JE   LBB0_377
   653  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
   654  	JE   LBB0_380
   655  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
   656  	JNE  LBB0_1526
   657  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   658  	JLE  LBB0_1526
   659  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
   660  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
   661  	WORD $0xf089             // mov    eax, esi
   662  	WORD $0xe083; BYTE $0x03 // and    eax, 3
   663  	LONG $0x03ff8348         // cmp    rdi, 3
   664  	JAE  LBB0_514
   665  	WORD $0xff31             // xor    edi, edi
   666  	JMP  LBB0_516
   667  
   668  LBB0_163:
   669  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   670  	JE   LBB0_383
   671  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   672  	JNE  LBB0_1526
   673  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   674  	JLE  LBB0_1526
   675  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   676  	LONG $0x08f88341         // cmp    r8d, 8
   677  	JB   LBB0_167
   678  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
   679  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   680  	JBE  LBB0_773
   681  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   682  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   683  	JBE  LBB0_773
   684  
   685  LBB0_167:
   686  	WORD $0xf631 // xor    esi, esi
   687  
   688  LBB0_1498:
   689  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   690  	WORD $0xf749; BYTE $0xd0 // not    r8
   691  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   692  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   693  	LONG $0x03e78348         // and    rdi, 3
   694  	JE   LBB0_1500
   695  
   696  LBB0_1499:
   697  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
   698  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   699  	LONG $0x01c68348         // add    rsi, 1
   700  	LONG $0xffc78348         // add    rdi, -1
   701  	JNE  LBB0_1499
   702  
   703  LBB0_1500:
   704  	LONG $0x03f88349 // cmp    r8, 3
   705  	JB   LBB0_1526
   706  
   707  LBB0_1501:
   708  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
   709  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   710  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
   711  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   712  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
   713  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   714  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
   715  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   716  	LONG $0x04c68348             // add    rsi, 4
   717  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   718  	JNE  LBB0_1501
   719  	JMP  LBB0_1526
   720  
   721  LBB0_168:
   722  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   723  	JE   LBB0_386
   724  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   725  	JNE  LBB0_1526
   726  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   727  	JLE  LBB0_1526
   728  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   729  	LONG $0x04f88341         // cmp    r8d, 4
   730  	JB   LBB0_172
   731  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   732  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   733  	JBE  LBB0_776
   734  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   735  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   736  	JBE  LBB0_776
   737  
   738  LBB0_172:
   739  	WORD $0xf631 // xor    esi, esi
   740  
   741  LBB0_1506:
   742  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   743  	WORD $0xf749; BYTE $0xd0 // not    r8
   744  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   745  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   746  	LONG $0x03e78348         // and    rdi, 3
   747  	JE   LBB0_1508
   748  
   749  LBB0_1507:
   750  	LONG $0x042c0ff2; BYTE $0xf2 // cvttsd2si    eax, qword [rdx + 8*rsi]
   751  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   752  	LONG $0x01c68348             // add    rsi, 1
   753  	LONG $0xffc78348             // add    rdi, -1
   754  	JNE  LBB0_1507
   755  
   756  LBB0_1508:
   757  	LONG $0x03f88349 // cmp    r8, 3
   758  	JB   LBB0_1526
   759  
   760  LBB0_1509:
   761  	LONG $0x042c0ff2; BYTE $0xf2   // cvttsd2si    eax, qword [rdx + 8*rsi]
   762  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
   763  	LONG $0x442c0ff2; WORD $0x08f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 8]
   764  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
   765  	LONG $0x442c0ff2; WORD $0x10f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 16]
   766  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
   767  	LONG $0x442c0ff2; WORD $0x18f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 24]
   768  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
   769  	LONG $0x04c68348               // add    rsi, 4
   770  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
   771  	JNE  LBB0_1509
   772  	JMP  LBB0_1526
   773  
   774  LBB0_173:
   775  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   776  	JE   LBB0_389
   777  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   778  	JNE  LBB0_1526
   779  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   780  	JLE  LBB0_1526
   781  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   782  	LONG $0x20f88341         // cmp    r8d, 32
   783  	JB   LBB0_177
   784  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
   785  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   786  	JBE  LBB0_779
   787  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   788  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   789  	JBE  LBB0_779
   790  
   791  LBB0_177:
   792  	WORD $0xf631 // xor    esi, esi
   793  
   794  LBB0_1134:
   795  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   796  	WORD $0xf749; BYTE $0xd0 // not    r8
   797  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   798  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   799  	LONG $0x03e78348         // and    rdi, 3
   800  	JE   LBB0_1136
   801  
   802  LBB0_1135:
   803  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
   804  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   805  	LONG $0x01c68348         // add    rsi, 1
   806  	LONG $0xffc78348         // add    rdi, -1
   807  	JNE  LBB0_1135
   808  
   809  LBB0_1136:
   810  	LONG $0x03f88349 // cmp    r8, 3
   811  	JB   LBB0_1526
   812  
   813  LBB0_1137:
   814  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   815  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   816  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   817  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   818  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   819  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   820  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   821  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   822  	LONG $0x04c68348             // add    rsi, 4
   823  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   824  	JNE  LBB0_1137
   825  	JMP  LBB0_1526
   826  
   827  LBB0_178:
   828  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   829  	JE   LBB0_392
   830  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   831  	JNE  LBB0_1526
   832  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   833  	JLE  LBB0_1526
   834  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   835  	LONG $0x04f88341         // cmp    r8d, 4
   836  	JB   LBB0_182
   837  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   838  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   839  	JBE  LBB0_781
   840  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   841  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   842  	JBE  LBB0_781
   843  
   844  LBB0_182:
   845  	WORD $0xf631 // xor    esi, esi
   846  
   847  LBB0_1322:
   848  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   849  	WORD $0xf749; BYTE $0xd0 // not    r8
   850  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   851  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   852  	LONG $0x03e78348         // and    rdi, 3
   853  	JE   LBB0_1324
   854  
   855  LBB0_1323:
   856  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
   857  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   858  	LONG $0x01c68348         // add    rsi, 1
   859  	LONG $0xffc78348         // add    rdi, -1
   860  	JNE  LBB0_1323
   861  
   862  LBB0_1324:
   863  	LONG $0x03f88349 // cmp    r8, 3
   864  	JB   LBB0_1526
   865  
   866  LBB0_1325:
   867  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
   868  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   869  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
   870  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   871  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
   872  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   873  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
   874  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   875  	LONG $0x04c68348             // add    rsi, 4
   876  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   877  	JNE  LBB0_1325
   878  	JMP  LBB0_1526
   879  
   880  LBB0_183:
   881  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   882  	JE   LBB0_395
   883  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   884  	JNE  LBB0_1526
   885  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   886  	JLE  LBB0_1526
   887  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   888  	LONG $0x10f88341         // cmp    r8d, 16
   889  	JB   LBB0_187
   890  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
   891  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   892  	JBE  LBB0_784
   893  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   894  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   895  	JBE  LBB0_784
   896  
   897  LBB0_187:
   898  	WORD $0xf631 // xor    esi, esi
   899  
   900  LBB0_1330:
   901  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   902  	WORD $0xf749; BYTE $0xd0 // not    r8
   903  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   904  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   905  	LONG $0x03e78348         // and    rdi, 3
   906  	JE   LBB0_1332
   907  
   908  LBB0_1331:
   909  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
   910  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   911  	LONG $0x01c68348         // add    rsi, 1
   912  	LONG $0xffc78348         // add    rdi, -1
   913  	JNE  LBB0_1331
   914  
   915  LBB0_1332:
   916  	LONG $0x03f88349 // cmp    r8, 3
   917  	JB   LBB0_1526
   918  
   919  LBB0_1333:
   920  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
   921  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   922  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
   923  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   924  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
   925  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   926  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
   927  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   928  	LONG $0x04c68348             // add    rsi, 4
   929  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   930  	JNE  LBB0_1333
   931  	JMP  LBB0_1526
   932  
   933  LBB0_188:
   934  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   935  	JE   LBB0_398
   936  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   937  	JNE  LBB0_1526
   938  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   939  	JLE  LBB0_1526
   940  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   941  	LONG $0x10f88341         // cmp    r8d, 16
   942  	JB   LBB0_192
   943  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
   944  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   945  	JBE  LBB0_787
   946  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
   947  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
   948  	JBE  LBB0_787
   949  
   950  LBB0_192:
   951  	WORD $0xf631 // xor    esi, esi
   952  
   953  LBB0_1514:
   954  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
   955  	WORD $0xf749; BYTE $0xd0 // not    r8
   956  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
   957  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
   958  	LONG $0x03e78348         // and    rdi, 3
   959  	JE   LBB0_1516
   960  
   961  LBB0_1515:
   962  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
   963  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
   964  	LONG $0x01c68348         // add    rsi, 1
   965  	LONG $0xffc78348         // add    rdi, -1
   966  	JNE  LBB0_1515
   967  
   968  LBB0_1516:
   969  	LONG $0x03f88349 // cmp    r8, 3
   970  	JB   LBB0_1526
   971  
   972  LBB0_1517:
   973  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
   974  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
   975  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
   976  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
   977  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
   978  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
   979  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
   980  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
   981  	LONG $0x04c68348             // add    rsi, 4
   982  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
   983  	JNE  LBB0_1517
   984  	JMP  LBB0_1526
   985  
   986  LBB0_193:
   987  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
   988  	JE   LBB0_401
   989  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
   990  	JNE  LBB0_1526
   991  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
   992  	JLE  LBB0_1526
   993  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   994  	LONG $0x04f88341         // cmp    r8d, 4
   995  	JB   LBB0_197
   996  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
   997  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
   998  	JBE  LBB0_790
   999  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1000  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1001  	JBE  LBB0_790
  1002  
  1003  LBB0_197:
  1004  	WORD $0xf631 // xor    esi, esi
  1005  
  1006  LBB0_1338:
  1007  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1008  	WORD $0xf749; BYTE $0xd0 // not    r8
  1009  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1010  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1011  	LONG $0x03e78348         // and    rdi, 3
  1012  	JE   LBB0_1340
  1013  
  1014  LBB0_1339:
  1015  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  1016  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1017  	LONG $0x01c68348         // add    rsi, 1
  1018  	LONG $0xffc78348         // add    rdi, -1
  1019  	JNE  LBB0_1339
  1020  
  1021  LBB0_1340:
  1022  	LONG $0x03f88349 // cmp    r8, 3
  1023  	JB   LBB0_1526
  1024  
  1025  LBB0_1341:
  1026  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  1027  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1028  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  1029  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1030  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  1031  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1032  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  1033  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1034  	LONG $0x04c68348             // add    rsi, 4
  1035  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1036  	JNE  LBB0_1341
  1037  	JMP  LBB0_1526
  1038  
  1039  LBB0_198:
  1040  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1041  	JE   LBB0_404
  1042  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1043  	JNE  LBB0_1526
  1044  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1045  	JLE  LBB0_1526
  1046  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1047  	LONG $0x08f88341         // cmp    r8d, 8
  1048  	JB   LBB0_202
  1049  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  1050  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1051  	JBE  LBB0_793
  1052  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1053  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1054  	JBE  LBB0_793
  1055  
  1056  LBB0_202:
  1057  	WORD $0xf631 // xor    esi, esi
  1058  
  1059  LBB0_1522:
  1060  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1061  	WORD $0xf749; BYTE $0xd0 // not    r8
  1062  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1063  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1064  	LONG $0x03e78348         // and    rdi, 3
  1065  	JE   LBB0_1524
  1066  
  1067  LBB0_1523:
  1068  	LONG $0x042c0ff3; BYTE $0xb2 // cvttss2si    eax, dword [rdx + 4*rsi]
  1069  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1070  	LONG $0x01c68348             // add    rsi, 1
  1071  	LONG $0xffc78348             // add    rdi, -1
  1072  	JNE  LBB0_1523
  1073  
  1074  LBB0_1524:
  1075  	LONG $0x03f88349 // cmp    r8, 3
  1076  	JB   LBB0_1526
  1077  
  1078  LBB0_1525:
  1079  	LONG $0x042c0ff3; BYTE $0xb2   // cvttss2si    eax, dword [rdx + 4*rsi]
  1080  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  1081  	LONG $0x442c0ff3; WORD $0x04b2 // cvttss2si    eax, dword [rdx + 4*rsi + 4]
  1082  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  1083  	LONG $0x442c0ff3; WORD $0x08b2 // cvttss2si    eax, dword [rdx + 4*rsi + 8]
  1084  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  1085  	LONG $0x442c0ff3; WORD $0x0cb2 // cvttss2si    eax, dword [rdx + 4*rsi + 12]
  1086  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  1087  	LONG $0x04c68348               // add    rsi, 4
  1088  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  1089  	JNE  LBB0_1525
  1090  	JMP  LBB0_1526
  1091  
  1092  LBB0_203:
  1093  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1094  	JE   LBB0_407
  1095  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1096  	JNE  LBB0_1526
  1097  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1098  	JLE  LBB0_1526
  1099  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1100  	LONG $0x20f88341         // cmp    r8d, 32
  1101  	JB   LBB0_207
  1102  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1103  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1104  	JBE  LBB0_796
  1105  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1106  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1107  	JBE  LBB0_796
  1108  
  1109  LBB0_207:
  1110  	WORD $0xf631 // xor    esi, esi
  1111  
  1112  LBB0_1144:
  1113  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1114  	WORD $0xf749; BYTE $0xd0 // not    r8
  1115  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1116  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1117  	LONG $0x03e78348         // and    rdi, 3
  1118  	JE   LBB0_1146
  1119  
  1120  LBB0_1145:
  1121  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  1122  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1123  	LONG $0x01c68348         // add    rsi, 1
  1124  	LONG $0xffc78348         // add    rdi, -1
  1125  	JNE  LBB0_1145
  1126  
  1127  LBB0_1146:
  1128  	LONG $0x03f88349 // cmp    r8, 3
  1129  	JB   LBB0_1526
  1130  
  1131  LBB0_1147:
  1132  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1133  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1134  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1135  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1136  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1137  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1138  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1139  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1140  	LONG $0x04c68348             // add    rsi, 4
  1141  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1142  	JNE  LBB0_1147
  1143  	JMP  LBB0_1526
  1144  
  1145  LBB0_208:
  1146  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
  1147  	JE   LBB0_410
  1148  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
  1149  	JNE  LBB0_1526
  1150  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1151  	JLE  LBB0_1526
  1152  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1153  	LONG $0x08f88341         // cmp    r8d, 8
  1154  	JB   LBB0_212
  1155  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  1156  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1157  	JBE  LBB0_798
  1158  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  1159  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1160  	JBE  LBB0_798
  1161  
  1162  LBB0_212:
  1163  	WORD $0xf631 // xor    esi, esi
  1164  
  1165  LBB0_1346:
  1166  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1167  	WORD $0xf749; BYTE $0xd0 // not    r8
  1168  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1169  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1170  	LONG $0x03e78348         // and    rdi, 3
  1171  	JE   LBB0_1348
  1172  
  1173  LBB0_1347:
  1174  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  1175  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  1176  	LONG $0x01c68348         // add    rsi, 1
  1177  	LONG $0xffc78348         // add    rdi, -1
  1178  	JNE  LBB0_1347
  1179  
  1180  LBB0_1348:
  1181  	LONG $0x03f88349 // cmp    r8, 3
  1182  	JB   LBB0_1526
  1183  
  1184  LBB0_1349:
  1185  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  1186  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  1187  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  1188  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  1189  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  1190  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  1191  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  1192  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  1193  	LONG $0x04c68348             // add    rsi, 4
  1194  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1195  	JNE  LBB0_1349
  1196  	JMP  LBB0_1526
  1197  
  1198  LBB0_213:
  1199  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1200  	JE   LBB0_413
  1201  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1202  	JNE  LBB0_1526
  1203  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1204  	JLE  LBB0_1526
  1205  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1206  	LONG $0x04f88341         // cmp    r8d, 4
  1207  	JAE  LBB0_549
  1208  	WORD $0xf631             // xor    esi, esi
  1209  	JMP  LBB0_807
  1210  
  1211  LBB0_218:
  1212  	WORD $0xfe83; BYTE $0x07               // cmp    esi, 7
  1213  	JE   LBB0_416
  1214  	WORD $0xfe83; BYTE $0x08               // cmp    esi, 8
  1215  	JNE  LBB0_1526
  1216  	WORD $0x8545; BYTE $0xc0               // test    r8d, r8d
  1217  	JLE  LBB0_1526
  1218  	WORD $0x8945; BYTE $0xc1               // mov    r9d, r8d
  1219  	LONG $0xff418d49                       // lea    rax, [r9 - 1]
  1220  	WORD $0x8945; BYTE $0xc8               // mov    r8d, r9d
  1221  	LONG $0x03e08341                       // and    r8d, 3
  1222  	QUAD $0x000000000000ba49; WORD $0x8000 // mov    r10, -9223372036854775808
  1223  	LONG $0x03f88348                       // cmp    rax, 3
  1224  	JAE  LBB0_551
  1225  	WORD $0xc031                           // xor    eax, eax
  1226  	JMP  LBB0_553
  1227  
  1228  LBB0_223:
  1229  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1230  	JE   LBB0_419
  1231  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1232  	JNE  LBB0_1526
  1233  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1234  	JLE  LBB0_1526
  1235  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1236  	LONG $0x04f88341         // cmp    r8d, 4
  1237  	JB   LBB0_227
  1238  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1239  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1240  	JBE  LBB0_808
  1241  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1242  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1243  	JBE  LBB0_808
  1244  
  1245  LBB0_227:
  1246  	WORD $0xf631 // xor    esi, esi
  1247  
  1248  LBB0_1154:
  1249  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1250  	WORD $0xf749; BYTE $0xd0 // not    r8
  1251  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1252  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1253  	LONG $0x03e78348         // and    rdi, 3
  1254  	JE   LBB0_1156
  1255  
  1256  LBB0_1155:
  1257  	LONG $0x04be0f48; BYTE $0x32 // movsx    rax, byte [rdx + rsi]
  1258  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1259  	LONG $0x01c68348             // add    rsi, 1
  1260  	LONG $0xffc78348             // add    rdi, -1
  1261  	JNE  LBB0_1155
  1262  
  1263  LBB0_1156:
  1264  	LONG $0x03f88349 // cmp    r8, 3
  1265  	JB   LBB0_1526
  1266  
  1267  LBB0_1157:
  1268  	LONG $0x04be0f48; BYTE $0x32   // movsx    rax, byte [rdx + rsi]
  1269  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  1270  	LONG $0x44be0f48; WORD $0x0132 // movsx    rax, byte [rdx + rsi + 1]
  1271  	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
  1272  	LONG $0x44be0f48; WORD $0x0232 // movsx    rax, byte [rdx + rsi + 2]
  1273  	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
  1274  	LONG $0x44be0f48; WORD $0x0332 // movsx    rax, byte [rdx + rsi + 3]
  1275  	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
  1276  	LONG $0x04c68348               // add    rsi, 4
  1277  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  1278  	JNE  LBB0_1157
  1279  	JMP  LBB0_1526
  1280  
  1281  LBB0_228:
  1282  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1283  	JE   LBB0_422
  1284  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1285  	JNE  LBB0_1526
  1286  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1287  	JLE  LBB0_1526
  1288  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1289  	LONG $0x04f88341         // cmp    r8d, 4
  1290  	JB   LBB0_232
  1291  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  1292  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1293  	JBE  LBB0_810
  1294  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1295  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1296  	JBE  LBB0_810
  1297  
  1298  LBB0_232:
  1299  	WORD $0xf631 // xor    esi, esi
  1300  
  1301  LBB0_1164:
  1302  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1303  	WORD $0xf749; BYTE $0xd0 // not    r8
  1304  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1305  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1306  	LONG $0x03e78348         // and    rdi, 3
  1307  	JE   LBB0_1166
  1308  
  1309  LBB0_1165:
  1310  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1311  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1312  	LONG $0x01c68348 // add    rsi, 1
  1313  	LONG $0xffc78348 // add    rdi, -1
  1314  	JNE  LBB0_1165
  1315  
  1316  LBB0_1166:
  1317  	LONG $0x03f88349 // cmp    r8, 3
  1318  	JB   LBB0_1526
  1319  
  1320  LBB0_1167:
  1321  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1322  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1323  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1324  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1325  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1326  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1327  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1328  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1329  	LONG $0x04c68348             // add    rsi, 4
  1330  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1331  	JNE  LBB0_1167
  1332  	JMP  LBB0_1526
  1333  
  1334  LBB0_233:
  1335  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1336  	JE   LBB0_425
  1337  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1338  	JNE  LBB0_1526
  1339  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1340  	JLE  LBB0_1526
  1341  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1342  	LONG $0x04f88341         // cmp    r8d, 4
  1343  	JAE  LBB0_562
  1344  	WORD $0xf631             // xor    esi, esi
  1345  	JMP  LBB0_818
  1346  
  1347  LBB0_238:
  1348  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1349  	JE   LBB0_428
  1350  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1351  	JNE  LBB0_1526
  1352  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1353  	JLE  LBB0_1526
  1354  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1355  	LONG $0x04f88341         // cmp    r8d, 4
  1356  	JAE  LBB0_564
  1357  	WORD $0xf631             // xor    esi, esi
  1358  	JMP  LBB0_825
  1359  
  1360  LBB0_243:
  1361  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1362  	JE   LBB0_431
  1363  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1364  	JNE  LBB0_1526
  1365  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1366  	JLE  LBB0_1526
  1367  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1368  	LONG $0x04f88341         // cmp    r8d, 4
  1369  	JB   LBB0_247
  1370  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  1371  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1372  	JBE  LBB0_826
  1373  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1374  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1375  	JBE  LBB0_826
  1376  
  1377  LBB0_247:
  1378  	WORD $0xf631 // xor    esi, esi
  1379  
  1380  LBB0_1174:
  1381  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1382  	WORD $0xf749; BYTE $0xd0 // not    r8
  1383  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1384  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1385  	LONG $0x03e78348         // and    rdi, 3
  1386  	JE   LBB0_1176
  1387  
  1388  LBB0_1175:
  1389  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1390  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1391  	LONG $0x01c68348 // add    rsi, 1
  1392  	LONG $0xffc78348 // add    rdi, -1
  1393  	JNE  LBB0_1175
  1394  
  1395  LBB0_1176:
  1396  	LONG $0x03f88349 // cmp    r8, 3
  1397  	JB   LBB0_1526
  1398  
  1399  LBB0_1177:
  1400  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1401  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1402  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1403  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1404  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1405  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1406  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1407  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1408  	LONG $0x04c68348             // add    rsi, 4
  1409  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1410  	JNE  LBB0_1177
  1411  	JMP  LBB0_1526
  1412  
  1413  LBB0_248:
  1414  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1415  	JE   LBB0_434
  1416  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1417  	JNE  LBB0_1526
  1418  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1419  	JLE  LBB0_1526
  1420  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1421  	LONG $0xff418d49         // lea    rax, [r9 - 1]
  1422  	WORD $0x8945; BYTE $0xc8 // mov    r8d, r9d
  1423  	LONG $0x03e08341         // and    r8d, 3
  1424  	LONG $0x03f88348         // cmp    rax, 3
  1425  	JAE  LBB0_569
  1426  	WORD $0xff31             // xor    edi, edi
  1427  	JMP  LBB0_571
  1428  
  1429  LBB0_253:
  1430  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1431  	JE   LBB0_437
  1432  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1433  	JNE  LBB0_1526
  1434  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1435  	JLE  LBB0_1526
  1436  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1437  	LONG $0x04f88341         // cmp    r8d, 4
  1438  	JB   LBB0_257
  1439  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1440  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1441  	JBE  LBB0_828
  1442  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  1443  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1444  	JBE  LBB0_828
  1445  
  1446  LBB0_257:
  1447  	WORD $0xf631 // xor    esi, esi
  1448  
  1449  LBB0_1184:
  1450  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1451  	WORD $0xf749; BYTE $0xd0 // not    r8
  1452  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1453  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1454  	LONG $0x03e78348         // and    rdi, 3
  1455  	JE   LBB0_1186
  1456  
  1457  LBB0_1185:
  1458  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  1459  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  1460  	LONG $0x01c68348 // add    rsi, 1
  1461  	LONG $0xffc78348 // add    rdi, -1
  1462  	JNE  LBB0_1185
  1463  
  1464  LBB0_1186:
  1465  	LONG $0x03f88349 // cmp    r8, 3
  1466  	JB   LBB0_1526
  1467  
  1468  LBB0_1187:
  1469  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1470  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  1471  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1472  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  1473  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1474  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  1475  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1476  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  1477  	LONG $0x04c68348             // add    rsi, 4
  1478  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1479  	JNE  LBB0_1187
  1480  	JMP  LBB0_1526
  1481  
  1482  LBB0_258:
  1483  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
  1484  	JE   LBB0_440
  1485  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
  1486  	JNE  LBB0_1526
  1487  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1488  	JLE  LBB0_1526
  1489  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1490  	LONG $0x04f88341         // cmp    r8d, 4
  1491  	JAE  LBB0_577
  1492  	WORD $0xf631             // xor    esi, esi
  1493  	JMP  LBB0_836
  1494  
  1495  LBB0_263:
  1496  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1497  	JLE  LBB0_1526
  1498  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1499  	LONG $0x08f88341         // cmp    r8d, 8
  1500  	JAE  LBB0_579
  1501  	WORD $0xf631             // xor    esi, esi
  1502  	JMP  LBB0_984
  1503  
  1504  LBB0_266:
  1505  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1506  	JLE  LBB0_1526
  1507  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1508  	LONG $0x08f88341         // cmp    r8d, 8
  1509  	JAE  LBB0_582
  1510  	WORD $0xf631             // xor    esi, esi
  1511  	JMP  LBB0_989
  1512  
  1513  LBB0_269:
  1514  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1515  	JLE  LBB0_1526
  1516  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1517  	LONG $0x04f88341         // cmp    r8d, 4
  1518  	JAE  LBB0_585
  1519  	WORD $0xf631             // xor    esi, esi
  1520  	JMP  LBB0_994
  1521  
  1522  LBB0_272:
  1523  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1524  	JLE  LBB0_1526
  1525  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1526  	LONG $0x04f88341         // cmp    r8d, 4
  1527  	JAE  LBB0_588
  1528  	WORD $0xf631             // xor    esi, esi
  1529  	JMP  LBB0_999
  1530  
  1531  LBB0_275: // case: sign-extend r8d bytes read from [rdx] into 16-bit words stored at [rcx]
  1532  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1533  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  1534  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1535  	LONG $0x10f88341         // cmp    r8d, 16
  1536  	JB   LBB0_277 // count < 16: take the scalar path
  1537  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1538  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1539  	JBE  LBB0_837 // source end <= destination start: ranges are disjoint (LBB0_837 is outside this view)
  1540  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1541  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1542  	JBE  LBB0_837 // destination end <= source start: ranges are disjoint
  1543  
  1544  LBB0_277: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1545  	WORD $0xf631 // xor    esi, esi
  1546  
  1547  LBB0_1354: // scalar setup: r8 = r9 - rsi - 1, rax = r9 & 3 (number of single-element peel iterations)
  1548  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1549  	WORD $0xf749; BYTE $0xd0 // not    r8
  1550  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1551  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  1552  	LONG $0x03e08348         // and    rax, 3
  1553  	JE   LBB0_1356 // r9 is a multiple of 4: no peel iterations
  1554  
  1555  LBB0_1355: // peel loop: one element per iteration until rax reaches 0
  1556  	LONG $0x323cbe0f // movsx    edi, byte [rdx + rsi]
  1557  	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
  1558  	LONG $0x01c68348 // add    rsi, 1
  1559  	LONG $0xffc08348 // add    rax, -1
  1560  	JNE  LBB0_1355
  1561  
  1562  LBB0_1356: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1563  	LONG $0x03f88349 // cmp    r8, 3
  1564  	JB   LBB0_1526
  1565  
  1566  LBB0_1357: // main scalar loop, unrolled 4x
  1567  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  1568  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1569  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  1570  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1571  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  1572  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1573  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  1574  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1575  	LONG $0x04c68348             // add    rsi, 4
  1576  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1577  	JNE  LBB0_1357 // repeat until rsi == r9
  1578  	JMP  LBB0_1526 // all elements written
  1579  
  1580  LBB0_278: // case: sign-extend r8d bytes read from [rdx] into 16-bit words stored at [rcx]
  1581  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1582  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  1583  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1584  	LONG $0x10f88341         // cmp    r8d, 16
  1585  	JB   LBB0_280 // count < 16: take the scalar path
  1586  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1587  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1588  	JBE  LBB0_840 // source end <= destination start: ranges are disjoint (LBB0_840 is outside this view)
  1589  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1590  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1591  	JBE  LBB0_840 // destination end <= source start: ranges are disjoint
  1592  
  1593  LBB0_280: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1594  	WORD $0xf631 // xor    esi, esi
  1595  
  1596  LBB0_1362: // scalar setup: r8 = r9 - rsi - 1, rax = r9 & 3 (number of single-element peel iterations)
  1597  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1598  	WORD $0xf749; BYTE $0xd0 // not    r8
  1599  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1600  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  1601  	LONG $0x03e08348         // and    rax, 3
  1602  	JE   LBB0_1364 // r9 is a multiple of 4: no peel iterations
  1603  
  1604  LBB0_1363: // peel loop: one element per iteration until rax reaches 0
  1605  	LONG $0x323cbe0f // movsx    edi, byte [rdx + rsi]
  1606  	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
  1607  	LONG $0x01c68348 // add    rsi, 1
  1608  	LONG $0xffc08348 // add    rax, -1
  1609  	JNE  LBB0_1363
  1610  
  1611  LBB0_1364: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1612  	LONG $0x03f88349 // cmp    r8, 3
  1613  	JB   LBB0_1526
  1614  
  1615  LBB0_1365: // main scalar loop, unrolled 4x
  1616  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  1617  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1618  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  1619  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1620  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  1621  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1622  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  1623  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1624  	LONG $0x04c68348             // add    rsi, 4
  1625  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1626  	JNE  LBB0_1365 // repeat until rsi == r9
  1627  	JMP  LBB0_1526 // all elements written
  1628  
  1629  LBB0_281: // case entry: validate the count in r8d, then dispatch on its size
  1630  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1631  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1632  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1633  	LONG $0x04f88341         // cmp    r8d, 4
  1634  	JAE  LBB0_597 // count >= 4: continue at LBB0_597 (outside this view)
  1635  	WORD $0xf631             // xor    esi, esi
  1636  	JMP  LBB0_1004 // count < 4: continue at LBB0_1004 with index rsi = 0
  1637  
  1638  LBB0_284: // case entry: validate the count in r8d, then dispatch on its size
  1639  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1640  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1641  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1642  	LONG $0x04f88341         // cmp    r8d, 4
  1643  	JAE  LBB0_600 // count >= 4: continue at LBB0_600 (outside this view)
  1644  	WORD $0xf631             // xor    esi, esi
  1645  	JMP  LBB0_1009 // count < 4: continue at LBB0_1009 with index rsi = 0
  1646  
  1647  LBB0_287: // case: copy r8d 16-bit words from [rdx] to [rcx] unchanged
  1648  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1649  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  1650  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1651  	LONG $0x10f88341         // cmp    r8d, 16
  1652  	JB   LBB0_289 // count < 16: take the scalar path
  1653  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  1654  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1655  	JBE  LBB0_843 // source end <= destination start: ranges are disjoint (LBB0_843 is outside this view)
  1656  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1657  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1658  	JBE  LBB0_843 // destination end <= source start: ranges are disjoint
  1659  
  1660  LBB0_289: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1661  	WORD $0xf631 // xor    esi, esi
  1662  
  1663  LBB0_1194: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  1664  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1665  	WORD $0xf749; BYTE $0xd0 // not    r8
  1666  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1667  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1668  	LONG $0x03e78348         // and    rdi, 3
  1669  	JE   LBB0_1196 // r9 is a multiple of 4: no peel iterations
  1670  
  1671  LBB0_1195: // peel loop: one element per iteration until rdi reaches 0
  1672  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1673  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1674  	LONG $0x01c68348 // add    rsi, 1
  1675  	LONG $0xffc78348 // add    rdi, -1
  1676  	JNE  LBB0_1195
  1677  
  1678  LBB0_1196: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1679  	LONG $0x03f88349 // cmp    r8, 3
  1680  	JB   LBB0_1526
  1681  
  1682  LBB0_1197: // main scalar loop, unrolled 4x
  1683  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1684  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1685  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1686  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1687  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1688  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1689  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1690  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1691  	LONG $0x04c68348             // add    rsi, 4
  1692  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1693  	JNE  LBB0_1197 // repeat until rsi == r9
  1694  	JMP  LBB0_1526 // all elements written
  1695  
  1696  LBB0_290: // case: copy r8d 16-bit words from [rdx] to [rcx] unchanged
  1697  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1698  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  1699  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1700  	LONG $0x10f88341         // cmp    r8d, 16
  1701  	JB   LBB0_292 // count < 16: take the scalar path
  1702  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  1703  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1704  	JBE  LBB0_845 // source end <= destination start: ranges are disjoint (LBB0_845 is outside this view)
  1705  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1706  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1707  	JBE  LBB0_845 // destination end <= source start: ranges are disjoint
  1708  
  1709  LBB0_292: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1710  	WORD $0xf631 // xor    esi, esi
  1711  
  1712  LBB0_1204: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  1713  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1714  	WORD $0xf749; BYTE $0xd0 // not    r8
  1715  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1716  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1717  	LONG $0x03e78348         // and    rdi, 3
  1718  	JE   LBB0_1206 // r9 is a multiple of 4: no peel iterations
  1719  
  1720  LBB0_1205: // peel loop: one element per iteration until rdi reaches 0
  1721  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1722  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1723  	LONG $0x01c68348 // add    rsi, 1
  1724  	LONG $0xffc78348 // add    rdi, -1
  1725  	JNE  LBB0_1205
  1726  
  1727  LBB0_1206: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1728  	LONG $0x03f88349 // cmp    r8, 3
  1729  	JB   LBB0_1526
  1730  
  1731  LBB0_1207: // main scalar loop, unrolled 4x
  1732  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1733  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1734  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1735  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1736  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1737  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1738  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1739  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1740  	LONG $0x04c68348             // add    rsi, 4
  1741  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1742  	JNE  LBB0_1207 // repeat until rsi == r9
  1743  	JMP  LBB0_1526 // all elements written
  1744  
  1745  LBB0_293: // case: copy r8d 16-bit words from [rdx] to [rcx] unchanged
  1746  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1747  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  1748  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1749  	LONG $0x10f88341         // cmp    r8d, 16
  1750  	JB   LBB0_295 // count < 16: take the scalar path
  1751  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  1752  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1753  	JBE  LBB0_847 // source end <= destination start: ranges are disjoint (LBB0_847 is outside this view)
  1754  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1755  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1756  	JBE  LBB0_847 // destination end <= source start: ranges are disjoint
  1757  
  1758  LBB0_295: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1759  	WORD $0xf631 // xor    esi, esi
  1760  
  1761  LBB0_1214: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  1762  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1763  	WORD $0xf749; BYTE $0xd0 // not    r8
  1764  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1765  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1766  	LONG $0x03e78348         // and    rdi, 3
  1767  	JE   LBB0_1216 // r9 is a multiple of 4: no peel iterations
  1768  
  1769  LBB0_1215: // peel loop: one element per iteration until rdi reaches 0
  1770  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1771  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1772  	LONG $0x01c68348 // add    rsi, 1
  1773  	LONG $0xffc78348 // add    rdi, -1
  1774  	JNE  LBB0_1215
  1775  
  1776  LBB0_1216: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1777  	LONG $0x03f88349 // cmp    r8, 3
  1778  	JB   LBB0_1526
  1779  
  1780  LBB0_1217: // main scalar loop, unrolled 4x
  1781  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1782  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1783  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1784  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1785  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1786  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1787  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1788  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1789  	LONG $0x04c68348             // add    rsi, 4
  1790  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1791  	JNE  LBB0_1217 // repeat until rsi == r9
  1792  	JMP  LBB0_1526 // all elements written
  1793  
  1794  LBB0_296: // case: copy r8d 16-bit words from [rdx] to [rcx] unchanged
  1795  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1796  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  1797  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1798  	LONG $0x10f88341         // cmp    r8d, 16
  1799  	JB   LBB0_298 // count < 16: take the scalar path
  1800  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  1801  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1802  	JBE  LBB0_849 // source end <= destination start: ranges are disjoint (LBB0_849 is outside this view)
  1803  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1804  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1805  	JBE  LBB0_849 // destination end <= source start: ranges are disjoint
  1806  
  1807  LBB0_298: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1808  	WORD $0xf631 // xor    esi, esi
  1809  
  1810  LBB0_1224: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  1811  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1812  	WORD $0xf749; BYTE $0xd0 // not    r8
  1813  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1814  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  1815  	LONG $0x03e78348         // and    rdi, 3
  1816  	JE   LBB0_1226 // r9 is a multiple of 4: no peel iterations
  1817  
  1818  LBB0_1225: // peel loop: one element per iteration until rdi reaches 0
  1819  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
  1820  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
  1821  	LONG $0x01c68348 // add    rsi, 1
  1822  	LONG $0xffc78348 // add    rdi, -1
  1823  	JNE  LBB0_1225
  1824  
  1825  LBB0_1226: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1826  	LONG $0x03f88349 // cmp    r8, 3
  1827  	JB   LBB0_1526
  1828  
  1829  LBB0_1227: // main scalar loop, unrolled 4x
  1830  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1831  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1832  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
  1833  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1834  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
  1835  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1836  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
  1837  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1838  	LONG $0x04c68348             // add    rsi, 4
  1839  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1840  	JNE  LBB0_1227 // repeat until rsi == r9
  1841  	JMP  LBB0_1526 // all elements written
  1842  
  1843  LBB0_299: // case entry: validate the count in r8d, then dispatch on its size
  1844  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1845  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1846  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1847  	LONG $0x04f88341         // cmp    r8d, 4
  1848  	JAE  LBB0_615 // count >= 4: continue at LBB0_615 (outside this view)
  1849  	WORD $0xf631             // xor    esi, esi
  1850  	JMP  LBB0_1014 // count < 4: continue at LBB0_1014 with index rsi = 0
  1851  
  1852  LBB0_302: // case entry: validate the count in r8d, then dispatch on its size
  1853  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1854  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1855  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1856  	LONG $0x04f88341         // cmp    r8d, 4
  1857  	JAE  LBB0_618 // count >= 4: continue at LBB0_618 (outside this view)
  1858  	WORD $0xf631             // xor    esi, esi
  1859  	JMP  LBB0_1019 // count < 4: continue at LBB0_1019 with index rsi = 0
  1860  
  1861  LBB0_305: // case entry: validate the count in r8d, then dispatch on its size
  1862  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1863  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1864  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1865  	LONG $0x08f88341         // cmp    r8d, 8
  1866  	JAE  LBB0_621 // count >= 8: continue at LBB0_621 (outside this view)
  1867  	WORD $0xf631             // xor    esi, esi
  1868  	JMP  LBB0_1024 // count < 8: continue at LBB0_1024 with index rsi = 0
  1869  
  1870  LBB0_308: // case entry: validate the count in r8d, then dispatch on its size
  1871  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1872  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1873  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1874  	LONG $0x08f88341         // cmp    r8d, 8
  1875  	JAE  LBB0_624 // count >= 8: continue at LBB0_624 (outside this view)
  1876  	WORD $0xf631             // xor    esi, esi
  1877  	JMP  LBB0_1029 // count < 8: continue at LBB0_1029 with index rsi = 0
  1878  
  1879  LBB0_311: // case: zero-extend r8d bytes read from [rdx] into 16-bit words stored at [rcx]
  1880  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1881  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  1882  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1883  	LONG $0x10f88341         // cmp    r8d, 16
  1884  	JB   LBB0_313 // count < 16: take the scalar path
  1885  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1886  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1887  	JBE  LBB0_851 // source end <= destination start: ranges are disjoint (LBB0_851 is outside this view)
  1888  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1889  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1890  	JBE  LBB0_851 // destination end <= source start: ranges are disjoint
  1891  
  1892  LBB0_313: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1893  	WORD $0xf631 // xor    esi, esi
  1894  
  1895  LBB0_1370: // scalar setup: r8 = r9 - rsi - 1, rax = r9 & 3 (number of single-element peel iterations)
  1896  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1897  	WORD $0xf749; BYTE $0xd0 // not    r8
  1898  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1899  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  1900  	LONG $0x03e08348         // and    rax, 3
  1901  	JE   LBB0_1372 // r9 is a multiple of 4: no peel iterations
  1902  
  1903  LBB0_1371: // peel loop: one element per iteration until rax reaches 0
  1904  	LONG $0x323cb60f // movzx    edi, byte [rdx + rsi]
  1905  	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
  1906  	LONG $0x01c68348 // add    rsi, 1
  1907  	LONG $0xffc08348 // add    rax, -1
  1908  	JNE  LBB0_1371
  1909  
  1910  LBB0_1372: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1911  	LONG $0x03f88349 // cmp    r8, 3
  1912  	JB   LBB0_1526
  1913  
  1914  LBB0_1373: // main scalar loop, unrolled 4x
  1915  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1916  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1917  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1918  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1919  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1920  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1921  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1922  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1923  	LONG $0x04c68348             // add    rsi, 4
  1924  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1925  	JNE  LBB0_1373 // repeat until rsi == r9
  1926  	JMP  LBB0_1526 // all elements written
  1927  
  1928  LBB0_314: // case: zero-extend r8d bytes read from [rdx] into 16-bit words stored at [rcx]
  1929  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1930  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  1931  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1932  	LONG $0x10f88341         // cmp    r8d, 16
  1933  	JB   LBB0_316 // count < 16: take the scalar path
  1934  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  1935  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  1936  	JBE  LBB0_854 // source end <= destination start: ranges are disjoint (LBB0_854 is outside this view)
  1937  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
  1938  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  1939  	JBE  LBB0_854 // destination end <= source start: ranges are disjoint
  1940  
  1941  LBB0_316: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  1942  	WORD $0xf631 // xor    esi, esi
  1943  
  1944  LBB0_1378: // scalar setup: r8 = r9 - rsi - 1, rax = r9 & 3 (number of single-element peel iterations)
  1945  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  1946  	WORD $0xf749; BYTE $0xd0 // not    r8
  1947  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  1948  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  1949  	LONG $0x03e08348         // and    rax, 3
  1950  	JE   LBB0_1380 // r9 is a multiple of 4: no peel iterations
  1951  
  1952  LBB0_1379: // peel loop: one element per iteration until rax reaches 0
  1953  	LONG $0x323cb60f // movzx    edi, byte [rdx + rsi]
  1954  	LONG $0x713c8966 // mov    word [rcx + 2*rsi], di
  1955  	LONG $0x01c68348 // add    rsi, 1
  1956  	LONG $0xffc08348 // add    rax, -1
  1957  	JNE  LBB0_1379
  1958  
  1959  LBB0_1380: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  1960  	LONG $0x03f88349 // cmp    r8, 3
  1961  	JB   LBB0_1526
  1962  
  1963  LBB0_1381: // main scalar loop, unrolled 4x
  1964  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  1965  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  1966  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  1967  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
  1968  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  1969  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
  1970  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  1971  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
  1972  	LONG $0x04c68348             // add    rsi, 4
  1973  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  1974  	JNE  LBB0_1381 // repeat until rsi == r9
  1975  	JMP  LBB0_1526 // all elements written
  1976  
  1977  LBB0_317: // case entry: validate the count in r8d, then dispatch on its size
  1978  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1979  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1980  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1981  	LONG $0x08f88341         // cmp    r8d, 8
  1982  	JAE  LBB0_633 // count >= 8: continue at LBB0_633 (outside this view)
  1983  	WORD $0xf631             // xor    esi, esi
  1984  	JMP  LBB0_1034 // count < 8: continue at LBB0_1034 with index rsi = 0
  1985  
  1986  LBB0_320: // case entry: validate the count in r8d, then dispatch on its size
  1987  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1988  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1989  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1990  	LONG $0x08f88341         // cmp    r8d, 8
  1991  	JAE  LBB0_636 // count >= 8: continue at LBB0_636 (outside this view)
  1992  	WORD $0xf631             // xor    esi, esi
  1993  	JMP  LBB0_1039 // count < 8: continue at LBB0_1039 with index rsi = 0
  1994  
  1995  LBB0_323: // case entry: validate the count in r8d, then dispatch on its size
  1996  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  1997  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  1998  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  1999  	LONG $0x04f88341         // cmp    r8d, 4
  2000  	JAE  LBB0_639 // count >= 4: continue at LBB0_639 (outside this view)
  2001  	WORD $0xf631             // xor    esi, esi
  2002  	JMP  LBB0_863 // count < 4: continue at LBB0_863 with index rsi = 0
  2003  
  2004  LBB0_326: // case entry: validate the count in r8d, then dispatch on its size
  2005  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2006  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2007  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2008  	LONG $0x08f88341         // cmp    r8d, 8
  2009  	JAE  LBB0_641 // count >= 8: continue at LBB0_641 (outside this view)
  2010  	WORD $0xf631             // xor    esi, esi
  2011  	JMP  LBB0_1044 // count < 8: continue at LBB0_1044 with index rsi = 0
  2012  
  2013  LBB0_329: // case entry: rsi = count, rdi = count - 1, r8d = count & 3, then dispatch on count
  2014  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2015  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2016  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
  2017  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  2018  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
  2019  	LONG $0x03e08341         // and    r8d, 3
  2020  	LONG $0x03ff8348         // cmp    rdi, 3
  2021  	JAE  LBB0_644 // count - 1 >= 3, i.e. count >= 4: continue at LBB0_644 (outside this view)
  2022  	WORD $0xff31             // xor    edi, edi
  2023  	JMP  LBB0_646 // count < 4: continue at LBB0_646 with rdi = 0 and r8d = count & 3
  2024  
  2025  LBB0_332: // case entry: validate the count in r8d, then dispatch on its size
  2026  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2027  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2028  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2029  	LONG $0x04f88341         // cmp    r8d, 4
  2030  	JAE  LBB0_649 // count >= 4: continue at LBB0_649 (outside this view)
  2031  	WORD $0xf631             // xor    esi, esi
  2032  	JMP  LBB0_1049 // count < 4: continue at LBB0_1049 with index rsi = 0
  2033  
  2034  LBB0_335: // case: sign-extend r8d bytes read from [rdx] into 64-bit qwords stored at [rcx]
  2035  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2036  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  2037  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2038  	LONG $0x04f88341         // cmp    r8d, 4
  2039  	JB   LBB0_337 // count < 4: take the scalar path
  2040  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2041  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2042  	JBE  LBB0_864 // source end <= destination start: ranges are disjoint (LBB0_864 is outside this view)
  2043  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  2044  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2045  	JBE  LBB0_864 // destination end <= source start: ranges are disjoint
  2046  
  2047  LBB0_337: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  2048  	WORD $0xf631 // xor    esi, esi
  2049  
  2050  LBB0_1234: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  2051  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2052  	WORD $0xf749; BYTE $0xd0 // not    r8
  2053  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2054  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2055  	LONG $0x03e78348         // and    rdi, 3
  2056  	JE   LBB0_1236 // r9 is a multiple of 4: no peel iterations
  2057  
  2058  LBB0_1235: // peel loop: one element per iteration until rdi reaches 0
  2059  	LONG $0x04be0f48; BYTE $0x32 // movsx    rax, byte [rdx + rsi]
  2060  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2061  	LONG $0x01c68348             // add    rsi, 1
  2062  	LONG $0xffc78348             // add    rdi, -1
  2063  	JNE  LBB0_1235
  2064  
  2065  LBB0_1236: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  2066  	LONG $0x03f88349 // cmp    r8, 3
  2067  	JB   LBB0_1526
  2068  
  2069  LBB0_1237: // main scalar loop, unrolled 4x
  2070  	LONG $0x04be0f48; BYTE $0x32   // movsx    rax, byte [rdx + rsi]
  2071  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  2072  	LONG $0x44be0f48; WORD $0x0132 // movsx    rax, byte [rdx + rsi + 1]
  2073  	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
  2074  	LONG $0x44be0f48; WORD $0x0232 // movsx    rax, byte [rdx + rsi + 2]
  2075  	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
  2076  	LONG $0x44be0f48; WORD $0x0332 // movsx    rax, byte [rdx + rsi + 3]
  2077  	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
  2078  	LONG $0x04c68348               // add    rsi, 4
  2079  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2080  	JNE  LBB0_1237 // repeat until rsi == r9
  2081  	JMP  LBB0_1526 // all elements written
  2082  
  2083  LBB0_338: // case: convert r8d signed bytes read from [rdx] to float32 values stored at [rcx] (movsx + cvtsi2ss)
  2084  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2085  	JLE  LBB0_1526 // count <= 0 (signed): nothing to convert
  2086  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2087  	LONG $0x08f88341         // cmp    r8d, 8
  2088  	JB   LBB0_340 // count < 8: take the scalar path
  2089  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2090  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2091  	JBE  LBB0_866 // source end <= destination start: ranges are disjoint (LBB0_866 is outside this view)
  2092  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2093  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2094  	JBE  LBB0_866 // destination end <= source start: ranges are disjoint
  2095  
  2096  LBB0_340: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  2097  	WORD $0xf631 // xor    esi, esi
  2098  
  2099  LBB0_1386: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  2100  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2101  	WORD $0xf749; BYTE $0xd0 // not    r8
  2102  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2103  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2104  	LONG $0x03e78348         // and    rdi, 3
  2105  	JE   LBB0_1388 // r9 is a multiple of 4: no peel iterations
  2106  
  2107  LBB0_1387: // peel loop: one element per iteration until rdi reaches 0
  2108  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  2109  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  2110  	LONG $0xc02a0ff3             // cvtsi2ss    xmm0, eax
  2111  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  2112  	LONG $0x01c68348             // add    rsi, 1
  2113  	LONG $0xffc78348             // add    rdi, -1
  2114  	JNE  LBB0_1387
  2115  
  2116  LBB0_1388: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  2117  	LONG $0x03f88349 // cmp    r8, 3
  2118  	JB   LBB0_1526
  2119  
  2120  LBB0_1389: // main scalar loop, unrolled 4x
  2121  	LONG $0x3204be0f               // movsx    eax, byte [rdx + rsi]
  2122  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2123  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2124  	LONG $0x04110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm0
  2125  	LONG $0x3244be0f; BYTE $0x01   // movsx    eax, byte [rdx + rsi + 1]
  2126  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2127  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2128  	LONG $0x44110ff3; WORD $0x04b1 // movss    dword [rcx + 4*rsi + 4], xmm0
  2129  	LONG $0x3244be0f; BYTE $0x02   // movsx    eax, byte [rdx + rsi + 2]
  2130  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2131  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2132  	LONG $0x44110ff3; WORD $0x08b1 // movss    dword [rcx + 4*rsi + 8], xmm0
  2133  	LONG $0x3244be0f; BYTE $0x03   // movsx    eax, byte [rdx + rsi + 3]
  2134  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2135  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2136  	LONG $0x44110ff3; WORD $0x0cb1 // movss    dword [rcx + 4*rsi + 12], xmm0
  2137  	LONG $0x04c68348               // add    rsi, 4
  2138  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2139  	JNE  LBB0_1389 // repeat until rsi == r9
  2140  	JMP  LBB0_1526 // all elements written
  2141  
  2142  LBB0_341: // case: copy r8d 64-bit qwords from [rdx] to [rcx] unchanged
  2143  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2144  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  2145  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2146  	LONG $0x04f88341         // cmp    r8d, 4
  2147  	JB   LBB0_343 // count < 4: take the scalar path
  2148  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2149  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2150  	JBE  LBB0_869 // source end <= destination start: ranges are disjoint (LBB0_869 is outside this view)
  2151  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  2152  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2153  	JBE  LBB0_869 // destination end <= source start: ranges are disjoint
  2154  
  2155  LBB0_343: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  2156  	WORD $0xf631 // xor    esi, esi
  2157  
  2158  LBB0_1244: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  2159  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2160  	WORD $0xf749; BYTE $0xd0 // not    r8
  2161  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2162  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2163  	LONG $0x03e78348         // and    rdi, 3
  2164  	JE   LBB0_1246 // r9 is a multiple of 4: no peel iterations
  2165  
  2166  LBB0_1245: // peel loop: one element per iteration until rdi reaches 0
  2167  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2168  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2169  	LONG $0x01c68348 // add    rsi, 1
  2170  	LONG $0xffc78348 // add    rdi, -1
  2171  	JNE  LBB0_1245
  2172  
  2173  LBB0_1246: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  2174  	LONG $0x03f88349 // cmp    r8, 3
  2175  	JB   LBB0_1526
  2176  
  2177  LBB0_1247: // main scalar loop, unrolled 4x
  2178  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2179  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2180  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2181  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2182  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2183  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2184  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2185  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2186  	LONG $0x04c68348             // add    rsi, 4
  2187  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2188  	JNE  LBB0_1247 // repeat until rsi == r9
  2189  	JMP  LBB0_1526 // all elements written
  2190  
  2191  LBB0_344: // case entry: validate the count in r8d, then dispatch on its size
  2192  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2193  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2194  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2195  	LONG $0x04f88341         // cmp    r8d, 4
  2196  	JAE  LBB0_661 // count >= 4: continue at LBB0_661 (outside this view)
  2197  	WORD $0xf631             // xor    esi, esi
  2198  	JMP  LBB0_1056 // count < 4: continue at LBB0_1056 with index rsi = 0
  2199  
  2200  LBB0_347: // case entry: validate the count in r8d, then dispatch on its size
  2201  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2202  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2203  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2204  	LONG $0x04f88341         // cmp    r8d, 4
  2205  	JAE  LBB0_664 // count >= 4: continue at LBB0_664 (outside this view)
  2206  	WORD $0xf631             // xor    esi, esi
  2207  	JMP  LBB0_877 // count < 4: continue at LBB0_877 with index rsi = 0
  2208  
  2209  LBB0_350: // case entry: validate the count in r8d, then dispatch on its size
  2210  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2211  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2212  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2213  	LONG $0x08f88341         // cmp    r8d, 8
  2214  	JAE  LBB0_666 // count >= 8: continue at LBB0_666 (outside this view)
  2215  	WORD $0xf631             // xor    esi, esi
  2216  	JMP  LBB0_1062 // count < 8: continue at LBB0_1062 with index rsi = 0
  2217  
  2218  LBB0_353: // case entry: validate the count in r8d, then dispatch on its size
  2219  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2220  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2221  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2222  	LONG $0x04f88341         // cmp    r8d, 4
  2223  	JAE  LBB0_669 // count >= 4: continue at LBB0_669 (outside this view)
  2224  	WORD $0xf631             // xor    esi, esi
  2225  	JMP  LBB0_884 // count < 4: continue at LBB0_884 with index rsi = 0
  2226  
  2227  LBB0_356: // case entry: validate the count in r8d, then dispatch on its size
  2228  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2229  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2230  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2231  	LONG $0x08f88341         // cmp    r8d, 8
  2232  	JAE  LBB0_671 // count >= 8: continue at LBB0_671 (outside this view)
  2233  	WORD $0xf631             // xor    esi, esi
  2234  	JMP  LBB0_1067 // count < 8: continue at LBB0_1067 with index rsi = 0
  2235  
  2236  LBB0_359: // case: copy r8d 64-bit qwords from [rdx] to [rcx] unchanged
  2237  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2238  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  2239  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2240  	LONG $0x04f88341         // cmp    r8d, 4
  2241  	JB   LBB0_361 // count < 4: take the scalar path
  2242  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2243  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2244  	JBE  LBB0_885 // source end <= destination start: ranges are disjoint (LBB0_885 is outside this view)
  2245  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  2246  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2247  	JBE  LBB0_885 // destination end <= source start: ranges are disjoint
  2248  
  2249  LBB0_361: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  2250  	WORD $0xf631 // xor    esi, esi
  2251  
  2252  LBB0_1254: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 3 (number of single-element peel iterations)
  2253  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2254  	WORD $0xf749; BYTE $0xd0 // not    r8
  2255  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2256  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2257  	LONG $0x03e78348         // and    rdi, 3
  2258  	JE   LBB0_1256 // r9 is a multiple of 4: no peel iterations
  2259  
  2260  LBB0_1255: // peel loop: one element per iteration until rdi reaches 0
  2261  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2262  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2263  	LONG $0x01c68348 // add    rsi, 1
  2264  	LONG $0xffc78348 // add    rdi, -1
  2265  	JNE  LBB0_1255
  2266  
  2267  LBB0_1256: // skip the unrolled loop when r8 (computed before the peel loop) is below 3
  2268  	LONG $0x03f88349 // cmp    r8, 3
  2269  	JB   LBB0_1526
  2270  
  2271  LBB0_1257: // main scalar loop, unrolled 4x
  2272  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2273  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2274  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2275  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2276  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2277  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2278  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2279  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2280  	LONG $0x04c68348             // add    rsi, 4
  2281  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2282  	JNE  LBB0_1257 // repeat until rsi == r9
  2283  	JMP  LBB0_1526 // all elements written
  2284  
  2285  LBB0_362: // case entry: rsi = count, rdi = count - 1, eax = count & 3, then dispatch on count
  2286  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2287  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2288  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
  2289  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  2290  	WORD $0xf089             // mov    eax, esi
  2291  	WORD $0xe083; BYTE $0x03 // and    eax, 3
  2292  	LONG $0x03ff8348         // cmp    rdi, 3
  2293  	JAE  LBB0_677 // count - 1 >= 3, i.e. count >= 4: continue at LBB0_677 (outside this view)
  2294  	WORD $0xff31             // xor    edi, edi
  2295  	JMP  LBB0_679 // count < 4: continue at LBB0_679 with rdi = 0 and eax = count & 3
  2296  
  2297  LBB0_365: // case entry: rsi = count, rdi = count - 1, r8d = count & 3, then dispatch on count
  2298  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2299  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  2300  	WORD $0x8944; BYTE $0xc6 // mov    esi, r8d
  2301  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  2302  	WORD $0x8941; BYTE $0xf0 // mov    r8d, esi
  2303  	LONG $0x03e08341         // and    r8d, 3
  2304  	LONG $0x03ff8348         // cmp    rdi, 3
  2305  	JAE  LBB0_682 // count - 1 >= 3, i.e. count >= 4: continue at LBB0_682 (outside this view)
  2306  	WORD $0xff31             // xor    edi, edi
  2307  	JMP  LBB0_684 // count < 4: continue at LBB0_684 with rdi = 0 and r8d = count & 3
  2308  
  2309  LBB0_368: // case: copy r8d 32-bit dwords from [rdx] to [rcx] unchanged
  2310  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2311  	JLE  LBB0_1526 // count <= 0 (signed): nothing to copy
  2312  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2313  	LONG $0x08f88341         // cmp    r8d, 8
  2314  	JB   LBB0_370 // count < 8: take the scalar path
  2315  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2316  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2317  	JBE  LBB0_887 // source end <= destination start: ranges are disjoint (LBB0_887 is outside this view)
  2318  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2319  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2320  	JBE  LBB0_887 // destination end <= source start: ranges are disjoint
  2321  
  2322  LBB0_370: // scalar path (short input or overlapping ranges): element index rsi starts at 0
  2323  	WORD $0xf631 // xor    esi, esi
  2324  
  2325  LBB0_1264: // scalar setup: r8 = r9 - rsi - 1, rdi = r9 & 7 (number of single-element peel iterations)
  2326  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2327  	WORD $0xf749; BYTE $0xd0 // not    r8
  2328  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2329  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2330  	LONG $0x07e78348         // and    rdi, 7
  2331  	JE   LBB0_1266 // r9 is a multiple of 8: no peel iterations
  2332  
  2333  LBB0_1265: // peel loop: one element per iteration until rdi reaches 0
  2334  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  2335  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  2336  	LONG $0x01c68348         // add    rsi, 1
  2337  	LONG $0xffc78348         // add    rdi, -1
  2338  	JNE  LBB0_1265
  2339  
  2340  LBB0_1266: // skip the unrolled loop when r8 (computed before the peel loop) is below 7
  2341  	LONG $0x07f88349 // cmp    r8, 7
  2342  	JB   LBB0_1526
  2343  
  2344  LBB0_1267: // main scalar loop, unrolled 8x
  2345  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  2346  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  2347  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  2348  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  2349  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  2350  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  2351  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  2352  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  2353  	LONG $0x10b2448b         // mov    eax, dword [rdx + 4*rsi + 16]
  2354  	LONG $0x10b14489         // mov    dword [rcx + 4*rsi + 16], eax
  2355  	LONG $0x14b2448b         // mov    eax, dword [rdx + 4*rsi + 20]
  2356  	LONG $0x14b14489         // mov    dword [rcx + 4*rsi + 20], eax
  2357  	LONG $0x18b2448b         // mov    eax, dword [rdx + 4*rsi + 24]
  2358  	LONG $0x18b14489         // mov    dword [rcx + 4*rsi + 24], eax
  2359  	LONG $0x1cb2448b         // mov    eax, dword [rdx + 4*rsi + 28]
  2360  	LONG $0x1cb14489         // mov    dword [rcx + 4*rsi + 28], eax
  2361  	LONG $0x08c68348         // add    rsi, 8
  2362  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  2363  	JNE  LBB0_1267 // repeat until rsi == r9
  2364  	JMP  LBB0_1526 // all elements written
  2365  
  2366  LBB0_371:
	// Case: widen each byte at [rdx + i] to a zero-extended qword at [rcx + 8*i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2367  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2368  	JLE  LBB0_1526
  2369  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2370  	LONG $0x04f88341         // cmp    r8d, 4
  2371  	JB   LBB0_373
	// With 4 or more elements, branch to LBB0_889 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2372  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2373  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2374  	JBE  LBB0_889
  2375  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
  2376  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2377  	JBE  LBB0_889
  2378  
  2379  LBB0_373:
	// Scalar path: start at element index rsi = 0.
  2380  	WORD $0xf631 // xor    esi, esi
  2381  
  2382  LBB0_1274:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2383  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2384  	WORD $0xf749; BYTE $0xd0 // not    r8
  2385  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2386  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2387  	LONG $0x03e78348         // and    rdi, 3
  2388  	JE   LBB0_1276
  2389  
  2390  LBB0_1275:
	// Peel loop: one element per pass until rdi reaches zero.
  2391  	LONG $0x3204b60f // movzx    eax, byte [rdx + rsi]
  2392  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
  2393  	LONG $0x01c68348 // add    rsi, 1
  2394  	LONG $0xffc78348 // add    rdi, -1
  2395  	JNE  LBB0_1275
  2396  
  2397  LBB0_1276:
	// Finished when fewer than 4 elements were pending before the peel.
  2398  	LONG $0x03f88349 // cmp    r8, 3
  2399  	JB   LBB0_1526
  2400  
  2401  LBB0_1277:
	// Main loop: 4 elements per pass until rsi equals r9.
  2402  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2403  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  2404  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2405  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
  2406  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2407  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
  2408  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2409  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
  2410  	LONG $0x04c68348             // add    rsi, 4
  2411  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2412  	JNE  LBB0_1277
  2413  	JMP  LBB0_1526
  2414  
  2415  LBB0_374:
	// Case: convert each byte at [rdx + i] (zero-extended) to a float32 stored at
	// [rcx + 4*i]. r8d is the element count; nothing is done when it is not
	// positive (LBB0_1526 is outside this chunk -- presumably the shared exit).
  2416  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2417  	JLE  LBB0_1526
  2418  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2419  	LONG $0x08f88341         // cmp    r8d, 8
  2420  	JB   LBB0_376
	// With 8 or more elements, branch to LBB0_891 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2421  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2422  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2423  	JBE  LBB0_891
  2424  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2425  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2426  	JBE  LBB0_891
  2427  
  2428  LBB0_376:
	// Scalar path: start at element index rsi = 0.
  2429  	WORD $0xf631 // xor    esi, esi
  2430  
  2431  LBB0_1394:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2432  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2433  	WORD $0xf749; BYTE $0xd0 // not    r8
  2434  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2435  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2436  	LONG $0x03e78348         // and    rdi, 3
  2437  	JE   LBB0_1396
  2438  
  2439  LBB0_1395:
	// Peel loop: one element per pass; xmm0 is cleared before each cvtsi2ss.
  2440  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2441  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  2442  	LONG $0xc02a0ff3             // cvtsi2ss    xmm0, eax
  2443  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  2444  	LONG $0x01c68348             // add    rsi, 1
  2445  	LONG $0xffc78348             // add    rdi, -1
  2446  	JNE  LBB0_1395
  2447  
  2448  LBB0_1396:
	// Finished when fewer than 4 elements were pending before the peel.
  2449  	LONG $0x03f88349 // cmp    r8, 3
  2450  	JB   LBB0_1526
  2451  
  2452  LBB0_1397:
	// Main loop: 4 elements per pass until rsi equals r9.
  2453  	LONG $0x3204b60f               // movzx    eax, byte [rdx + rsi]
  2454  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2455  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2456  	LONG $0x04110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm0
  2457  	LONG $0x3244b60f; BYTE $0x01   // movzx    eax, byte [rdx + rsi + 1]
  2458  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2459  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2460  	LONG $0x44110ff3; WORD $0x04b1 // movss    dword [rcx + 4*rsi + 4], xmm0
  2461  	LONG $0x3244b60f; BYTE $0x02   // movzx    eax, byte [rdx + rsi + 2]
  2462  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2463  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2464  	LONG $0x44110ff3; WORD $0x08b1 // movss    dword [rcx + 4*rsi + 8], xmm0
  2465  	LONG $0x3244b60f; BYTE $0x03   // movzx    eax, byte [rdx + rsi + 3]
  2466  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  2467  	LONG $0xc02a0ff3               // cvtsi2ss    xmm0, eax
  2468  	LONG $0x44110ff3; WORD $0x0cb1 // movss    dword [rcx + 4*rsi + 12], xmm0
  2469  	LONG $0x04c68348               // add    rsi, 4
  2470  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2471  	JNE  LBB0_1397
  2472  	JMP  LBB0_1526
  2473  
  2474  LBB0_377:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 4 or more
	// elements go to LBB0_696, otherwise rsi is zeroed and control moves to
	// LBB0_900. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  2475  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2476  	JLE  LBB0_1526
  2477  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2478  	LONG $0x04f88341         // cmp    r8d, 4
  2479  	JAE  LBB0_696
  2480  	WORD $0xf631             // xor    esi, esi
  2481  	JMP  LBB0_900
  2482  
  2483  LBB0_380:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 8 or more
	// elements go to LBB0_698, otherwise rsi is zeroed and control moves to
	// LBB0_1072. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  2484  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2485  	JLE  LBB0_1526
  2486  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2487  	LONG $0x08f88341         // cmp    r8d, 8
  2488  	JAE  LBB0_698
  2489  	WORD $0xf631             // xor    esi, esi
  2490  	JMP  LBB0_1072
  2491  
  2492  LBB0_383:
	// Case: narrow 4-byte source elements to bytes by copying the byte at
	// [rdx + 4*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2493  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2494  	JLE  LBB0_1526
  2495  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2496  	LONG $0x08f88341         // cmp    r8d, 8
  2497  	JB   LBB0_385
	// With 8 or more elements, branch to LBB0_901 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2498  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2499  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2500  	JBE  LBB0_901
  2501  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2502  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2503  	JBE  LBB0_901
  2504  
  2505  LBB0_385:
	// Scalar path: start at element index rsi = 0.
  2506  	WORD $0xf631 // xor    esi, esi
  2507  
  2508  LBB0_1402:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2509  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2510  	WORD $0xf749; BYTE $0xd0 // not    r8
  2511  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2512  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2513  	LONG $0x03e78348         // and    rdi, 3
  2514  	JE   LBB0_1404
  2515  
  2516  LBB0_1403:
	// Peel loop: one element per pass until rdi reaches zero.
  2517  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  2518  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2519  	LONG $0x01c68348         // add    rsi, 1
  2520  	LONG $0xffc78348         // add    rdi, -1
  2521  	JNE  LBB0_1403
  2522  
  2523  LBB0_1404:
	// Finished when fewer than 4 elements were pending before the peel.
  2524  	LONG $0x03f88349 // cmp    r8, 3
  2525  	JB   LBB0_1526
  2526  
  2527  LBB0_1405:
	// Main loop: 4 elements per pass until rsi equals r9.
  2528  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  2529  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2530  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  2531  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2532  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  2533  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2534  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  2535  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2536  	LONG $0x04c68348             // add    rsi, 4
  2537  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2538  	JNE  LBB0_1405
  2539  	JMP  LBB0_1526
  2540  
  2541  LBB0_386:
	// Case: convert each float64 at [rdx + 8*i] to an integer with truncation
	// (cvttsd2si into eax) and store only the low byte at [rcx + i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2542  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2543  	JLE  LBB0_1526
  2544  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2545  	LONG $0x04f88341         // cmp    r8d, 4
  2546  	JB   LBB0_388
	// With 4 or more elements, branch to LBB0_904 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2547  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2548  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2549  	JBE  LBB0_904
  2550  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2551  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2552  	JBE  LBB0_904
  2553  
  2554  LBB0_388:
	// Scalar path: start at element index rsi = 0.
  2555  	WORD $0xf631 // xor    esi, esi
  2556  
  2557  LBB0_1410:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2558  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2559  	WORD $0xf749; BYTE $0xd0 // not    r8
  2560  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2561  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2562  	LONG $0x03e78348         // and    rdi, 3
  2563  	JE   LBB0_1412
  2564  
  2565  LBB0_1411:
	// Peel loop: one element per pass until rdi reaches zero.
  2566  	LONG $0x042c0ff2; BYTE $0xf2 // cvttsd2si    eax, qword [rdx + 8*rsi]
  2567  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2568  	LONG $0x01c68348             // add    rsi, 1
  2569  	LONG $0xffc78348             // add    rdi, -1
  2570  	JNE  LBB0_1411
  2571  
  2572  LBB0_1412:
	// Finished when fewer than 4 elements were pending before the peel.
  2573  	LONG $0x03f88349 // cmp    r8, 3
  2574  	JB   LBB0_1526
  2575  
  2576  LBB0_1413:
	// Main loop: 4 elements per pass until rsi equals r9.
  2577  	LONG $0x042c0ff2; BYTE $0xf2   // cvttsd2si    eax, qword [rdx + 8*rsi]
  2578  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  2579  	LONG $0x442c0ff2; WORD $0x08f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 8]
  2580  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  2581  	LONG $0x442c0ff2; WORD $0x10f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 16]
  2582  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  2583  	LONG $0x442c0ff2; WORD $0x18f2 // cvttsd2si    eax, qword [rdx + 8*rsi + 24]
  2584  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  2585  	LONG $0x04c68348               // add    rsi, 4
  2586  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2587  	JNE  LBB0_1413
  2588  	JMP  LBB0_1526
  2589  
  2590  LBB0_389:
	// Case: same-width byte copy from [rdx + i] to [rcx + i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2591  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2592  	JLE  LBB0_1526
  2593  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2594  	LONG $0x20f88341         // cmp    r8d, 32
  2595  	JB   LBB0_391
	// With 32 or more elements, branch to LBB0_907 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2596  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2597  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2598  	JBE  LBB0_907
  2599  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2600  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2601  	JBE  LBB0_907
  2602  
  2603  LBB0_391:
	// Scalar path: start at element index rsi = 0.
  2604  	WORD $0xf631 // xor    esi, esi
  2605  
  2606  LBB0_1284:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2607  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2608  	WORD $0xf749; BYTE $0xd0 // not    r8
  2609  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2610  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2611  	LONG $0x03e78348         // and    rdi, 3
  2612  	JE   LBB0_1286
  2613  
  2614  LBB0_1285:
	// Peel loop: one element per pass until rdi reaches zero.
  2615  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2616  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2617  	LONG $0x01c68348         // add    rsi, 1
  2618  	LONG $0xffc78348         // add    rdi, -1
  2619  	JNE  LBB0_1285
  2620  
  2621  LBB0_1286:
	// Finished when fewer than 4 elements were pending before the peel.
  2622  	LONG $0x03f88349 // cmp    r8, 3
  2623  	JB   LBB0_1526
  2624  
  2625  LBB0_1287:
	// Main loop: 4 elements per pass until rsi equals r9.
  2626  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2627  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2628  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2629  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2630  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2631  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2632  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2633  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2634  	LONG $0x04c68348             // add    rsi, 4
  2635  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2636  	JNE  LBB0_1287
  2637  	JMP  LBB0_1526
  2638  
  2639  LBB0_392:
	// Case: narrow 8-byte source elements to bytes by copying the byte at
	// [rdx + 8*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2640  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2641  	JLE  LBB0_1526
  2642  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2643  	LONG $0x04f88341         // cmp    r8d, 4
  2644  	JB   LBB0_394
	// With 4 or more elements, branch to LBB0_909 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2645  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2646  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2647  	JBE  LBB0_909
  2648  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2649  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2650  	JBE  LBB0_909
  2651  
  2652  LBB0_394:
	// Scalar path: start at element index rsi = 0.
  2653  	WORD $0xf631 // xor    esi, esi
  2654  
  2655  LBB0_1418:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2656  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2657  	WORD $0xf749; BYTE $0xd0 // not    r8
  2658  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2659  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2660  	LONG $0x03e78348         // and    rdi, 3
  2661  	JE   LBB0_1420
  2662  
  2663  LBB0_1419:
	// Peel loop: one element per pass until rdi reaches zero.
  2664  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  2665  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2666  	LONG $0x01c68348         // add    rsi, 1
  2667  	LONG $0xffc78348         // add    rdi, -1
  2668  	JNE  LBB0_1419
  2669  
  2670  LBB0_1420:
	// Finished when fewer than 4 elements were pending before the peel.
  2671  	LONG $0x03f88349 // cmp    r8, 3
  2672  	JB   LBB0_1526
  2673  
  2674  LBB0_1421:
	// Main loop: 4 elements per pass until rsi equals r9.
  2675  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  2676  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2677  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  2678  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2679  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  2680  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2681  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  2682  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2683  	LONG $0x04c68348             // add    rsi, 4
  2684  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2685  	JNE  LBB0_1421
  2686  	JMP  LBB0_1526
  2687  
  2688  LBB0_395:
	// Case: narrow 2-byte source elements to bytes by copying the byte at
	// [rdx + 2*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2689  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2690  	JLE  LBB0_1526
  2691  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2692  	LONG $0x10f88341         // cmp    r8d, 16
  2693  	JB   LBB0_397
	// With 16 or more elements, branch to LBB0_912 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2694  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  2695  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2696  	JBE  LBB0_912
  2697  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2698  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2699  	JBE  LBB0_912
  2700  
  2701  LBB0_397:
	// Scalar path: start at element index rsi = 0.
  2702  	WORD $0xf631 // xor    esi, esi
  2703  
  2704  LBB0_1426:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2705  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2706  	WORD $0xf749; BYTE $0xd0 // not    r8
  2707  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2708  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2709  	LONG $0x03e78348         // and    rdi, 3
  2710  	JE   LBB0_1428
  2711  
  2712  LBB0_1427:
	// Peel loop: one element per pass until rdi reaches zero.
  2713  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
  2714  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2715  	LONG $0x01c68348         // add    rsi, 1
  2716  	LONG $0xffc78348         // add    rdi, -1
  2717  	JNE  LBB0_1427
  2718  
  2719  LBB0_1428:
	// Finished when fewer than 4 elements were pending before the peel.
  2720  	LONG $0x03f88349 // cmp    r8, 3
  2721  	JB   LBB0_1526
  2722  
  2723  LBB0_1429:
	// Main loop: 4 elements per pass until rsi equals r9.
  2724  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
  2725  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2726  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
  2727  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2728  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
  2729  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2730  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
  2731  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2732  	LONG $0x04c68348             // add    rsi, 4
  2733  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2734  	JNE  LBB0_1429
  2735  	JMP  LBB0_1526
  2736  
  2737  LBB0_398:
	// Case: narrow 2-byte source elements to bytes by copying the byte at
	// [rdx + 2*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2738  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2739  	JLE  LBB0_1526
  2740  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2741  	LONG $0x10f88341         // cmp    r8d, 16
  2742  	JB   LBB0_400
	// With 16 or more elements, branch to LBB0_915 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2743  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
  2744  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2745  	JBE  LBB0_915
  2746  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2747  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2748  	JBE  LBB0_915
  2749  
  2750  LBB0_400:
	// Scalar path: start at element index rsi = 0.
  2751  	WORD $0xf631 // xor    esi, esi
  2752  
  2753  LBB0_1434:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2754  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2755  	WORD $0xf749; BYTE $0xd0 // not    r8
  2756  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2757  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2758  	LONG $0x03e78348         // and    rdi, 3
  2759  	JE   LBB0_1436
  2760  
  2761  LBB0_1435:
	// Peel loop: one element per pass until rdi reaches zero.
  2762  	LONG $0x7204b60f         // movzx    eax, byte [rdx + 2*rsi]
  2763  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2764  	LONG $0x01c68348         // add    rsi, 1
  2765  	LONG $0xffc78348         // add    rdi, -1
  2766  	JNE  LBB0_1435
  2767  
  2768  LBB0_1436:
	// Finished when fewer than 4 elements were pending before the peel.
  2769  	LONG $0x03f88349 // cmp    r8, 3
  2770  	JB   LBB0_1526
  2771  
  2772  LBB0_1437:
	// Main loop: 4 elements per pass until rsi equals r9.
  2773  	LONG $0x7204b60f             // movzx    eax, byte [rdx + 2*rsi]
  2774  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2775  	LONG $0x7244b60f; BYTE $0x02 // movzx    eax, byte [rdx + 2*rsi + 2]
  2776  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2777  	LONG $0x7244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 2*rsi + 4]
  2778  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2779  	LONG $0x7244b60f; BYTE $0x06 // movzx    eax, byte [rdx + 2*rsi + 6]
  2780  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2781  	LONG $0x04c68348             // add    rsi, 4
  2782  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2783  	JNE  LBB0_1437
  2784  	JMP  LBB0_1526
  2785  
  2786  LBB0_401:
	// Case: narrow 8-byte source elements to bytes by copying the byte at
	// [rdx + 8*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2787  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2788  	JLE  LBB0_1526
  2789  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2790  	LONG $0x04f88341         // cmp    r8d, 4
  2791  	JB   LBB0_403
	// With 4 or more elements, branch to LBB0_918 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2792  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
  2793  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2794  	JBE  LBB0_918
  2795  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2796  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2797  	JBE  LBB0_918
  2798  
  2799  LBB0_403:
	// Scalar path: start at element index rsi = 0.
  2800  	WORD $0xf631 // xor    esi, esi
  2801  
  2802  LBB0_1442:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2803  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2804  	WORD $0xf749; BYTE $0xd0 // not    r8
  2805  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2806  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2807  	LONG $0x03e78348         // and    rdi, 3
  2808  	JE   LBB0_1444
  2809  
  2810  LBB0_1443:
	// Peel loop: one element per pass until rdi reaches zero.
  2811  	LONG $0xf204b60f         // movzx    eax, byte [rdx + 8*rsi]
  2812  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2813  	LONG $0x01c68348         // add    rsi, 1
  2814  	LONG $0xffc78348         // add    rdi, -1
  2815  	JNE  LBB0_1443
  2816  
  2817  LBB0_1444:
	// Finished when fewer than 4 elements were pending before the peel.
  2818  	LONG $0x03f88349 // cmp    r8, 3
  2819  	JB   LBB0_1526
  2820  
  2821  LBB0_1445:
	// Main loop: 4 elements per pass until rsi equals r9.
  2822  	LONG $0xf204b60f             // movzx    eax, byte [rdx + 8*rsi]
  2823  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2824  	LONG $0xf244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 8*rsi + 8]
  2825  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2826  	LONG $0xf244b60f; BYTE $0x10 // movzx    eax, byte [rdx + 8*rsi + 16]
  2827  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2828  	LONG $0xf244b60f; BYTE $0x18 // movzx    eax, byte [rdx + 8*rsi + 24]
  2829  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2830  	LONG $0x04c68348             // add    rsi, 4
  2831  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2832  	JNE  LBB0_1445
  2833  	JMP  LBB0_1526
  2834  
  2835  LBB0_404:
	// Case: convert each float32 at [rdx + 4*i] to an integer with truncation
	// (cvttss2si into eax) and store only the low byte at [rcx + i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2836  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2837  	JLE  LBB0_1526
  2838  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2839  	LONG $0x08f88341         // cmp    r8d, 8
  2840  	JB   LBB0_406
	// With 8 or more elements, branch to LBB0_921 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2841  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2842  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2843  	JBE  LBB0_921
  2844  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2845  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2846  	JBE  LBB0_921
  2847  
  2848  LBB0_406:
	// Scalar path: start at element index rsi = 0.
  2849  	WORD $0xf631 // xor    esi, esi
  2850  
  2851  LBB0_1450:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2852  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2853  	WORD $0xf749; BYTE $0xd0 // not    r8
  2854  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2855  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2856  	LONG $0x03e78348         // and    rdi, 3
  2857  	JE   LBB0_1452
  2858  
  2859  LBB0_1451:
	// Peel loop: one element per pass until rdi reaches zero.
  2860  	LONG $0x042c0ff3; BYTE $0xb2 // cvttss2si    eax, dword [rdx + 4*rsi]
  2861  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2862  	LONG $0x01c68348             // add    rsi, 1
  2863  	LONG $0xffc78348             // add    rdi, -1
  2864  	JNE  LBB0_1451
  2865  
  2866  LBB0_1452:
	// Finished when fewer than 4 elements were pending before the peel.
  2867  	LONG $0x03f88349 // cmp    r8, 3
  2868  	JB   LBB0_1526
  2869  
  2870  LBB0_1453:
	// Main loop: 4 elements per pass until rsi equals r9.
  2871  	LONG $0x042c0ff3; BYTE $0xb2   // cvttss2si    eax, dword [rdx + 4*rsi]
  2872  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
  2873  	LONG $0x442c0ff3; WORD $0x04b2 // cvttss2si    eax, dword [rdx + 4*rsi + 4]
  2874  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
  2875  	LONG $0x442c0ff3; WORD $0x08b2 // cvttss2si    eax, dword [rdx + 4*rsi + 8]
  2876  	LONG $0x02314488               // mov    byte [rcx + rsi + 2], al
  2877  	LONG $0x442c0ff3; WORD $0x0cb2 // cvttss2si    eax, dword [rdx + 4*rsi + 12]
  2878  	LONG $0x03314488               // mov    byte [rcx + rsi + 3], al
  2879  	LONG $0x04c68348               // add    rsi, 4
  2880  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  2881  	JNE  LBB0_1453
  2882  	JMP  LBB0_1526
  2883  
  2884  LBB0_407:
	// Case: same-width byte copy from [rdx + i] to [rcx + i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2885  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2886  	JLE  LBB0_1526
  2887  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2888  	LONG $0x20f88341         // cmp    r8d, 32
  2889  	JB   LBB0_409
	// With 32 or more elements, branch to LBB0_924 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2890  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  2891  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2892  	JBE  LBB0_924
  2893  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2894  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2895  	JBE  LBB0_924
  2896  
  2897  LBB0_409:
	// Scalar path: start at element index rsi = 0.
  2898  	WORD $0xf631 // xor    esi, esi
  2899  
  2900  LBB0_1294:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2901  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2902  	WORD $0xf749; BYTE $0xd0 // not    r8
  2903  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2904  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2905  	LONG $0x03e78348         // and    rdi, 3
  2906  	JE   LBB0_1296
  2907  
  2908  LBB0_1295:
	// Peel loop: one element per pass until rdi reaches zero.
  2909  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2910  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2911  	LONG $0x01c68348         // add    rsi, 1
  2912  	LONG $0xffc78348         // add    rdi, -1
  2913  	JNE  LBB0_1295
  2914  
  2915  LBB0_1296:
	// Finished when fewer than 4 elements were pending before the peel.
  2916  	LONG $0x03f88349 // cmp    r8, 3
  2917  	JB   LBB0_1526
  2918  
  2919  LBB0_1297:
	// Main loop: 4 elements per pass until rsi equals r9.
  2920  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2921  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2922  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2923  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2924  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2925  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2926  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2927  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2928  	LONG $0x04c68348             // add    rsi, 4
  2929  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2930  	JNE  LBB0_1297
  2931  	JMP  LBB0_1526
  2932  
  2933  LBB0_410:
	// Case: narrow 4-byte source elements to bytes by copying the byte at
	// [rdx + 4*i] to [rcx + i]. r8d is the element count; nothing is done when it
	// is not positive (LBB0_1526 is outside this chunk -- presumably the exit).
  2934  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2935  	JLE  LBB0_1526
  2936  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2937  	LONG $0x08f88341         // cmp    r8d, 8
  2938  	JB   LBB0_412
	// With 8 or more elements, branch to LBB0_926 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2939  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2940  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2941  	JBE  LBB0_926
  2942  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
  2943  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2944  	JBE  LBB0_926
  2945  
  2946  LBB0_412:
	// Scalar path: start at element index rsi = 0.
  2947  	WORD $0xf631 // xor    esi, esi
  2948  
  2949  LBB0_1458:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2950  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  2951  	WORD $0xf749; BYTE $0xd0 // not    r8
  2952  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  2953  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  2954  	LONG $0x03e78348         // and    rdi, 3
  2955  	JE   LBB0_1460
  2956  
  2957  LBB0_1459:
	// Peel loop: one element per pass until rdi reaches zero.
  2958  	LONG $0xb204b60f         // movzx    eax, byte [rdx + 4*rsi]
  2959  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
  2960  	LONG $0x01c68348         // add    rsi, 1
  2961  	LONG $0xffc78348         // add    rdi, -1
  2962  	JNE  LBB0_1459
  2963  
  2964  LBB0_1460:
	// Finished when fewer than 4 elements were pending before the peel.
  2965  	LONG $0x03f88349 // cmp    r8, 3
  2966  	JB   LBB0_1526
  2967  
  2968  LBB0_1461:
	// Main loop: 4 elements per pass until rsi equals r9.
  2969  	LONG $0xb204b60f             // movzx    eax, byte [rdx + 4*rsi]
  2970  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
  2971  	LONG $0xb244b60f; BYTE $0x04 // movzx    eax, byte [rdx + 4*rsi + 4]
  2972  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
  2973  	LONG $0xb244b60f; BYTE $0x08 // movzx    eax, byte [rdx + 4*rsi + 8]
  2974  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
  2975  	LONG $0xb244b60f; BYTE $0x0c // movzx    eax, byte [rdx + 4*rsi + 12]
  2976  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
  2977  	LONG $0x04c68348             // add    rsi, 4
  2978  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  2979  	JNE  LBB0_1461
  2980  	JMP  LBB0_1526
  2981  
  2982  LBB0_413:
	// Case: same-width 4-byte copy from [rdx + 4*i] to [rcx + 4*i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  2983  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  2984  	JLE  LBB0_1526
  2985  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  2986  	LONG $0x08f88341         // cmp    r8d, 8
  2987  	JB   LBB0_415
	// With 8 or more elements, branch to LBB0_929 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  2988  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
  2989  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  2990  	JBE  LBB0_929
  2991  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  2992  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  2993  	JBE  LBB0_929
  2994  
  2995  LBB0_415:
	// Scalar path: start at element index rsi = 0.
  2996  	WORD $0xf631 // xor    esi, esi
  2997  
  2998  LBB0_1304:
	// r8 = r9 - rsi - 1; rdi = r9 & 3 = number of single-element peel passes.
  2999  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3000  	WORD $0xf749; BYTE $0xd0 // not    r8
  3001  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3002  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3003  	LONG $0x03e78348         // and    rdi, 3
  3004  	JE   LBB0_1306
  3005  
  3006  LBB0_1305:
	// Peel loop: one element per pass until rdi reaches zero.
  3007  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3008  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3009  	LONG $0x01c68348         // add    rsi, 1
  3010  	LONG $0xffc78348         // add    rdi, -1
  3011  	JNE  LBB0_1305
  3012  
  3013  LBB0_1306:
	// Finished when fewer than 4 elements were pending before the peel.
  3014  	LONG $0x03f88349 // cmp    r8, 3
  3015  	JB   LBB0_1526
  3016  
  3017  LBB0_1307:
	// Main loop: 4 elements per pass until rsi equals r9.
  3018  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3019  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3020  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  3021  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  3022  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  3023  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  3024  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  3025  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  3026  	LONG $0x04c68348         // add    rsi, 4
  3027  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  3028  	JNE  LBB0_1307
  3029  	JMP  LBB0_1526
  3030  
  3031  LBB0_416:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 4 or more
	// elements go to LBB0_734, otherwise rsi is zeroed and control moves to
	// LBB0_1077. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3032  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3033  	JLE  LBB0_1526
  3034  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3035  	LONG $0x04f88341         // cmp    r8d, 4
  3036  	JAE  LBB0_734
  3037  	WORD $0xf631             // xor    esi, esi
  3038  	JMP  LBB0_1077
  3039  
  3040  LBB0_419:
	// Case: widen each byte at [rdx + i] to a sign-extended dword at [rcx + 4*i].
	// r8d is the element count; nothing is done when it is not positive
	// (LBB0_1526 is outside this chunk -- presumably the shared exit; confirm).
  3041  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3042  	JLE  LBB0_1526
  3043  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3044  	LONG $0x08f88341         // cmp    r8d, 8
  3045  	JB   LBB0_421
	// With 8 or more elements, branch to LBB0_931 (outside this chunk; presumably
	// the vectorized path) when the source and destination ranges are disjoint.
  3046  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
  3047  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3048  	JBE  LBB0_931
  3049  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
  3050  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3051  	JBE  LBB0_931
  3052  
  3053  LBB0_421:
	// Scalar path: start at element index rsi = 0.
  3054  	WORD $0xf631 // xor    esi, esi
  3055  
  3056  LBB0_1466:
	// r8 = r9 - rsi - 1; rax = r9 & 3 = number of single-element peel passes
	// (this case keeps the peel counter in rax and uses edi as the data temp).
  3057  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3058  	WORD $0xf749; BYTE $0xd0 // not    r8
  3059  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
  3060  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3061  	LONG $0x03e08348         // and    rax, 3
  3062  	JE   LBB0_1468
  3063  
  3064  LBB0_1467:
	// Peel loop: one element per pass until rax reaches zero.
  3065  	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
  3066  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
  3067  	LONG $0x01c68348         // add    rsi, 1
  3068  	LONG $0xffc08348         // add    rax, -1
  3069  	JNE  LBB0_1467
  3070  
  3071  LBB0_1468:
	// Finished when fewer than 4 elements were pending before the peel.
  3072  	LONG $0x03f88349 // cmp    r8, 3
  3073  	JB   LBB0_1526
  3074  
  3075  LBB0_1469:
	// Main loop: 4 elements per pass until rsi equals r9.
  3076  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  3077  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  3078  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
  3079  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
  3080  	LONG $0x3244be0f; BYTE $0x02 // movsx    eax, byte [rdx + rsi + 2]
  3081  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
  3082  	LONG $0x3244be0f; BYTE $0x03 // movsx    eax, byte [rdx + rsi + 3]
  3083  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
  3084  	LONG $0x04c68348             // add    rsi, 4
  3085  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  3086  	JNE  LBB0_1469
  3087  	JMP  LBB0_1526
  3088  
  3089  LBB0_422:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 4 or more
	// elements go to LBB0_740, otherwise rsi is zeroed and control moves to
	// LBB0_943. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3090  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3091  	JLE  LBB0_1526
  3092  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3093  	LONG $0x04f88341         // cmp    r8d, 4
  3094  	JAE  LBB0_740
  3095  	WORD $0xf631             // xor    esi, esi
  3096  	JMP  LBB0_943
  3097  
  3098  LBB0_425:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 8 or more
	// elements go to LBB0_743, otherwise rsi is zeroed and control moves to
	// LBB0_1082. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3099  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3100  	JLE  LBB0_1526
  3101  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3102  	LONG $0x08f88341         // cmp    r8d, 8
  3103  	JAE  LBB0_743
  3104  	WORD $0xf631             // xor    esi, esi
  3105  	JMP  LBB0_1082
  3106  
  3107  LBB0_428:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 8 or more
	// elements go to LBB0_746, otherwise rsi is zeroed and control moves to
	// LBB0_1087. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3108  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3109  	JLE  LBB0_1526
  3110  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3111  	LONG $0x08f88341         // cmp    r8d, 8
  3112  	JAE  LBB0_746
  3113  	WORD $0xf631             // xor    esi, esi
  3114  	JMP  LBB0_1087
  3115  
  3116  LBB0_431:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 4 or more
	// elements go to LBB0_749, otherwise rsi is zeroed and control moves to
	// LBB0_1092. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3117  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3118  	JLE  LBB0_1526
  3119  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3120  	LONG $0x04f88341         // cmp    r8d, 4
  3121  	JAE  LBB0_749
  3122  	WORD $0xf631             // xor    esi, esi
  3123  	JMP  LBB0_1092
  3124  
  3125  LBB0_434:
	// Count dispatch only: a non-positive r8d goes to LBB0_1526, 8 or more
	// elements go to LBB0_752, otherwise rsi is zeroed and control moves to
	// LBB0_1097. All three targets are outside this chunk, so the element types
	// handled by this case cannot be determined from here.
  3126  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3127  	JLE  LBB0_1526
  3128  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
  3129  	LONG $0x08f88341         // cmp    r8d, 8
  3130  	JAE  LBB0_752
  3131  	WORD $0xf631             // xor    esi, esi
  3132  	JMP  LBB0_1097
  3133  
  3134  LBB0_437: // Widen r8d bytes at [rdx] into zero-extended dwords at [rcx].
  3135  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3136  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  3137  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = count zero-extended to 64 bits
  3138  	LONG $0x08f88341         // cmp    r8d, 8
  3139  	JB   LBB0_439 // fewer than 8 elements: scalar path from index 0
  3140  	LONG $0x0a048d4a         // lea    rax, [rdx + r9] ; rax = one past the last source byte
  3141  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3142  	JBE  LBB0_934 // source ends at or before dest start: go to LBB0_934 (outside this excerpt)
  3143  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the last dest dword
  3144  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3145  	JBE  LBB0_934 // dest ends at or before source start: go to LBB0_934
  3146    
  3147  LBB0_439: // Reached when count < 8 or when the source and dest ranges overlap.
  3148  	WORD $0xf631 // xor    esi, esi ; rsi = element index = 0
  3149    
  3150  LBB0_1474: // Scalar path starting at element index rsi; r9 = total count.
  3151  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3152  	WORD $0xf749; BYTE $0xd0 // not    r8
  3153  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
  3154  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3155  	LONG $0x03e08348         // and    rax, 3 ; rax = r9 mod 4 = trip count of the one-element loop
  3156  	JE   LBB0_1476 // count is a multiple of 4: skip the one-element loop
  3157    
  3158  LBB0_1475: // One element per iteration, rax iterations.
  3159  	LONG $0x323cb60f         // movzx    edi, byte [rdx + rsi]
  3160  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
  3161  	LONG $0x01c68348         // add    rsi, 1
  3162  	LONG $0xffc08348         // add    rax, -1
  3163  	JNE  LBB0_1475
  3164    
  3165  LBB0_1476: // Skip the unrolled loop when r8 (computed at LBB0_1474) is below 3.
  3166  	LONG $0x03f88349 // cmp    r8, 3
  3167  	JB   LBB0_1526
  3168    
  3169  LBB0_1477: // Four elements per iteration until rsi == r9.
  3170  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  3171  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  3172  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  3173  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
  3174  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  3175  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
  3176  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  3177  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
  3178  	LONG $0x04c68348             // add    rsi, 4
  3179  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  3180  	JNE  LBB0_1477
  3181  	JMP  LBB0_1526
  3182  
  3183  LBB0_440: // Copy r8d dwords from [rdx] to [rcx] unchanged.
  3184  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
  3185  	JLE  LBB0_1526 // count <= 0 (signed): nothing to do
  3186  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = count zero-extended to 64 bits
  3187  	LONG $0x08f88341         // cmp    r8d, 8
  3188  	JB   LBB0_442 // fewer than 8 elements: scalar path from index 0
  3189  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9] ; rax = one past the last source dword
  3190  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
  3191  	JBE  LBB0_937 // source ends at or before dest start: go to LBB0_937 (outside this excerpt)
  3192  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the last dest dword
  3193  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  3194  	JBE  LBB0_937 // dest ends at or before source start: go to LBB0_937
  3195    
  3196  LBB0_442: // Reached when count < 8 or when the source and dest ranges overlap.
  3197  	WORD $0xf631 // xor    esi, esi ; rsi = element index = 0
  3198    
  3199  LBB0_1314: // Scalar path starting at element index rsi; r9 = total count.
  3200  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
  3201  	WORD $0xf749; BYTE $0xd0 // not    r8
  3202  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
  3203  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
  3204  	LONG $0x03e78348         // and    rdi, 3 ; rdi = r9 mod 4 = trip count of the one-element loop
  3205  	JE   LBB0_1316 // count is a multiple of 4: skip the one-element loop
  3206    
  3207  LBB0_1315: // One element per iteration, rdi iterations.
  3208  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3209  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3210  	LONG $0x01c68348         // add    rsi, 1
  3211  	LONG $0xffc78348         // add    rdi, -1
  3212  	JNE  LBB0_1315
  3213    
  3214  LBB0_1316: // Skip the unrolled loop when r8 (computed at LBB0_1314) is below 3.
  3215  	LONG $0x03f88349 // cmp    r8, 3
  3216  	JB   LBB0_1526
  3217    
  3218  LBB0_1317: // Four elements per iteration until rsi == r9.
  3219  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  3220  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  3221  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
  3222  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
  3223  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
  3224  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
  3225  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
  3226  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
  3227  	LONG $0x04c68348         // add    rsi, 4
  3228  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  3229  	JNE  LBB0_1317
  3230  	JMP  LBB0_1526
  3231  
  3232  LBB0_446: // Convert doubles at [rdx] to 32-bit integers at [rcx]; rsi and r8 are set before this excerpt.
  3233  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3234  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3235    
  3236  LBB0_447: // Four elements per iteration until rdi == rsi.
  3237  	LONG $0x2c0f48f2; WORD $0xfa04             // cvttsd2si    rax, qword [rdx + 8*rdi] ; truncating double -> signed 64-bit
  3238  	WORD $0x0489; BYTE $0xb9                   // mov    dword [rcx + 4*rdi], eax ; keep the low 32 bits only
  3239  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x08 // cvttsd2si    rax, qword [rdx + 8*rdi + 8]
  3240  	LONG $0x04b94489                           // mov    dword [rcx + 4*rdi + 4], eax
  3241  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x10 // cvttsd2si    rax, qword [rdx + 8*rdi + 16]
  3242  	LONG $0x08b94489                           // mov    dword [rcx + 4*rdi + 8], eax
  3243  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x18 // cvttsd2si    rax, qword [rdx + 8*rdi + 24]
  3244  	LONG $0x0cb94489                           // mov    dword [rcx + 4*rdi + 12], eax
  3245  	LONG $0x04c78348                           // add    rdi, 4
  3246  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  3247  	JNE  LBB0_447
  3248    
  3249  LBB0_448: // Tail: r8 = number of elements still to convert, starting at index rdi.
  3250  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3251  	JE   LBB0_1526 // no leftover elements
  3252  	LONG $0xb90c8d48         // lea    rcx, [rcx + 4*rdi] ; advance dest past the rdi elements already written
  3253  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; advance source past the rdi elements already read
  3254  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3255    
  3256  LBB0_450: // One element per iteration until rsi == r8.
  3257  	LONG $0x2c0f48f2; WORD $0xf204 // cvttsd2si    rax, qword [rdx + 8*rsi]
  3258  	WORD $0x0489; BYTE $0xb1       // mov    dword [rcx + 4*rsi], eax
  3259  	LONG $0x01c68348               // add    rsi, 1
  3260  	WORD $0x3949; BYTE $0xf0       // cmp    r8, rsi
  3261  	JNE  LBB0_450
  3262  	JMP  LBB0_1526
  3263  
  3264  LBB0_454: // SSE path: truncate qwords at [rdx] to dwords at [rcx]; r9 = element count.
  3265  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3266  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = count rounded down to a multiple of 4
  3267  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3268  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3269  	LONG $0x02e8c149         // shr    r8, 2
  3270  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 4 = number of 4-element groups
  3271  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3272  	JE   LBB0_944 // exactly one group: skip the paired loop (LBB0_944 is outside this excerpt)
  3273  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3274  	LONG $0xfee08348         // and    rax, -2
  3275  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3276  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3277    
  3278  LBB0_456: // Two groups (8 elements) per iteration.
  3279  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  3280  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  3281  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232 ; low dword of each qword -> low 64 bits
  3282  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  3283  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; join the two low halves: 4 packed dwords
  3284  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  3285  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  3286  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  3287  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  3288  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  3289  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  3290  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
  3291  	LONG $0x08c78348               // add    rdi, 8
  3292  	LONG $0x02c08348               // add    rax, 2
  3293  	JNE  LBB0_456
  3294  	JMP  LBB0_945 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3295  
  3296  LBB0_457: // SSE path: zero-extend words at [rdx] to dwords at [rcx]; r9 = element count.
  3297  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3298  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = count rounded down to a multiple of 8
  3299  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  3300  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3301  	LONG $0x03e8c149         // shr    r8, 3
  3302  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 8 = number of 8-element groups
  3303  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3304  	JE   LBB0_949 // exactly one group: skip the paired loop (LBB0_949 is outside this excerpt)
  3305  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3306  	LONG $0xfee08348         // and    rax, -2
  3307  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3308  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3309    
  3310  LBB0_459: // Two groups (16 elements) per iteration; each pmovzxwd widens 4 words.
  3311  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi]
  3312  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  3313  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  3314  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  3315  	LONG $0x33380f66; WORD $0x7a44; BYTE $0x10 // pmovzxwd    xmm0, qword [rdx + 2*rdi + 16]
  3316  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x18 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 24]
  3317  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  3318  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  3319  	LONG $0x10c78348                           // add    rdi, 16
  3320  	LONG $0x02c08348                           // add    rax, 2
  3321  	JNE  LBB0_459
  3322  	JMP  LBB0_950 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3323  
  3324  LBB0_460: // SSE path: sign-extend words at [rdx] to dwords at [rcx]; r9 = element count.
  3325  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3326  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = count rounded down to a multiple of 8
  3327  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  3328  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3329  	LONG $0x03e8c149         // shr    r8, 3
  3330  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 8 = number of 8-element groups
  3331  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3332  	JE   LBB0_954 // exactly one group: skip the paired loop (LBB0_954 is outside this excerpt)
  3333  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3334  	LONG $0xfee08348         // and    rax, -2
  3335  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3336  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3337    
  3338  LBB0_462: // Two groups (16 elements) per iteration; each pmovsxwd widens 4 words.
  3339  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi]
  3340  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  3341  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  3342  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  3343  	LONG $0x23380f66; WORD $0x7a44; BYTE $0x10 // pmovsxwd    xmm0, qword [rdx + 2*rdi + 16]
  3344  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x18 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 24]
  3345  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  3346  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  3347  	LONG $0x10c78348                           // add    rdi, 16
  3348  	LONG $0x02c08348                           // add    rax, 2
  3349  	JNE  LBB0_462
  3350  	JMP  LBB0_955 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3351  
  3352  LBB0_463: // SSE path: truncate qwords at [rdx] to dwords at [rcx]; r9 = element count.
  3353  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3354  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = count rounded down to a multiple of 4
  3355  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3356  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3357  	LONG $0x02e8c149         // shr    r8, 2
  3358  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 4 = number of 4-element groups
  3359  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3360  	JE   LBB0_959 // exactly one group: skip the paired loop (LBB0_959 is outside this excerpt)
  3361  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3362  	LONG $0xfee08348         // and    rax, -2
  3363  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3364  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3365    
  3366  LBB0_465: // Two groups (8 elements) per iteration.
  3367  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  3368  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  3369  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232 ; low dword of each qword -> low 64 bits
  3370  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  3371  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; join the two low halves: 4 packed dwords
  3372  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  3373  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  3374  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  3375  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  3376  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  3377  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  3378  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
  3379  	LONG $0x08c78348               // add    rdi, 8
  3380  	LONG $0x02c08348               // add    rax, 2
  3381  	JNE  LBB0_465
  3382  	JMP  LBB0_960 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3383  
  3384  LBB0_466: // SSE path: convert float32 at [rdx] to 32-bit integers at [rcx], with separate handling for lanes >= xmm1; r9 = count.
  3385  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3386  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = count rounded down to a multiple of 8
  3387  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  3388  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3389  	LONG $0x03e8c149         // shr    r8, 3
  3390  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 8 = number of 8-element groups
  3391  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3392  	JE   LBB0_964 // exactly one group: skip the paired loop (LBB0_964 is outside this excerpt)
  3393  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3394  	LONG $0xfee08348         // and    rax, -2
  3395  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3396  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3397  	LONG $0x204d280f         // movaps    xmm1, oword 32[rbp] /* [rip + .LCPI0_3] */ ; NOTE(review): if rbp holds the LCDATA1 base, this is 0x4f000000 (2^31 as float32) per lane - confirm
  3398  	LONG $0x3055280f         // movaps    xmm2, oword 48[rbp] /* [rip + .LCPI0_4] */ ; NOTE(review): if rbp holds the LCDATA1 base, this is 0x80000000 per lane - confirm
  3399    
  3400  LBB0_468: // Two groups (16 elements) per iteration; each 4-lane vector: result = (x < xmm1) ? cvtt(x) : cvtt(x - xmm1) ^ xmm2.
  3401  	LONG $0xba1c100f             // movups    xmm3, oword [rdx + 4*rdi]
  3402  	LONG $0xba64100f; BYTE $0x10 // movups    xmm4, oword [rdx + 4*rdi + 16]
  3403  	WORD $0x280f; BYTE $0xc3     // movaps    xmm0, xmm3
  3404  	LONG $0x01c1c20f             // cmpltps    xmm0, xmm1 ; xmm0 = per-lane mask (x < xmm1); implicit selector for blendvps
  3405  	LONG $0xeb5b0ff3             // cvttps2dq    xmm5, xmm3 ; direct truncating conversion
  3406  	WORD $0x5c0f; BYTE $0xd9     // subps    xmm3, xmm1
  3407  	LONG $0xdb5b0ff3             // cvttps2dq    xmm3, xmm3 ; conversion of (x - xmm1)
  3408  	WORD $0x570f; BYTE $0xda     // xorps    xmm3, xmm2 ; flip the bits given by xmm2
  3409  	LONG $0x14380f66; BYTE $0xdd // blendvps    xmm3, xmm5, xmm0 ; take the direct result where the mask is set
  3410  	WORD $0x280f; BYTE $0xc4     // movaps    xmm0, xmm4
  3411  	LONG $0x01c1c20f             // cmpltps    xmm0, xmm1
  3412  	LONG $0xec5b0ff3             // cvttps2dq    xmm5, xmm4
  3413  	WORD $0x5c0f; BYTE $0xe1     // subps    xmm4, xmm1
  3414  	LONG $0xe45b0ff3             // cvttps2dq    xmm4, xmm4
  3415  	WORD $0x570f; BYTE $0xe2     // xorps    xmm4, xmm2
  3416  	LONG $0x14380f66; BYTE $0xe5 // blendvps    xmm4, xmm5, xmm0
  3417  	LONG $0xb91c110f             // movups    oword [rcx + 4*rdi], xmm3
  3418  	LONG $0xb964110f; BYTE $0x10 // movups    oword [rcx + 4*rdi + 16], xmm4
  3419  	LONG $0xba5c100f; BYTE $0x20 // movups    xmm3, oword [rdx + 4*rdi + 32]
  3420  	WORD $0x280f; BYTE $0xc3     // movaps    xmm0, xmm3
  3421  	LONG $0x01c1c20f             // cmpltps    xmm0, xmm1
  3422  	LONG $0xe35b0ff3             // cvttps2dq    xmm4, xmm3
  3423  	WORD $0x5c0f; BYTE $0xd9     // subps    xmm3, xmm1
  3424  	LONG $0xdb5b0ff3             // cvttps2dq    xmm3, xmm3
  3425  	WORD $0x570f; BYTE $0xda     // xorps    xmm3, xmm2
  3426  	LONG $0x14380f66; BYTE $0xdc // blendvps    xmm3, xmm4, xmm0
  3427  	LONG $0xba64100f; BYTE $0x30 // movups    xmm4, oword [rdx + 4*rdi + 48]
  3428  	WORD $0x280f; BYTE $0xc4     // movaps    xmm0, xmm4
  3429  	LONG $0x01c1c20f             // cmpltps    xmm0, xmm1
  3430  	LONG $0xec5b0ff3             // cvttps2dq    xmm5, xmm4
  3431  	WORD $0x5c0f; BYTE $0xe1     // subps    xmm4, xmm1
  3432  	LONG $0xe45b0ff3             // cvttps2dq    xmm4, xmm4
  3433  	WORD $0x570f; BYTE $0xe2     // xorps    xmm4, xmm2
  3434  	LONG $0x14380f66; BYTE $0xe5 // blendvps    xmm4, xmm5, xmm0
  3435  	LONG $0xb95c110f; BYTE $0x20 // movups    oword [rcx + 4*rdi + 32], xmm3
  3436  	LONG $0xb964110f; BYTE $0x30 // movups    oword [rcx + 4*rdi + 48], xmm4
  3437  	LONG $0x10c78348             // add    rdi, 16
  3438  	LONG $0x02c08348             // add    rax, 2
  3439  	JNE  LBB0_468
  3440  	JMP  LBB0_965 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3441  
  3442  LBB0_475: // Convert zero-extended dwords at [rdx] to doubles at [rcx]; rsi and r8 are set before this excerpt.
  3443  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3444  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3445    
  3446  LBB0_476: // Four elements per iteration until rdi == rsi.
  3447  	WORD $0x048b; BYTE $0xba       // mov    eax, dword [rdx + 4*rdi] ; 32-bit load zero-extends into rax
  3448  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0 ; clear xmm0 before the scalar convert
  3449  	LONG $0x2a0f48f2; BYTE $0xc0   // cvtsi2sd    xmm0, rax ; 64-bit source, so the value is treated as non-negative
  3450  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3451  	LONG $0x04ba448b               // mov    eax, dword [rdx + 4*rdi + 4]
  3452  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3453  	LONG $0x2a0f48f2; BYTE $0xc0   // cvtsi2sd    xmm0, rax
  3454  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3455  	LONG $0x08ba448b               // mov    eax, dword [rdx + 4*rdi + 8]
  3456  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3457  	LONG $0x2a0f48f2; BYTE $0xc0   // cvtsi2sd    xmm0, rax
  3458  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3459  	LONG $0x0cba448b               // mov    eax, dword [rdx + 4*rdi + 12]
  3460  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3461  	LONG $0x2a0f48f2; BYTE $0xc0   // cvtsi2sd    xmm0, rax
  3462  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3463  	LONG $0x04c78348               // add    rdi, 4
  3464  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3465  	JNE  LBB0_476
  3466    
  3467  LBB0_477: // Tail: r8 = number of elements still to convert, starting at index rdi.
  3468  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3469  	JE   LBB0_1526 // no leftover elements
  3470  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance dest past the rdi elements already written
  3471  	LONG $0xba148d48         // lea    rdx, [rdx + 4*rdi] ; advance source past the rdi elements already read
  3472  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3473    
  3474  LBB0_479: // One element per iteration until rsi == r8.
  3475  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
  3476  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3477  	LONG $0x2a0f48f2; BYTE $0xc0 // cvtsi2sd    xmm0, rax
  3478  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3479  	LONG $0x01c68348             // add    rsi, 1
  3480  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
  3481  	JNE  LBB0_479
  3482  	JMP  LBB0_1526
  3483  
  3484  LBB0_483: // Convert sign-extended bytes at [rdx] to doubles at [rcx]; rsi and r8 are set before this excerpt.
  3485  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3486  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3487    
  3488  LBB0_484: // Four elements per iteration until rdi == rsi.
  3489  	LONG $0x3a04be0f               // movsx    eax, byte [rdx + rdi]
  3490  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0 ; clear xmm0 before the scalar convert
  3491  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3492  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3493  	LONG $0x3a44be0f; BYTE $0x01   // movsx    eax, byte [rdx + rdi + 1]
  3494  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3495  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3496  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3497  	LONG $0x3a44be0f; BYTE $0x02   // movsx    eax, byte [rdx + rdi + 2]
  3498  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3499  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3500  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3501  	LONG $0x3a44be0f; BYTE $0x03   // movsx    eax, byte [rdx + rdi + 3]
  3502  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3503  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3504  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3505  	LONG $0x04c78348               // add    rdi, 4
  3506  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3507  	JNE  LBB0_484
  3508    
  3509  LBB0_485: // Tail: r8 = number of elements still to convert, starting at index rdi.
  3510  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3511  	JE   LBB0_1526 // no leftover elements
  3512  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance dest past the rdi elements already written
  3513  	WORD $0x0148; BYTE $0xfa // add    rdx, rdi ; advance source past the rdi bytes already read
  3514  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3515    
  3516  LBB0_487: // One element per iteration until rsi == r8.
  3517  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
  3518  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3519  	LONG $0xc02a0ff2             // cvtsi2sd    xmm0, eax
  3520  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3521  	LONG $0x01c68348             // add    rsi, 1
  3522  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
  3523  	JNE  LBB0_487
  3524  	JMP  LBB0_1526
  3525  
  3526  LBB0_488: // SSE path: convert qwords at [rdx] to doubles at [rcx] by splitting each into 32-bit halves; r9 = element count.
  3527  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  3528  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = count rounded down to a multiple of 4
  3529  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  3530  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  3531  	LONG $0x02e8c149             // shr    r8, 2
  3532  	LONG $0x01c08349             // add    r8, 1 ; r8 = rsi / 4 = number of 4-element groups
  3533  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  3534  	JE   LBB0_969 // exactly one group: skip the paired loop (LBB0_969 is outside this excerpt)
  3535  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  3536  	LONG $0xfee08348             // and    rax, -2
  3537  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3538  	WORD $0xff31                 // xor    edi, edi ; rdi = element index = 0
  3539  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0 ; xmm0 = 0, used to clear the high dwords below
  3540  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI0_6] */ ; NOTE(review): if rbp holds the LCDATA1 base, 0x4330000000000000 (2^52 as double) per lane - confirm
  3541  	LONG $0x556f0f66; BYTE $0x60 // movdqa    xmm2, oword 96[rbp] /* [rip + .LCPI0_7] */ ; NOTE(review): likewise 0x4530000000000000 (2^84 as double) per lane - confirm
  3542  	LONG $0x5d280f66; BYTE $0x70 // movapd    xmm3, oword 112[rbp] /* [rip + .LCPI0_8] */ ; NOTE(review): likewise 0x4530000000100000 (2^84 + 2^52 as double) per lane - confirm
  3543    
  3544  LBB0_490: // Two groups (8 elements) per iteration; each 2-lane vector: ((hi32 | xmm2) - xmm3) + (lo32 | xmm1).
  3545  	LONG $0x246f0ff3; BYTE $0xfa   // movdqu    xmm4, oword [rdx + 8*rdi]
  3546  	LONG $0x6c6f0ff3; WORD $0x10fa // movdqu    xmm5, oword [rdx + 8*rdi + 16]
  3547  	LONG $0xf46f0f66               // movdqa    xmm6, xmm4
  3548  	LONG $0x0e3a0f66; WORD $0xccf0 // pblendw    xmm6, xmm0, 204 ; zero the high dword of each qword, keeping lo32
  3549  	LONG $0xf1eb0f66               // por    xmm6, xmm1 ; lo32 | xmm1
  3550  	LONG $0xd4730f66; BYTE $0x20   // psrlq    xmm4, 32 ; hi32 moved to the low dword
  3551  	LONG $0xe2eb0f66               // por    xmm4, xmm2 ; hi32 | xmm2
  3552  	LONG $0xe35c0f66               // subpd    xmm4, xmm3
  3553  	LONG $0xe6580f66               // addpd    xmm4, xmm6
  3554  	LONG $0xf56f0f66               // movdqa    xmm6, xmm5
  3555  	LONG $0x0e3a0f66; WORD $0xccf0 // pblendw    xmm6, xmm0, 204
  3556  	LONG $0xf1eb0f66               // por    xmm6, xmm1
  3557  	LONG $0xd5730f66; BYTE $0x20   // psrlq    xmm5, 32
  3558  	LONG $0xeaeb0f66               // por    xmm5, xmm2
  3559  	LONG $0xeb5c0f66               // subpd    xmm5, xmm3
  3560  	LONG $0xee580f66               // addpd    xmm5, xmm6
  3561  	LONG $0x24110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm4
  3562  	LONG $0x6c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm5
  3563  	LONG $0x646f0ff3; WORD $0x20fa // movdqu    xmm4, oword [rdx + 8*rdi + 32]
  3564  	LONG $0x6c6f0ff3; WORD $0x30fa // movdqu    xmm5, oword [rdx + 8*rdi + 48]
  3565  	LONG $0xf46f0f66               // movdqa    xmm6, xmm4
  3566  	LONG $0x0e3a0f66; WORD $0xccf0 // pblendw    xmm6, xmm0, 204
  3567  	LONG $0xf1eb0f66               // por    xmm6, xmm1
  3568  	LONG $0xd4730f66; BYTE $0x20   // psrlq    xmm4, 32
  3569  	LONG $0xe2eb0f66               // por    xmm4, xmm2
  3570  	LONG $0xe35c0f66               // subpd    xmm4, xmm3
  3571  	LONG $0xe6580f66               // addpd    xmm4, xmm6
  3572  	LONG $0xf56f0f66               // movdqa    xmm6, xmm5
  3573  	LONG $0x0e3a0f66; WORD $0xccf0 // pblendw    xmm6, xmm0, 204
  3574  	LONG $0xf1eb0f66               // por    xmm6, xmm1
  3575  	LONG $0xd5730f66; BYTE $0x20   // psrlq    xmm5, 32
  3576  	LONG $0xeaeb0f66               // por    xmm5, xmm2
  3577  	LONG $0xeb5c0f66               // subpd    xmm5, xmm3
  3578  	LONG $0xee580f66               // addpd    xmm5, xmm6
  3579  	LONG $0x64110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm4
  3580  	LONG $0x6c110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm5
  3581  	LONG $0x08c78348               // add    rdi, 8
  3582  	LONG $0x02c08348               // add    rax, 2
  3583  	JNE  LBB0_490
  3584  	JMP  LBB0_970 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3585  
  3586  LBB0_491: // Convert zero-extended words at [rdx] to doubles at [rcx]; rsi and r8 are set before this excerpt.
  3587  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3588  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3589    
  3590  LBB0_492: // Four elements per iteration until rdi == rsi.
  3591  	LONG $0x7a04b70f               // movzx    eax, word [rdx + 2*rdi]
  3592  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0 ; clear xmm0 before the scalar convert
  3593  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3594  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3595  	LONG $0x7a44b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rdi + 2]
  3596  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3597  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3598  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3599  	LONG $0x7a44b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rdi + 4]
  3600  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3601  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3602  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3603  	LONG $0x7a44b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rdi + 6]
  3604  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3605  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3606  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3607  	LONG $0x04c78348               // add    rdi, 4
  3608  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3609  	JNE  LBB0_492
  3610    
  3611  LBB0_493: // Tail: r8 = number of elements still to convert, starting at index rdi.
  3612  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3613  	JE   LBB0_1526 // no leftover elements
  3614  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance dest past the rdi elements already written
  3615  	LONG $0x7a148d48         // lea    rdx, [rdx + 2*rdi] ; advance source past the rdi elements already read
  3616  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3617    
  3618  LBB0_495: // One element per iteration until rsi == r8.
  3619  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  3620  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3621  	LONG $0xc02a0ff2             // cvtsi2sd    xmm0, eax
  3622  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3623  	LONG $0x01c68348             // add    rsi, 1
  3624  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
  3625  	JNE  LBB0_495
  3626  	JMP  LBB0_1526
  3627  
  3628  LBB0_496: // Convert sign-extended words at [rdx] to doubles at [rcx]; rsi and r8 are set before this excerpt.
  3629  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3630  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3631    
  3632  LBB0_497: // Four elements per iteration until rdi == rsi.
  3633  	LONG $0x7a04bf0f               // movsx    eax, word [rdx + 2*rdi]
  3634  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0 ; clear xmm0 before the scalar convert
  3635  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3636  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3637  	LONG $0x7a44bf0f; BYTE $0x02   // movsx    eax, word [rdx + 2*rdi + 2]
  3638  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3639  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3640  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3641  	LONG $0x7a44bf0f; BYTE $0x04   // movsx    eax, word [rdx + 2*rdi + 4]
  3642  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3643  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3644  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3645  	LONG $0x7a44bf0f; BYTE $0x06   // movsx    eax, word [rdx + 2*rdi + 6]
  3646  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3647  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3648  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3649  	LONG $0x04c78348               // add    rdi, 4
  3650  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3651  	JNE  LBB0_497
  3652    
  3653  LBB0_498: // Tail: r8 = number of elements still to convert, starting at index rdi.
  3654  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3655  	JE   LBB0_1526 // no leftover elements
  3656  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance dest past the rdi elements already written
  3657  	LONG $0x7a148d48         // lea    rdx, [rdx + 2*rdi] ; advance source past the rdi elements already read
  3658  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3659    
  3660  LBB0_500: // One element per iteration until rsi == r8.
  3661  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
  3662  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3663  	LONG $0xc02a0ff2             // cvtsi2sd    xmm0, eax
  3664  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3665  	LONG $0x01c68348             // add    rsi, 1
  3666  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
  3667  	JNE  LBB0_500
  3668  	JMP  LBB0_1526
  3669  
  3670  LBB0_501: // Convert signed qwords at [rdx] to doubles at [rcx]; rsi and rax are set before this excerpt.
  3671  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; round rsi down to a multiple of 4
  3672  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3673    
  3674  LBB0_502: // Four elements per iteration until rdi == rsi.
  3675  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0 ; clear xmm0 before the scalar convert
  3676  	LONG $0x2a0f48f2; WORD $0xfa04             // cvtsi2sd    xmm0, qword [rdx + 8*rdi]
  3677  	LONG $0x04110ff2; BYTE $0xf9               // movsd    qword [rcx + 8*rdi], xmm0
  3678  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  3679  	LONG $0x2a0f48f2; WORD $0xfa44; BYTE $0x08 // cvtsi2sd    xmm0, qword [rdx + 8*rdi + 8]
  3680  	LONG $0x44110ff2; WORD $0x08f9             // movsd    qword [rcx + 8*rdi + 8], xmm0
  3681  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  3682  	LONG $0x2a0f48f2; WORD $0xfa44; BYTE $0x10 // cvtsi2sd    xmm0, qword [rdx + 8*rdi + 16]
  3683  	LONG $0x44110ff2; WORD $0x10f9             // movsd    qword [rcx + 8*rdi + 16], xmm0
  3684  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  3685  	LONG $0x2a0f48f2; WORD $0xfa44; BYTE $0x18 // cvtsi2sd    xmm0, qword [rdx + 8*rdi + 24]
  3686  	LONG $0x44110ff2; WORD $0x18f9             // movsd    qword [rcx + 8*rdi + 24], xmm0
  3687  	LONG $0x04c78348                           // add    rdi, 4
  3688  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  3689  	JNE  LBB0_502
  3690    
  3691  LBB0_503: // Tail: rax (not r8, unlike the sibling loops) = number of elements still to convert, starting at index rdi.
  3692  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3693  	JE   LBB0_1526 // no leftover elements
  3694  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance dest past the rdi elements already written
  3695  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; advance source past the rdi elements already read
  3696  	WORD $0xf631             // xor    esi, esi ; rsi = tail index = 0
  3697    
  3698  LBB0_505: // One element per iteration until rsi == rax.
  3699  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3700  	LONG $0x2a0f48f2; WORD $0xf204 // cvtsi2sd    xmm0, qword [rdx + 8*rsi]
  3701  	LONG $0x04110ff2; BYTE $0xf1   // movsd    qword [rcx + 8*rsi], xmm0
  3702  	LONG $0x01c68348               // add    rsi, 1
  3703  	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
  3704  	JNE  LBB0_505
  3705  	JMP  LBB0_1526
  3706  
  3707  LBB0_506: // SSE path: widen float32 at [rdx] to float64 at [rcx]; r9 = element count.
  3708  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3709  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = count rounded down to a multiple of 4
  3710  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3711  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  3712  	LONG $0x02e8c149         // shr    r8, 2
  3713  	LONG $0x01c08349         // add    r8, 1 ; r8 = rsi / 4 = number of 4-element groups
  3714  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3715  	JE   LBB0_975 // exactly one group: skip the paired loop (LBB0_975 is outside this excerpt)
  3716  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  3717  	LONG $0xfee08348         // and    rax, -2
  3718  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 rounded down to even), counted up to 0 below
  3719  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  3720    
  3721  LBB0_508: // Two groups (8 elements) per iteration; each cvtps2pd widens 2 floats.
  3722  	LONG $0xba045a0f               // cvtps2pd    xmm0, qword [rdx + 4*rdi]
  3723  	LONG $0xba4c5a0f; BYTE $0x08   // cvtps2pd    xmm1, qword [rdx + 4*rdi + 8]
  3724  	LONG $0xf904110f               // movups    oword [rcx + 8*rdi], xmm0
  3725  	LONG $0xf94c110f; BYTE $0x10   // movups    oword [rcx + 8*rdi + 16], xmm1
  3726  	LONG $0xba445a0f; BYTE $0x10   // cvtps2pd    xmm0, qword [rdx + 4*rdi + 16]
  3727  	LONG $0xba4c5a0f; BYTE $0x18   // cvtps2pd    xmm1, qword [rdx + 4*rdi + 24]
  3728  	LONG $0x44110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm0
  3729  	LONG $0x4c110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm1
  3730  	LONG $0x08c78348               // add    rdi, 8
  3731  	LONG $0x02c08348               // add    rax, 2
  3732  	JNE  LBB0_508
  3733  	JMP  LBB0_976 // continue outside this excerpt (presumably odd-group and tail handling - confirm)
  3734  
  3735  LBB0_509:
        	// Byte -> double conversion, unrolled by 4.
        	// esi (set before this label) is rounded down to a multiple of 4 and
        	// used as the bound of the unrolled loop; rdi is the element index.
  3736  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3737  	WORD $0xff31             // xor    edi, edi
  3738  
  3739  LBB0_510:
        	// Four times per iteration: zero-extend the byte at [rdx + rdi + k],
        	// convert it to a double, store it at [rcx + 8*rdi + 8*k].
        	// xmm0 is zeroed before every scalar convert.
  3740  	LONG $0x3a04b60f               // movzx    eax, byte [rdx + rdi]
  3741  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3742  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3743  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3744  	LONG $0x3a44b60f; BYTE $0x01   // movzx    eax, byte [rdx + rdi + 1]
  3745  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3746  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3747  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3748  	LONG $0x3a44b60f; BYTE $0x02   // movzx    eax, byte [rdx + rdi + 2]
  3749  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3750  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3751  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3752  	LONG $0x3a44b60f; BYTE $0x03   // movzx    eax, byte [rdx + rdi + 3]
  3753  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3754  	LONG $0xc02a0ff2               // cvtsi2sd    xmm0, eax
  3755  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3756  	LONG $0x04c78348               // add    rdi, 4
  3757  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3758  	JNE  LBB0_510
  3759  
  3760  LBB0_511:
        	// Tail: r8 holds the number of elements still to convert (computed
        	// before this block - presumably the count modulo 4, verify at the
        	// jump source). Nothing left -> LBB0_1526. Otherwise rebase rcx/rdx
        	// past the rdi elements already handled and restart the index at 0.
  3761  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  3762  	JE   LBB0_1526
  3763  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
  3764  	WORD $0x0148; BYTE $0xfa // add    rdx, rdi
  3765  	WORD $0xf631             // xor    esi, esi
  3766  
  3767  LBB0_513:
        	// One byte -> double per iteration until rsi == r8.
  3768  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  3769  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3770  	LONG $0xc02a0ff2             // cvtsi2sd    xmm0, eax
  3771  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3772  	LONG $0x01c68348             // add    rsi, 1
  3773  	WORD $0x3949; BYTE $0xf0     // cmp    r8, rsi
  3774  	JNE  LBB0_513
  3775  	JMP  LBB0_1526
  3776  
  3777  LBB0_514:
        	// Signed 32-bit integer -> double conversion, unrolled by 4.
        	// esi (set before this label) is rounded down to a multiple of 4 and
        	// bounds the unrolled loop; rdi is the element index.
  3778  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3779  	WORD $0xff31             // xor    edi, edi
  3780  
  3781  LBB0_515:
        	// Four times per iteration: convert the dword at [rdx + 4*rdi + 4*k]
        	// to a double and store it at [rcx + 8*rdi + 8*k].
  3782  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3783  	LONG $0x042a0ff2; BYTE $0xba   // cvtsi2sd    xmm0, dword [rdx + 4*rdi]
  3784  	LONG $0x04110ff2; BYTE $0xf9   // movsd    qword [rcx + 8*rdi], xmm0
  3785  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3786  	LONG $0x442a0ff2; WORD $0x04ba // cvtsi2sd    xmm0, dword [rdx + 4*rdi + 4]
  3787  	LONG $0x44110ff2; WORD $0x08f9 // movsd    qword [rcx + 8*rdi + 8], xmm0
  3788  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3789  	LONG $0x442a0ff2; WORD $0x08ba // cvtsi2sd    xmm0, dword [rdx + 4*rdi + 8]
  3790  	LONG $0x44110ff2; WORD $0x10f9 // movsd    qword [rcx + 8*rdi + 16], xmm0
  3791  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  3792  	LONG $0x442a0ff2; WORD $0x0cba // cvtsi2sd    xmm0, dword [rdx + 4*rdi + 12]
  3793  	LONG $0x44110ff2; WORD $0x18f9 // movsd    qword [rcx + 8*rdi + 24], xmm0
  3794  	LONG $0x04c78348               // add    rdi, 4
  3795  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3796  	JNE  LBB0_515
  3797  
  3798  LBB0_516:
        	// Tail: rax holds the number of elements still to convert (computed
        	// before this block; not visible in this excerpt). Nothing left ->
        	// LBB0_1526. Otherwise rebase rcx/rdx past the rdi elements already
        	// handled and restart the index (rsi) at 0.
  3799  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3800  	JE   LBB0_1526
  3801  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
  3802  	LONG $0xba148d48         // lea    rdx, [rdx + 4*rdi]
  3803  	WORD $0xf631             // xor    esi, esi
  3804  
  3805  LBB0_518:
        	// One dword -> double per iteration until rsi == rax.
  3806  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  3807  	LONG $0x042a0ff2; BYTE $0xb2 // cvtsi2sd    xmm0, dword [rdx + 4*rsi]
  3808  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  3809  	LONG $0x01c68348             // add    rsi, 1
  3810  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
  3811  	JNE  LBB0_518
  3812  	JMP  LBB0_1526
  3813  
  3814  LBB0_549:
        	// Trip-count setup for a loop that lives outside this excerpt.
        	//   esi = r9d rounded down to a multiple of 4
        	//   rdi = ((rsi - 4) >> 2) + 1   (number of 4-element groups)
        	//   r8d = rdi & 3                (groups left over modulo 4)
        	// With rsi - 4 >= 12 (unsigned), i.e. at least four groups, control
        	// goes to LBB0_801; otherwise rax is zeroed and control goes to
        	// LBB0_803.
  3815  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3816  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3817  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3818  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  3819  	LONG $0x02efc148         // shr    rdi, 2
  3820  	LONG $0x01c78348         // add    rdi, 1
  3821  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  3822  	LONG $0x03e08341         // and    r8d, 3
  3823  	LONG $0x0cf88348         // cmp    rax, 12
  3824  	JAE  LBB0_801
  3825  	WORD $0xc031             // xor    eax, eax
  3826  	JMP  LBB0_803
  3827  
  3828  LBB0_551:
        	// double -> 64-bit integer conversion with a threshold select,
        	// unrolled by 4.
        	// r9d is rounded down to a multiple of 4 (loop bound), rax is the
        	// element index, and xmm0 holds the constant loaded from 0[rbp]
        	// (annotated .LCPI0_0).
        	// NOTE(review): if rbp points at LCDATA1, that constant is
        	// 0x43e0000000000000, i.e. 2^63 as a double - confirm in the prologue.
        	// r10 is not written on this path inside the excerpt; presumably it
        	// holds 0x8000000000000000 - verify at the jump source.
  3829  	LONG $0xfce18341             // and    r9d, -4
  3830  	WORD $0xc031                 // xor    eax, eax
  3831  	LONG $0x45100ff2; BYTE $0x00 // movsd    xmm0, qword 0[rbp] /* [rip + .LCPI0_0] */
  3832  
  3833  LBB0_552:
        	// For each of the four elements x = [rdx + 8*rax + 8*k]:
        	//   a = cvttsd2si(x)                (truncating convert)
        	//   b = cvttsd2si(x - xmm0) ^ r10   (convert relative to the threshold)
        	// ucomisd xmm0, x followed by cmovbe keeps b when xmm0 <= x or the
        	// compare is unordered, and a otherwise; the result is stored to
        	// [rcx + 8*rax + 8*k].
  3834  	LONG $0x0c100ff2; BYTE $0xc2   // movsd    xmm1, qword [rdx + 8*rax]
  3835  	LONG $0xd1280f66               // movapd    xmm2, xmm1
  3836  	LONG $0xd05c0ff2               // subsd    xmm2, xmm0
  3837  	LONG $0x2c0f48f2; BYTE $0xfa   // cvttsd2si    rdi, xmm2
  3838  	WORD $0x314c; BYTE $0xd7       // xor    rdi, r10
  3839  	LONG $0x2c0f48f2; BYTE $0xf1   // cvttsd2si    rsi, xmm1
  3840  	LONG $0xc12e0f66               // ucomisd    xmm0, xmm1
  3841  	LONG $0xf7460f48               // cmovbe    rsi, rdi
  3842  	LONG $0xc1348948               // mov    qword [rcx + 8*rax], rsi
  3843  	LONG $0x4c100ff2; WORD $0x08c2 // movsd    xmm1, qword [rdx + 8*rax + 8]
  3844  	LONG $0xd1280f66               // movapd    xmm2, xmm1
  3845  	LONG $0xd05c0ff2               // subsd    xmm2, xmm0
  3846  	LONG $0x2c0f48f2; BYTE $0xf2   // cvttsd2si    rsi, xmm2
  3847  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  3848  	LONG $0x2c0f48f2; BYTE $0xf9   // cvttsd2si    rdi, xmm1
  3849  	LONG $0xc12e0f66               // ucomisd    xmm0, xmm1
  3850  	LONG $0xfe460f48               // cmovbe    rdi, rsi
  3851  	LONG $0xc17c8948; BYTE $0x08   // mov    qword [rcx + 8*rax + 8], rdi
  3852  	LONG $0x4c100ff2; WORD $0x10c2 // movsd    xmm1, qword [rdx + 8*rax + 16]
  3853  	LONG $0xd1280f66               // movapd    xmm2, xmm1
  3854  	LONG $0xd05c0ff2               // subsd    xmm2, xmm0
  3855  	LONG $0x2c0f48f2; BYTE $0xf2   // cvttsd2si    rsi, xmm2
  3856  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  3857  	LONG $0x2c0f48f2; BYTE $0xf9   // cvttsd2si    rdi, xmm1
  3858  	LONG $0xc12e0f66               // ucomisd    xmm0, xmm1
  3859  	LONG $0xfe460f48               // cmovbe    rdi, rsi
  3860  	LONG $0xc17c8948; BYTE $0x10   // mov    qword [rcx + 8*rax + 16], rdi
  3861  	LONG $0x4c100ff2; WORD $0x18c2 // movsd    xmm1, qword [rdx + 8*rax + 24]
  3862  	LONG $0xd1280f66               // movapd    xmm2, xmm1
  3863  	LONG $0xd05c0ff2               // subsd    xmm2, xmm0
  3864  	LONG $0x2c0f48f2; BYTE $0xf2   // cvttsd2si    rsi, xmm2
  3865  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  3866  	LONG $0x2c0f48f2; BYTE $0xf9   // cvttsd2si    rdi, xmm1
  3867  	LONG $0xc12e0f66               // ucomisd    xmm0, xmm1
  3868  	LONG $0xfe460f48               // cmovbe    rdi, rsi
  3869  	LONG $0xc17c8948; BYTE $0x18   // mov    qword [rcx + 8*rax + 24], rdi
  3870  	LONG $0x04c08348               // add    rax, 4
  3871  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
  3872  	JNE  LBB0_552
  3873  
  3874  LBB0_553:
        	// Tail: r8 holds the number of elements still to convert (computed
        	// before this block). Nothing left -> LBB0_1526. Otherwise rcx is
        	// rebased to the next output slot, rax becomes the rebased input
        	// pointer, rdx becomes the index (0), and xmm0 is reloaded with the
        	// same constant.
  3875  	WORD $0x854d; BYTE $0xc0     // test    r8, r8
  3876  	JE   LBB0_1526
  3877  	LONG $0xc10c8d48             // lea    rcx, [rcx + 8*rax]
  3878  	LONG $0xc2048d48             // lea    rax, [rdx + 8*rax]
  3879  	WORD $0xd231                 // xor    edx, edx
  3880  	LONG $0x45100ff2; BYTE $0x00 // movsd    xmm0, qword 0[rbp] /* [rip + .LCPI0_0] */
  3881  
  3882  LBB0_555:
        	// Same per-element select as the unrolled loop, one element per
        	// iteration, until rdx == r8.
  3883  	LONG $0x0c100ff2; BYTE $0xd0 // movsd    xmm1, qword [rax + 8*rdx]
  3884  	LONG $0xd1280f66             // movapd    xmm2, xmm1
  3885  	LONG $0xd05c0ff2             // subsd    xmm2, xmm0
  3886  	LONG $0x2c0f48f2; BYTE $0xf2 // cvttsd2si    rsi, xmm2
  3887  	WORD $0x314c; BYTE $0xd6     // xor    rsi, r10
  3888  	LONG $0x2c0f48f2; BYTE $0xf9 // cvttsd2si    rdi, xmm1
  3889  	LONG $0xc12e0f66             // ucomisd    xmm0, xmm1
  3890  	LONG $0xfe460f48             // cmovbe    rdi, rsi
  3891  	LONG $0xd13c8948             // mov    qword [rcx + 8*rdx], rdi
  3892  	LONG $0x01c28348             // add    rdx, 1
  3893  	WORD $0x3949; BYTE $0xd0     // cmp    r8, rdx
  3894  	JNE  LBB0_555
  3895  	JMP  LBB0_1526
  3896  
  3897  LBB0_562:
        	// Trip-count setup for a loop that lives outside this excerpt.
        	//   esi = r9d rounded down to a multiple of 4
        	//   rdi = ((rsi - 4) >> 2) + 1   (number of 4-element groups)
        	//   r8d = rdi & 3                (groups left over modulo 4)
        	// With rsi - 4 >= 12 (unsigned), i.e. at least four groups, control
        	// goes to LBB0_812; otherwise rax is zeroed and control goes to
        	// LBB0_814.
  3898  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3899  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3900  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3901  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  3902  	LONG $0x02efc148         // shr    rdi, 2
  3903  	LONG $0x01c78348         // add    rdi, 1
  3904  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  3905  	LONG $0x03e08341         // and    r8d, 3
  3906  	LONG $0x0cf88348         // cmp    rax, 12
  3907  	JAE  LBB0_812
  3908  	WORD $0xc031             // xor    eax, eax
  3909  	JMP  LBB0_814
  3910  
  3911  LBB0_564:
        	// Trip-count setup for a loop that lives outside this excerpt.
        	//   esi = r9d rounded down to a multiple of 4
        	//   rdi = ((rsi - 4) >> 2) + 1   (number of 4-element groups)
        	//   r8d = rdi & 3                (groups left over modulo 4)
        	// With rsi - 4 >= 12 (unsigned), i.e. at least four groups, control
        	// goes to LBB0_819; otherwise rax is zeroed and control goes to
        	// LBB0_821.
  3912  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3913  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3914  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3915  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  3916  	LONG $0x02efc148         // shr    rdi, 2
  3917  	LONG $0x01c78348         // add    rdi, 1
  3918  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  3919  	LONG $0x03e08341         // and    r8d, 3
  3920  	LONG $0x0cf88348         // cmp    rax, 12
  3921  	JAE  LBB0_819
  3922  	WORD $0xc031             // xor    eax, eax
  3923  	JMP  LBB0_821
  3924  
  3925  LBB0_569:
        	// float -> 64-bit integer conversion with a threshold select,
        	// unrolled by 4.
        	// r9d is rounded down to a multiple of 4 (loop bound), rdi is the
        	// element index, xmm0 holds the single-precision constant loaded from
        	// 272[rbp] (annotated .LCPI0_2; its value is not visible in this
        	// excerpt - presumably 2^63 as a float, verify), and r10 holds
        	// 0x8000000000000000 (-9223372036854775808).
  3926  	LONG $0xfce18341                       // and    r9d, -4
  3927  	WORD $0xff31                           // xor    edi, edi
  3928  	QUAD $0x0000011085100ff3               // movss    xmm0, dword 272[rbp] /* [rip + .LCPI0_2] */
  3929  	QUAD $0x000000000000ba49; WORD $0x8000 // mov    r10, -9223372036854775808
  3930  
  3931  LBB0_570:
        	// For each of the four elements x = [rdx + 4*rdi + 4*k]:
        	//   a = cvttss2si(x)                (truncating convert)
        	//   b = cvttss2si(x - xmm0) ^ r10   (convert relative to the threshold)
        	// ucomiss xmm0, x followed by cmovbe keeps b when xmm0 <= x or the
        	// compare is unordered, and a otherwise; the 64-bit result is stored
        	// to [rcx + 8*rdi + 8*k].
  3932  	LONG $0x0c100ff3; BYTE $0xba   // movss    xmm1, dword [rdx + 4*rdi]
  3933  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
  3934  	LONG $0xd05c0ff3               // subss    xmm2, xmm0
  3935  	LONG $0x2c0f48f3; BYTE $0xf2   // cvttss2si    rsi, xmm2
  3936  	WORD $0x314c; BYTE $0xd6       // xor    rsi, r10
  3937  	LONG $0x2c0f48f3; BYTE $0xc1   // cvttss2si    rax, xmm1
  3938  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
  3939  	LONG $0xc6460f48               // cmovbe    rax, rsi
  3940  	LONG $0xf9048948               // mov    qword [rcx + 8*rdi], rax
  3941  	LONG $0x4c100ff3; WORD $0x04ba // movss    xmm1, dword [rdx + 4*rdi + 4]
  3942  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
  3943  	LONG $0xd05c0ff3               // subss    xmm2, xmm0
  3944  	LONG $0x2c0f48f3; BYTE $0xc2   // cvttss2si    rax, xmm2
  3945  	WORD $0x314c; BYTE $0xd0       // xor    rax, r10
  3946  	LONG $0x2c0f48f3; BYTE $0xf1   // cvttss2si    rsi, xmm1
  3947  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
  3948  	LONG $0xf0460f48               // cmovbe    rsi, rax
  3949  	LONG $0xf9748948; BYTE $0x08   // mov    qword [rcx + 8*rdi + 8], rsi
  3950  	LONG $0x4c100ff3; WORD $0x08ba // movss    xmm1, dword [rdx + 4*rdi + 8]
  3951  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
  3952  	LONG $0xd05c0ff3               // subss    xmm2, xmm0
  3953  	LONG $0x2c0f48f3; BYTE $0xc2   // cvttss2si    rax, xmm2
  3954  	WORD $0x314c; BYTE $0xd0       // xor    rax, r10
  3955  	LONG $0x2c0f48f3; BYTE $0xf1   // cvttss2si    rsi, xmm1
  3956  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
  3957  	LONG $0xf0460f48               // cmovbe    rsi, rax
  3958  	LONG $0xf9748948; BYTE $0x10   // mov    qword [rcx + 8*rdi + 16], rsi
  3959  	LONG $0x4c100ff3; WORD $0x0cba // movss    xmm1, dword [rdx + 4*rdi + 12]
  3960  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
  3961  	LONG $0xd05c0ff3               // subss    xmm2, xmm0
  3962  	LONG $0x2c0f48f3; BYTE $0xc2   // cvttss2si    rax, xmm2
  3963  	WORD $0x314c; BYTE $0xd0       // xor    rax, r10
  3964  	LONG $0x2c0f48f3; BYTE $0xf1   // cvttss2si    rsi, xmm1
  3965  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
  3966  	LONG $0xf0460f48               // cmovbe    rsi, rax
  3967  	LONG $0xf9748948; BYTE $0x18   // mov    qword [rcx + 8*rdi + 24], rsi
  3968  	LONG $0x04c78348               // add    rdi, 4
  3969  	WORD $0x3949; BYTE $0xf9       // cmp    r9, rdi
  3970  	JNE  LBB0_570
  3971  
  3972  LBB0_571:
        	// Tail: r8 holds the number of elements still to convert (computed
        	// before this block). Nothing left -> LBB0_1526. Otherwise the
        	// registers are re-assigned: rax = rebased output pointer, rcx =
        	// rebased input pointer, rdx = index (0), r9 = 0x8000000000000000,
        	// and xmm0 is reloaded with the same constant.
  3973  	WORD $0x854d; BYTE $0xc0               // test    r8, r8
  3974  	JE   LBB0_1526
  3975  	LONG $0xf9048d48                       // lea    rax, [rcx + 8*rdi]
  3976  	LONG $0xba0c8d48                       // lea    rcx, [rdx + 4*rdi]
  3977  	WORD $0xd231                           // xor    edx, edx
  3978  	QUAD $0x0000011085100ff3               // movss    xmm0, dword 272[rbp] /* [rip + .LCPI0_2] */
  3979  	QUAD $0x000000000000b949; WORD $0x8000 // mov    r9, -9223372036854775808
  3980  
  3981  LBB0_573:
        	// Same per-element select as the unrolled loop (using r9 as the XOR
        	// mask), one element per iteration, until rdx == r8.
  3982  	LONG $0x0c100ff3; BYTE $0x91 // movss    xmm1, dword [rcx + 4*rdx]
  3983  	WORD $0x280f; BYTE $0xd1     // movaps    xmm2, xmm1
  3984  	LONG $0xd05c0ff3             // subss    xmm2, xmm0
  3985  	LONG $0x2c0f48f3; BYTE $0xfa // cvttss2si    rdi, xmm2
  3986  	WORD $0x314c; BYTE $0xcf     // xor    rdi, r9
  3987  	LONG $0x2c0f48f3; BYTE $0xf1 // cvttss2si    rsi, xmm1
  3988  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1
  3989  	LONG $0xf7460f48             // cmovbe    rsi, rdi
  3990  	LONG $0xd0348948             // mov    qword [rax + 8*rdx], rsi
  3991  	LONG $0x01c28348             // add    rdx, 1
  3992  	WORD $0x3949; BYTE $0xd0     // cmp    r8, rdx
  3993  	JNE  LBB0_573
  3994  	JMP  LBB0_1526
  3995  
  3996  LBB0_577:
        	// Trip-count setup for a loop that lives outside this excerpt.
        	//   esi = r9d rounded down to a multiple of 4
        	//   rdi = ((rsi - 4) >> 2) + 1   (number of 4-element groups)
        	//   r8d = rdi & 3                (groups left over modulo 4)
        	// With rsi - 4 >= 12 (unsigned), i.e. at least four groups, control
        	// goes to LBB0_830; otherwise rax is zeroed and control goes to
        	// LBB0_832.
  3997  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  3998  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3999  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4000  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  4001  	LONG $0x02efc148         // shr    rdi, 2
  4002  	LONG $0x01c78348         // add    rdi, 1
  4003  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  4004  	LONG $0x03e08341         // and    r8d, 3
  4005  	LONG $0x0cf88348         // cmp    rax, 12
  4006  	JAE  LBB0_830
  4007  	WORD $0xc031             // xor    eax, eax
  4008  	JMP  LBB0_832
  4009  
  4010  LBB0_579:
        	// Setup for the 32-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 8
        	//   r8  = ((rsi - 8) >> 3) + 1, i.e. the number of 8-element groups
        	// If rsi - 8 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_980; otherwise rax = -(r8 & -2) counts up to zero in steps of 2,
        	// rdi is the element index, and xmm0 is the pshufb mask loaded from
        	// 176[rbp] (annotated .LCPI0_12).
        	// NOTE(review): if rbp points at LCDATA1, the mask's low 8 bytes are
        	// 00 01 04 05 08 09 0c 0d, i.e. the low 16 bits of each dword - confirm.
  4011  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4012  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  4013  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4014  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4015  	LONG $0x03e8c149         // shr    r8, 3
  4016  	LONG $0x01c08349         // add    r8, 1
  4017  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4018  	JE   LBB0_980
  4019  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4020  	LONG $0xfee08348         // and    rax, -2
  4021  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4022  	WORD $0xff31             // xor    edi, edi
  4023  	QUAD $0x000000b0856f0f66 // movdqa    xmm0, oword 176[rbp] /* [rip + .LCPI0_12] */
  4024  
  4025  LBB0_581:
        	// Two halves per iteration, 8 elements each (16 total): load two
        	// 16-byte vectors of dwords, shuffle each with the mask in xmm0,
        	// merge the two low qwords with punpcklqdq, and store 16 bytes to
        	// [rcx + 2*rdi + ...]. rdi advances by 16 and rax by 2 until rax
        	// reaches zero.
  4026  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  4027  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  4028  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4029  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4030  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4031  	LONG $0x0c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm1
  4032  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32]
  4033  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  4034  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4035  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4036  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4037  	LONG $0x4c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm1
  4038  	LONG $0x10c78348               // add    rdi, 16
  4039  	LONG $0x02c08348               // add    rax, 2
  4040  	JNE  LBB0_581
        	// Continue at LBB0_981 (outside this excerpt).
  4041  	JMP  LBB0_981
  4042  
  4043  LBB0_582:
        	// Setup for the 32-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 8
        	//   r8  = ((rsi - 8) >> 3) + 1, i.e. the number of 8-element groups
        	// If rsi - 8 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_985; otherwise rax = -(r8 & -2) counts up to zero in steps of 2,
        	// rdi is the element index, and xmm0 is the pshufb mask loaded from
        	// 176[rbp] (annotated .LCPI0_12).
        	// NOTE(review): if rbp points at LCDATA1, the mask's low 8 bytes are
        	// 00 01 04 05 08 09 0c 0d, i.e. the low 16 bits of each dword - confirm.
  4044  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4045  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  4046  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4047  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4048  	LONG $0x03e8c149         // shr    r8, 3
  4049  	LONG $0x01c08349         // add    r8, 1
  4050  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4051  	JE   LBB0_985
  4052  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4053  	LONG $0xfee08348         // and    rax, -2
  4054  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4055  	WORD $0xff31             // xor    edi, edi
  4056  	QUAD $0x000000b0856f0f66 // movdqa    xmm0, oword 176[rbp] /* [rip + .LCPI0_12] */
  4057  
  4058  LBB0_584:
        	// Two halves per iteration, 8 elements each (16 total): load two
        	// 16-byte vectors of dwords, shuffle each with the mask in xmm0,
        	// merge the two low qwords with punpcklqdq, and store 16 bytes to
        	// [rcx + 2*rdi + ...]. rdi advances by 16 and rax by 2 until rax
        	// reaches zero.
  4059  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  4060  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  4061  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4062  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4063  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4064  	LONG $0x0c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm1
  4065  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32]
  4066  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  4067  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4068  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4069  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4070  	LONG $0x4c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm1
  4071  	LONG $0x10c78348               // add    rdi, 16
  4072  	LONG $0x02c08348               // add    rax, 2
  4073  	JNE  LBB0_584
        	// Continue at LBB0_986 (outside this excerpt).
  4074  	JMP  LBB0_986
  4075  
  4076  LBB0_585:
        	// Setup for the double -> 16-bit conversion loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_990; otherwise rax = -(r8 & -2) counts up to zero in steps of 2
        	// and rdi is the element index.
  4077  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4078  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4079  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4080  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4081  	LONG $0x02e8c149         // shr    r8, 2
  4082  	LONG $0x01c08349         // add    r8, 1
  4083  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4084  	JE   LBB0_990
  4085  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4086  	LONG $0xfee08348         // and    rax, -2
  4087  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4088  	WORD $0xff31             // xor    edi, edi
  4089  
  4090  LBB0_587:
        	// 8 elements per iteration, two doubles at a time: cvttpd2dq
        	// truncates two doubles to two dwords, pshuflw 232 (0xE8) moves the
        	// low word of the second dword next to the low word of the first,
        	// and movd stores that pair of 16-bit values to [rcx + 2*rdi + ...].
        	// rdi advances by 8 and rax by 2 until rax reaches zero.
  4091  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  4092  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  4093  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  4094  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4095  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4096  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4097  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4098  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4099  	LONG $0x44100f66; WORD $0x20fa // movupd    xmm0, oword [rdx + 8*rdi + 32]
  4100  	LONG $0x4c100f66; WORD $0x30fa // movupd    xmm1, oword [rdx + 8*rdi + 48]
  4101  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  4102  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4103  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4104  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4105  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4106  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4107  	LONG $0x08c78348               // add    rdi, 8
  4108  	LONG $0x02c08348               // add    rax, 2
  4109  	JNE  LBB0_587
        	// Continue at LBB0_991 (outside this excerpt).
  4110  	JMP  LBB0_991
  4111  
  4112  LBB0_588:
        	// Setup for the double -> 16-bit conversion loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_995; otherwise rax = -(r8 & -2) counts up to zero in steps of 2
        	// and rdi is the element index.
  4113  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4114  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4115  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4116  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4117  	LONG $0x02e8c149         // shr    r8, 2
  4118  	LONG $0x01c08349         // add    r8, 1
  4119  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4120  	JE   LBB0_995
  4121  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4122  	LONG $0xfee08348         // and    rax, -2
  4123  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4124  	WORD $0xff31             // xor    edi, edi
  4125  
  4126  LBB0_590:
        	// 8 elements per iteration, two doubles at a time: cvttpd2dq
        	// truncates two doubles to two dwords, pshuflw 232 (0xE8) moves the
        	// low word of the second dword next to the low word of the first,
        	// and movd stores that pair of 16-bit values to [rcx + 2*rdi + ...].
        	// rdi advances by 8 and rax by 2 until rax reaches zero.
  4127  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  4128  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  4129  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  4130  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4131  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4132  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4133  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4134  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4135  	LONG $0x44100f66; WORD $0x20fa // movupd    xmm0, oword [rdx + 8*rdi + 32]
  4136  	LONG $0x4c100f66; WORD $0x30fa // movupd    xmm1, oword [rdx + 8*rdi + 48]
  4137  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  4138  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4139  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4140  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4141  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4142  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4143  	LONG $0x08c78348               // add    rdi, 8
  4144  	LONG $0x02c08348               // add    rax, 2
  4145  	JNE  LBB0_590
        	// Continue at LBB0_996 (outside this excerpt).
  4146  	JMP  LBB0_996
  4147  
  4148  LBB0_597:
        	// Setup for the 64-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1000; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4149  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4150  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4151  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4152  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4153  	LONG $0x02e8c149         // shr    r8, 2
  4154  	LONG $0x01c08349         // add    r8, 1
  4155  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4156  	JE   LBB0_1000
  4157  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4158  	LONG $0xfee08348         // and    rax, -2
  4159  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4160  	WORD $0xff31             // xor    edi, edi
  4161  
  4162  LBB0_599:
        	// 8 elements per iteration, two qwords at a time: pshufd 232 (0xE8)
        	// gathers the low dword of each qword into the low two dwords,
        	// pshuflw 232 then gathers the low word of each of those into the
        	// low dword, and movd stores that pair of 16-bit values to
        	// [rcx + 2*rdi + ...]. rdi advances by 8 and rax by 2 until rax
        	// reaches zero.
  4163  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  4164  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4165  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4166  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4167  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4168  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4169  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4170  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4171  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  4172  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4173  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4174  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4175  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4176  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4177  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4178  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4179  	LONG $0x08c78348               // add    rdi, 8
  4180  	LONG $0x02c08348               // add    rax, 2
  4181  	JNE  LBB0_599
        	// Continue at LBB0_1001 (outside this excerpt).
  4182  	JMP  LBB0_1001
  4183  
  4184  LBB0_600:
        	// Setup for the 64-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1005; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4185  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4186  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4187  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4188  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4189  	LONG $0x02e8c149         // shr    r8, 2
  4190  	LONG $0x01c08349         // add    r8, 1
  4191  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4192  	JE   LBB0_1005
  4193  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4194  	LONG $0xfee08348         // and    rax, -2
  4195  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4196  	WORD $0xff31             // xor    edi, edi
  4197  
  4198  LBB0_602:
        	// 8 elements per iteration, two qwords at a time: pshufd 232 (0xE8)
        	// gathers the low dword of each qword into the low two dwords,
        	// pshuflw 232 then gathers the low word of each of those into the
        	// low dword, and movd stores that pair of 16-bit values to
        	// [rcx + 2*rdi + ...]. rdi advances by 8 and rax by 2 until rax
        	// reaches zero.
  4199  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  4200  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4201  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4202  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4203  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4204  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4205  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4206  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4207  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  4208  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4209  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4210  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4211  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4212  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4213  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4214  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4215  	LONG $0x08c78348               // add    rdi, 8
  4216  	LONG $0x02c08348               // add    rax, 2
  4217  	JNE  LBB0_602
        	// Continue at LBB0_1006 (outside this excerpt).
  4218  	JMP  LBB0_1006
  4219  
  4220  LBB0_615:
        	// Setup for the 64-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1010; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4221  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4222  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4223  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4224  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4225  	LONG $0x02e8c149         // shr    r8, 2
  4226  	LONG $0x01c08349         // add    r8, 1
  4227  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4228  	JE   LBB0_1010
  4229  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4230  	LONG $0xfee08348         // and    rax, -2
  4231  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4232  	WORD $0xff31             // xor    edi, edi
  4233  
  4234  LBB0_617:
        	// 8 elements per iteration, two qwords at a time: pshufd 232 (0xE8)
        	// gathers the low dword of each qword into the low two dwords,
        	// pshuflw 232 then gathers the low word of each of those into the
        	// low dword, and movd stores that pair of 16-bit values to
        	// [rcx + 2*rdi + ...]. rdi advances by 8 and rax by 2 until rax
        	// reaches zero.
  4235  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  4236  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4237  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4238  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4239  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4240  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4241  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4242  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4243  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  4244  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4245  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4246  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4247  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4248  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4249  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4250  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4251  	LONG $0x08c78348               // add    rdi, 8
  4252  	LONG $0x02c08348               // add    rax, 2
  4253  	JNE  LBB0_617
        	// Continue at LBB0_1011 (outside this excerpt).
  4254  	JMP  LBB0_1011
  4255  
  4256  LBB0_618:
        	// Setup for the 64-bit -> 16-bit narrowing loop below.
        	//   esi = r9d rounded down to a multiple of 4
        	//   r8  = ((rsi - 4) >> 2) + 1, i.e. the number of 4-element groups
        	// If rsi - 4 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1015; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4257  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4258  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  4259  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4260  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4261  	LONG $0x02e8c149         // shr    r8, 2
  4262  	LONG $0x01c08349         // add    r8, 1
  4263  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4264  	JE   LBB0_1015
  4265  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4266  	LONG $0xfee08348         // and    rax, -2
  4267  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4268  	WORD $0xff31             // xor    edi, edi
  4269  
  4270  LBB0_620:
        	// 8 elements per iteration, two qwords at a time: pshufd 232 (0xE8)
        	// gathers the low dword of each qword into the low two dwords,
        	// pshuflw 232 then gathers the low word of each of those into the
        	// low dword, and movd stores that pair of 16-bit values to
        	// [rcx + 2*rdi + ...]. rdi advances by 8 and rax by 2 until rax
        	// reaches zero.
  4271  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  4272  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4273  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4274  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4275  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4276  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4277  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  4278  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  4279  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  4280  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4281  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4282  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  4283  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4284  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  4285  	LONG $0x447e0f66; WORD $0x0879 // movd    dword [rcx + 2*rdi + 8], xmm0
  4286  	LONG $0x4c7e0f66; WORD $0x0c79 // movd    dword [rcx + 2*rdi + 12], xmm1
  4287  	LONG $0x08c78348               // add    rdi, 8
  4288  	LONG $0x02c08348               // add    rax, 2
  4289  	JNE  LBB0_620
        	// Continue at LBB0_1016 (outside this excerpt).
  4290  	JMP  LBB0_1016
  4291  
  4292  LBB0_621:
        	// Setup for the float -> 16-bit conversion loop below (unsigned-
        	// saturating pack).
        	//   esi = r9d rounded down to a multiple of 8
        	//   r8  = ((rsi - 8) >> 3) + 1, i.e. the number of 8-element groups
        	// If rsi - 8 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1020; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4293  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4294  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  4295  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4296  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4297  	LONG $0x03e8c149         // shr    r8, 3
  4298  	LONG $0x01c08349         // add    r8, 1
  4299  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4300  	JE   LBB0_1020
  4301  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4302  	LONG $0xfee08348         // and    rax, -2
  4303  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4304  	WORD $0xff31             // xor    edi, edi
  4305  
  4306  LBB0_623:
        	// Two halves per iteration, 8 elements each (16 total): load two
        	// vectors of four floats, truncate each to dwords with cvttps2dq,
        	// pack the eight dwords to words with unsigned saturation
        	// (packusdw), and store 16 bytes to [rcx + 2*rdi + ...]. rdi
        	// advances by 16 and rax by 2 until rax reaches zero.
  4307  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  4308  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  4309  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  4310  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  4311  	LONG $0x2b380f66; BYTE $0xc1   // packusdw    xmm0, xmm1
  4312  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  4313  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]
  4314  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  4315  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  4316  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  4317  	LONG $0x2b380f66; BYTE $0xc1   // packusdw    xmm0, xmm1
  4318  	LONG $0x447f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm0
  4319  	LONG $0x10c78348               // add    rdi, 16
  4320  	LONG $0x02c08348               // add    rax, 2
  4321  	JNE  LBB0_623
        	// Continue at LBB0_1021 (outside this excerpt).
  4322  	JMP  LBB0_1021
  4323  
  4324  LBB0_624:
        	// Setup for the float -> 16-bit conversion loop below (signed-
        	// saturating pack).
        	//   esi = r9d rounded down to a multiple of 8
        	//   r8  = ((rsi - 8) >> 3) + 1, i.e. the number of 8-element groups
        	// If rsi - 8 == 0 (exactly one group) the paired loop is skipped via
        	// LBB0_1025; otherwise rax = -(r8 & -2) counts up to zero in steps of
        	// 2 and rdi is the element index.
  4325  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4326  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  4327  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4328  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4329  	LONG $0x03e8c149         // shr    r8, 3
  4330  	LONG $0x01c08349         // add    r8, 1
  4331  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4332  	JE   LBB0_1025
  4333  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4334  	LONG $0xfee08348         // and    rax, -2
  4335  	WORD $0xf748; BYTE $0xd8 // neg    rax
  4336  	WORD $0xff31             // xor    edi, edi
  4337  
  4338  LBB0_626:
        	// Two halves per iteration, 8 elements each (16 total): load two
        	// vectors of four floats, truncate each to dwords with cvttps2dq,
        	// pack the eight dwords to words with signed saturation (packssdw),
        	// and store 16 bytes to [rcx + 2*rdi + ...]. rdi advances by 16 and
        	// rax by 2 until rax reaches zero.
  4339  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  4340  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  4341  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  4342  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  4343  	LONG $0xc16b0f66               // packssdw    xmm0, xmm1
  4344  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  4345  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]
  4346  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  4347  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  4348  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  4349  	LONG $0xc16b0f66               // packssdw    xmm0, xmm1
  4350  	LONG $0x447f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm0
  4351  	LONG $0x10c78348               // add    rdi, 16
  4352  	LONG $0x02c08348               // add    rax, 2
  4353  	JNE  LBB0_626
        	// Continue at LBB0_1026 (outside this excerpt).
  4354  	JMP  LBB0_1026
  4355  
  4356  LBB0_633: // setup for the 2x-unrolled vector loop at LBB0_635 (8 elements per group)
  4357  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4358  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4359  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4360  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4361  	LONG $0x03e8c149         // shr    r8, 3
  4362  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4363  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4364  	JE   LBB0_1030 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4365  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4366  	LONG $0xfee08348         // and    rax, -2
  4367  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4368  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4369  	QUAD $0x000000b0856f0f66 // movdqa    xmm0, oword 176[rbp] /* [rip + .LCPI0_12] */ ; pshufb mask. NOTE(review): assuming rbp addresses LCDATA1, +0xb0 has low-half bytes 00 01 04 05 08 09 0c 0d, i.e. the low 16 bits of each 32-bit lane -- confirm rbp base
  4370  
  4371  LBB0_635: // each iteration: 16 x 32-bit lanes read from [rdx] -> 16 x 16-bit values written to [rcx] (narrowing by byte shuffle, no saturation)
  4372  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  4373  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  4374  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0 ; gather the selected bytes into the low qword
  4375  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4376  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 ; xmm1 = low qword of xmm1 : low qword of xmm2
  4377  	LONG $0x0c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm1
  4378  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32]
  4379  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  4380  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4381  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4382  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4383  	LONG $0x4c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm1
  4384  	LONG $0x10c78348               // add    rdi, 16 ; two groups of 8 elements consumed
  4385  	LONG $0x02c08348               // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4386  	JNE  LBB0_635
  4387  	JMP  LBB0_1031 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4388  
  4389  LBB0_636: // setup for the 2x-unrolled vector loop at LBB0_638 (8 elements per group); same instruction sequence as LBB0_633/LBB0_635 with different exit labels
  4390  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4391  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4392  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4393  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4394  	LONG $0x03e8c149         // shr    r8, 3
  4395  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4396  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4397  	JE   LBB0_1035 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4398  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4399  	LONG $0xfee08348         // and    rax, -2
  4400  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4401  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4402  	QUAD $0x000000b0856f0f66 // movdqa    xmm0, oword 176[rbp] /* [rip + .LCPI0_12] */ ; pshufb mask. NOTE(review): assuming rbp addresses LCDATA1, +0xb0 has low-half bytes 00 01 04 05 08 09 0c 0d, i.e. the low 16 bits of each 32-bit lane -- confirm rbp base
  4403  
  4404  LBB0_638: // each iteration: 16 x 32-bit lanes read from [rdx] -> 16 x 16-bit values written to [rcx] (narrowing by byte shuffle, no saturation)
  4405  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  4406  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  4407  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0 ; gather the selected bytes into the low qword
  4408  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4409  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 ; xmm1 = low qword of xmm1 : low qword of xmm2
  4410  	LONG $0x0c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm1
  4411  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32]
  4412  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  4413  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  4414  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  4415  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  4416  	LONG $0x4c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm1
  4417  	LONG $0x10c78348               // add    rdi, 16 ; two groups of 8 elements consumed
  4418  	LONG $0x02c08348               // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4419  	JNE  LBB0_638
  4420  	JMP  LBB0_1036 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4421  
  4422  LBB0_639: // group-count setup for a 4-elements-per-group loop whose body (LBB0_857/LBB0_859) lies outside this chunk
  4423  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4424  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4425  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4426  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  4427  	LONG $0x02efc148         // shr    rdi, 2
  4428  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (esi - 4)/4 + 1 = number of 4-element groups
  4429  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  4430  	LONG $0x03e08341         // and    r8d, 3 ; r8d = group count mod 4
  4431  	LONG $0x0cf88348         // cmp    rax, 12
  4432  	JAE  LBB0_857 // esi - 4 >= 12, i.e. at least four groups
  4433  	WORD $0xc031             // xor    eax, eax ; fewer than four groups: enter LBB0_859 with rax = 0
  4434  	JMP  LBB0_859
  4435  
  4436  LBB0_641: // setup for the 2x-unrolled vector loop at LBB0_643 (8 elements per group)
  4437  	WORD $0x8944; BYTE $0xce                   // mov    esi, r9d
  4438  	WORD $0xe683; BYTE $0xf8                   // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4439  	LONG $0xf8468d48                           // lea    rax, [rsi - 8]
  4440  	WORD $0x8949; BYTE $0xc0                   // mov    r8, rax
  4441  	LONG $0x03e8c149                           // shr    r8, 3
  4442  	LONG $0x01c08349                           // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4443  	WORD $0x8548; BYTE $0xc0                   // test    rax, rax
  4444  	JE   LBB0_1040 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4445  	WORD $0x894c; BYTE $0xc0                   // mov    rax, r8
  4446  	LONG $0xfee08348                           // and    rax, -2
  4447  	WORD $0xf748; BYTE $0xd8                   // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4448  	WORD $0xff31                               // xor    edi, edi ; rdi = element index, starts at 0
  4449  	QUAD $0x000000d0856f0f66                   // movdqa    xmm0, oword 208[rbp] /* [rip + .LCPI0_14] */ ; NOTE(review): if rbp addresses LCDATA1, this is 4 x 0x4b000000 (float32 2^23) -- confirm rbp base
  4450  	QUAD $0x000000e08d6f0f66                   // movdqa    xmm1, oword 224[rbp] /* [rip + .LCPI0_15] */ ; under the same assumption: 4 x 0x53000000 (float32 2^39)
  4451  	LONG $0xf095280f; WORD $0x0000; BYTE $0x00 // movaps    xmm2, oword 240[rbp] /* [rip + .LCPI0_16] */ ; under the same assumption: 4 x 0x53000080 (float32 2^39 + 2^23)
  4452  
  4453  LBB0_643: // each iteration: 16 x 32-bit integers read from [rdx] -> 16 x float32 written to [rcx] (presumably a uint32 -> float32 cast, done as two 16-bit halves)
  4454  	LONG $0x1c6f0ff3; BYTE $0xba   // movdqu    xmm3, oword [rdx + 4*rdi]
  4455  	LONG $0x646f0ff3; WORD $0x10ba // movdqu    xmm4, oword [rdx + 4*rdi + 16]
  4456  	LONG $0xeb6f0f66               // movdqa    xmm5, xmm3
  4457  	LONG $0x0e3a0f66; WORD $0xaae8 // pblendw    xmm5, xmm0, 170 ; keep low 16 bits of each lane, take the upper word from xmm0
  4458  	LONG $0xd3720f66; BYTE $0x10   // psrld    xmm3, 16 ; high 16 bits of each lane moved to the low word
  4459  	LONG $0x0e3a0f66; WORD $0xaad9 // pblendw    xmm3, xmm1, 170 ; ... with the upper word taken from xmm1
  4460  	WORD $0x5c0f; BYTE $0xda       // subps    xmm3, xmm2 ; subtract the combined bias
  4461  	WORD $0x580f; BYTE $0xdd       // addps    xmm3, xmm5 ; high-half value + low-half value
  4462  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
  4463  	LONG $0x0e3a0f66; WORD $0xaae8 // pblendw    xmm5, xmm0, 170
  4464  	LONG $0xd4720f66; BYTE $0x10   // psrld    xmm4, 16
  4465  	LONG $0x0e3a0f66; WORD $0xaae1 // pblendw    xmm4, xmm1, 170
  4466  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
  4467  	WORD $0x580f; BYTE $0xe5       // addps    xmm4, xmm5
  4468  	LONG $0xb91c110f               // movups    oword [rcx + 4*rdi], xmm3
  4469  	LONG $0xb964110f; BYTE $0x10   // movups    oword [rcx + 4*rdi + 16], xmm4
  4470  	LONG $0x5c6f0ff3; WORD $0x20ba // movdqu    xmm3, oword [rdx + 4*rdi + 32]
  4471  	LONG $0x646f0ff3; WORD $0x30ba // movdqu    xmm4, oword [rdx + 4*rdi + 48]
  4472  	LONG $0xeb6f0f66               // movdqa    xmm5, xmm3
  4473  	LONG $0x0e3a0f66; WORD $0xaae8 // pblendw    xmm5, xmm0, 170
  4474  	LONG $0xd3720f66; BYTE $0x10   // psrld    xmm3, 16
  4475  	LONG $0x0e3a0f66; WORD $0xaad9 // pblendw    xmm3, xmm1, 170
  4476  	WORD $0x5c0f; BYTE $0xda       // subps    xmm3, xmm2
  4477  	WORD $0x580f; BYTE $0xdd       // addps    xmm3, xmm5
  4478  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
  4479  	LONG $0x0e3a0f66; WORD $0xaae8 // pblendw    xmm5, xmm0, 170
  4480  	LONG $0xd4720f66; BYTE $0x10   // psrld    xmm4, 16
  4481  	LONG $0x0e3a0f66; WORD $0xaae1 // pblendw    xmm4, xmm1, 170
  4482  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
  4483  	WORD $0x580f; BYTE $0xe5       // addps    xmm4, xmm5
  4484  	LONG $0xb95c110f; BYTE $0x20   // movups    oword [rcx + 4*rdi + 32], xmm3
  4485  	LONG $0xb964110f; BYTE $0x30   // movups    oword [rcx + 4*rdi + 48], xmm4
  4486  	LONG $0x10c78348               // add    rdi, 16 ; two groups of 8 elements consumed
  4487  	LONG $0x02c08348               // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4488  	JNE  LBB0_643
  4489  	JMP  LBB0_1041 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4490  
  4491  LBB0_644: // scalar path, unrolled x4; esi and r8 are set by the code that jumps here (outside this chunk)
  4492  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi rounded down to a multiple of 4 = loop bound
  4493  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4494  
  4495  LBB0_645: // each iteration: 4 x float64 at [rdx] -> 4 x 64-bit integers at [rcx] via truncating cvttsd2si (presumably a float64 -> int64 cast)
  4496  	LONG $0x2c0f48f2; WORD $0xfa04             // cvttsd2si    rax, qword [rdx + 8*rdi]
  4497  	LONG $0xf9048948                           // mov    qword [rcx + 8*rdi], rax
  4498  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x08 // cvttsd2si    rax, qword [rdx + 8*rdi + 8]
  4499  	LONG $0xf9448948; BYTE $0x08               // mov    qword [rcx + 8*rdi + 8], rax
  4500  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x10 // cvttsd2si    rax, qword [rdx + 8*rdi + 16]
  4501  	LONG $0xf9448948; BYTE $0x10               // mov    qword [rcx + 8*rdi + 16], rax
  4502  	LONG $0x2c0f48f2; WORD $0xfa44; BYTE $0x18 // cvttsd2si    rax, qword [rdx + 8*rdi + 24]
  4503  	LONG $0xf9448948; BYTE $0x18               // mov    qword [rcx + 8*rdi + 24], rax
  4504  	LONG $0x04c78348                           // add    rdi, 4
  4505  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  4506  	JNE  LBB0_645
  4507  
  4508  LBB0_646: // remainder: r8 = number of leftover elements (computed outside this chunk)
  4509  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  4510  	JE   LBB0_1526 // nothing left; LBB0_1526 is outside this chunk (presumably the common exit)
  4511  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance the output pointer past the elements already written
  4512  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; advance the input pointer likewise
  4513  	WORD $0xf631             // xor    esi, esi ; rsi = index within the remainder
  4514  
  4515  LBB0_648: // one element per iteration until rsi == r8
  4516  	LONG $0x2c0f48f2; WORD $0xf204 // cvttsd2si    rax, qword [rdx + 8*rsi]
  4517  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  4518  	LONG $0x01c68348               // add    rsi, 1
  4519  	WORD $0x3949; BYTE $0xf0       // cmp    r8, rsi
  4520  	JNE  LBB0_648
  4521  	JMP  LBB0_1526
  4522  
  4523  LBB0_649: // setup for the 2x-unrolled vector loop at LBB0_651 (4 elements per group)
  4524  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4525  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4526  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4527  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4528  	LONG $0x02e8c149         // shr    r8, 2
  4529  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 4)/4 + 1 = number of 4-element groups
  4530  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4531  	JE   LBB0_1045 // esi == 4 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4532  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4533  	LONG $0xfee08348         // and    rax, -2
  4534  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4535  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4536  
  4537  LBB0_651: // each iteration: 8 x float64 read from [rdx] -> 8 x float32 written to [rcx]
  4538  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  4539  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  4540  	LONG $0xc05a0f66               // cvtpd2ps    xmm0, xmm0 ; 2 x float64 -> 2 x float32 in the low qword
  4541  	LONG $0xc95a0f66               // cvtpd2ps    xmm1, xmm1
  4542  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1 ; concatenate the two low qwords into 4 x float32
  4543  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
  4544  	LONG $0x44100f66; WORD $0x20fa // movupd    xmm0, oword [rdx + 8*rdi + 32]
  4545  	LONG $0x4c100f66; WORD $0x30fa // movupd    xmm1, oword [rdx + 8*rdi + 48]
  4546  	LONG $0xc05a0f66               // cvtpd2ps    xmm0, xmm0
  4547  	LONG $0xc95a0f66               // cvtpd2ps    xmm1, xmm1
  4548  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1
  4549  	LONG $0x44110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm0
  4550  	LONG $0x08c78348               // add    rdi, 8 ; two groups of 4 elements consumed
  4551  	LONG $0x02c08348               // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4552  	JNE  LBB0_651
  4553  	JMP  LBB0_1046 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4554  
  4555  LBB0_661: // setup for the 2x-unrolled loop at LBB0_663 (4 elements per group); the counter lives in r10 because rax is used as scratch inside the loop
  4556  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4557  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4558  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4559  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4560  	LONG $0x02e8c149         // shr    r8, 2
  4561  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 4)/4 + 1 = number of 4-element groups
  4562  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4563  	JE   LBB0_1050 // esi == 4 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4564  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
  4565  	LONG $0xfee28349         // and    r10, -2
  4566  	WORD $0xf749; BYTE $0xda // neg    r10 ; r10 = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4567  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4568  	QUAD $0x000000a0956f0f66 // movdqa    xmm2, oword 160[rbp] /* [rip + .LCPI0_11] */ ; NOTE(review): if rbp addresses LCDATA1, this is 2 x qword 1 (mask for the lowest bit) -- confirm rbp base
  4569  
  4570  LBB0_663: // each iteration: 8 x 64-bit integers read from [rdx] -> 8 x float32 written to [rcx] (presumably a uint64 -> float32 cast). Lanes with the top bit set are halved (low bit OR-ed back in), converted as signed, then doubled.
  4571  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  4572  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
  4573  	LONG $0xcadb0f66                           // pand    xmm1, xmm2 ; isolate the bits selected by xmm2
  4574  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
  4575  	LONG $0xd3730f66; BYTE $0x01               // psrlq    xmm3, 1
  4576  	LONG $0xd9eb0f66                           // por    xmm3, xmm1 ; xmm3 = (v >> 1) | (v & xmm2)
  4577  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
  4578  	LONG $0x37380f66; BYTE $0xe0               // pcmpgtq    xmm4, xmm0 ; xmm4 = all-ones in lanes where v is negative as a signed qword
  4579  	LONG $0x15380f66; BYTE $0xc3               // blendvpd    xmm0, xmm3, xmm0 ; the sign bit of xmm0 itself is the mask: such lanes take the halved value
  4580  	LONG $0x3a0f4866; WORD $0xc016; BYTE $0x01 // pextrq    rax, xmm0, 1
  4581  	WORD $0x570f; BYTE $0xed                   // xorps    xmm5, xmm5
  4582  	LONG $0x2a0f48f3; BYTE $0xe8               // cvtsi2ss    xmm5, rax
  4583  	LONG $0x7e0f4866; BYTE $0xc0               // movq    rax, xmm0
  4584  	WORD $0x570f; BYTE $0xdb                   // xorps    xmm3, xmm3
  4585  	LONG $0x2a0f48f3; BYTE $0xd8               // cvtsi2ss    xmm3, rax
  4586  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4587  	LONG $0x213a0f66; WORD $0x1cdd             // insertps    xmm3, xmm5, 28 ; xmm3 = {lane0 result, lane1 result, 0, 0}
  4588  	WORD $0x280f; BYTE $0xeb                   // movaps    xmm5, xmm3
  4589  	WORD $0x580f; BYTE $0xeb                   // addps    xmm5, xmm3 ; xmm5 = 2 * xmm3
  4590  	LONG $0xc4700f66; BYTE $0xed               // pshufd    xmm0, xmm4, 237 ; move the high dword of each qword mask into float lanes 0 and 1
  4591  	LONG $0x14380f66; BYTE $0xdd               // blendvps    xmm3, xmm5, xmm0 ; lanes that were halved take the doubled result
  4592  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
  4593  	LONG $0xc2db0f66                           // pand    xmm0, xmm2
  4594  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
  4595  	LONG $0xd4730f66; BYTE $0x01               // psrlq    xmm4, 1
  4596  	LONG $0xe0eb0f66                           // por    xmm4, xmm0
  4597  	WORD $0x570f; BYTE $0xed                   // xorps    xmm5, xmm5
  4598  	LONG $0x37380f66; BYTE $0xe9               // pcmpgtq    xmm5, xmm1
  4599  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
  4600  	LONG $0x15380f66; BYTE $0xcc               // blendvpd    xmm1, xmm4, xmm0
  4601  	LONG $0x3a0f4866; WORD $0xc816; BYTE $0x01 // pextrq    rax, xmm1, 1
  4602  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4603  	LONG $0x2a0f48f3; BYTE $0xc0               // cvtsi2ss    xmm0, rax
  4604  	LONG $0x7e0f4866; BYTE $0xc8               // movq    rax, xmm1
  4605  	WORD $0x570f; BYTE $0xc9                   // xorps    xmm1, xmm1
  4606  	LONG $0x2a0f48f3; BYTE $0xc8               // cvtsi2ss    xmm1, rax
  4607  	LONG $0x213a0f66; WORD $0x1cc8             // insertps    xmm1, xmm0, 28
  4608  	WORD $0x280f; BYTE $0xe1                   // movaps    xmm4, xmm1
  4609  	WORD $0x580f; BYTE $0xe1                   // addps    xmm4, xmm1
  4610  	LONG $0xc5700f66; BYTE $0xed               // pshufd    xmm0, xmm5, 237
  4611  	LONG $0x14380f66; BYTE $0xcc               // blendvps    xmm1, xmm4, xmm0
  4612  	WORD $0x160f; BYTE $0xd9                   // movlhps    xmm3, xmm1 ; combine the two pairs into 4 x float32
  4613  	LONG $0xb91c110f                           // movups    oword [rcx + 4*rdi], xmm3
  4614  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32] ; second group of four: same sequence as above
  4615  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
  4616  	LONG $0xcadb0f66                           // pand    xmm1, xmm2
  4617  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
  4618  	LONG $0xd3730f66; BYTE $0x01               // psrlq    xmm3, 1
  4619  	LONG $0xd9eb0f66                           // por    xmm3, xmm1
  4620  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
  4621  	LONG $0x37380f66; BYTE $0xe0               // pcmpgtq    xmm4, xmm0
  4622  	LONG $0x15380f66; BYTE $0xc3               // blendvpd    xmm0, xmm3, xmm0
  4623  	LONG $0x3a0f4866; WORD $0xc016; BYTE $0x01 // pextrq    rax, xmm0, 1
  4624  	WORD $0x570f; BYTE $0xed                   // xorps    xmm5, xmm5
  4625  	LONG $0x2a0f48f3; BYTE $0xe8               // cvtsi2ss    xmm5, rax
  4626  	LONG $0x7e0f4866; BYTE $0xc0               // movq    rax, xmm0
  4627  	WORD $0x570f; BYTE $0xdb                   // xorps    xmm3, xmm3
  4628  	LONG $0x2a0f48f3; BYTE $0xd8               // cvtsi2ss    xmm3, rax
  4629  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4630  	LONG $0x213a0f66; WORD $0x1cdd             // insertps    xmm3, xmm5, 28
  4631  	WORD $0x280f; BYTE $0xeb                   // movaps    xmm5, xmm3
  4632  	WORD $0x580f; BYTE $0xeb                   // addps    xmm5, xmm3
  4633  	LONG $0xc4700f66; BYTE $0xed               // pshufd    xmm0, xmm4, 237
  4634  	LONG $0x14380f66; BYTE $0xdd               // blendvps    xmm3, xmm5, xmm0
  4635  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
  4636  	LONG $0xc2db0f66                           // pand    xmm0, xmm2
  4637  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
  4638  	LONG $0xd4730f66; BYTE $0x01               // psrlq    xmm4, 1
  4639  	LONG $0xe0eb0f66                           // por    xmm4, xmm0
  4640  	WORD $0x570f; BYTE $0xed                   // xorps    xmm5, xmm5
  4641  	LONG $0x37380f66; BYTE $0xe9               // pcmpgtq    xmm5, xmm1
  4642  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
  4643  	LONG $0x15380f66; BYTE $0xcc               // blendvpd    xmm1, xmm4, xmm0
  4644  	LONG $0x3a0f4866; WORD $0xc816; BYTE $0x01 // pextrq    rax, xmm1, 1
  4645  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4646  	LONG $0x2a0f48f3; BYTE $0xc0               // cvtsi2ss    xmm0, rax
  4647  	LONG $0x7e0f4866; BYTE $0xc8               // movq    rax, xmm1
  4648  	WORD $0x570f; BYTE $0xc9                   // xorps    xmm1, xmm1
  4649  	LONG $0x2a0f48f3; BYTE $0xc8               // cvtsi2ss    xmm1, rax
  4650  	LONG $0x213a0f66; WORD $0x1cc8             // insertps    xmm1, xmm0, 28
  4651  	WORD $0x280f; BYTE $0xe1                   // movaps    xmm4, xmm1
  4652  	WORD $0x580f; BYTE $0xe1                   // addps    xmm4, xmm1
  4653  	LONG $0xc5700f66; BYTE $0xed               // pshufd    xmm0, xmm5, 237
  4654  	LONG $0x14380f66; BYTE $0xcc               // blendvps    xmm1, xmm4, xmm0
  4655  	WORD $0x160f; BYTE $0xd9                   // movlhps    xmm3, xmm1
  4656  	LONG $0xb95c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm3
  4657  	LONG $0x08c78348                           // add    rdi, 8 ; two groups of 4 elements consumed
  4658  	LONG $0x02c28349                           // add    r10, 2 ; sets ZF when the negative counter reaches zero
  4659  	JNE  LBB0_663
  4660  	JMP  LBB0_1051 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4661  
  4662  LBB0_664: // group-count setup for a 4-elements-per-group loop whose body (LBB0_871/LBB0_873) lies outside this chunk
  4663  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4664  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4665  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4666  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  4667  	LONG $0x02efc148         // shr    rdi, 2
  4668  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (esi - 4)/4 + 1 = number of 4-element groups
  4669  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  4670  	LONG $0x03e08341         // and    r8d, 3 ; r8d = group count mod 4
  4671  	LONG $0x0cf88348         // cmp    rax, 12
  4672  	JAE  LBB0_871 // esi - 4 >= 12, i.e. at least four groups
  4673  	WORD $0xc031             // xor    eax, eax ; fewer than four groups: enter LBB0_873 with rax = 0
  4674  	JMP  LBB0_873
  4675  
  4676  LBB0_666: // setup for the 2x-unrolled vector loop at LBB0_668 (8 elements per group)
  4677  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4678  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4679  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4680  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4681  	LONG $0x03e8c149         // shr    r8, 3
  4682  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4683  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4684  	JE   LBB0_1058 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4685  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4686  	LONG $0xfee08348         // and    rax, -2
  4687  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4688  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4689  
  4690  LBB0_668: // each iteration: 16 x 16-bit values read from [rdx], zero-extended to 32 bits, converted to float32 and written to [rcx] (presumably a uint16 -> float32 cast)
  4691  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi]
  4692  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  4693  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  4694  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  4695  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  4696  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  4697  	LONG $0x33380f66; WORD $0x7a44; BYTE $0x10 // pmovzxwd    xmm0, qword [rdx + 2*rdi + 16]
  4698  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x18 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 24]
  4699  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  4700  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  4701  	LONG $0xb944110f; BYTE $0x20               // movups    oword [rcx + 4*rdi + 32], xmm0
  4702  	LONG $0xb94c110f; BYTE $0x30               // movups    oword [rcx + 4*rdi + 48], xmm1
  4703  	LONG $0x10c78348                           // add    rdi, 16 ; two groups of 8 elements consumed
  4704  	LONG $0x02c08348                           // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4705  	JNE  LBB0_668
  4706  	JMP  LBB0_1059 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4707  
  4708  LBB0_669: // group-count setup for a 4-elements-per-group loop whose body (LBB0_878/LBB0_880) lies outside this chunk
  4709  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4710  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4711  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4712  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  4713  	LONG $0x02efc148         // shr    rdi, 2
  4714  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (esi - 4)/4 + 1 = number of 4-element groups
  4715  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  4716  	LONG $0x03e08341         // and    r8d, 3 ; r8d = group count mod 4
  4717  	LONG $0x0cf88348         // cmp    rax, 12
  4718  	JAE  LBB0_878 // esi - 4 >= 12, i.e. at least four groups
  4719  	WORD $0xc031             // xor    eax, eax ; fewer than four groups: enter LBB0_880 with rax = 0
  4720  	JMP  LBB0_880
  4721  
  4722  LBB0_671: // setup for the 2x-unrolled vector loop at LBB0_673 (8 elements per group)
  4723  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4724  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4725  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4726  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4727  	LONG $0x03e8c149         // shr    r8, 3
  4728  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4729  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4730  	JE   LBB0_1063 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4731  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4732  	LONG $0xfee08348         // and    rax, -2
  4733  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4734  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4735  
  4736  LBB0_673: // each iteration: 16 x 16-bit values read from [rdx], sign-extended to 32 bits, converted to float32 and written to [rcx] (presumably an int16 -> float32 cast)
  4737  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi]
  4738  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  4739  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  4740  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  4741  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  4742  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  4743  	LONG $0x23380f66; WORD $0x7a44; BYTE $0x10 // pmovsxwd    xmm0, qword [rdx + 2*rdi + 16]
  4744  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x18 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 24]
  4745  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  4746  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  4747  	LONG $0xb944110f; BYTE $0x20               // movups    oword [rcx + 4*rdi + 32], xmm0
  4748  	LONG $0xb94c110f; BYTE $0x30               // movups    oword [rcx + 4*rdi + 48], xmm1
  4749  	LONG $0x10c78348                           // add    rdi, 16 ; two groups of 8 elements consumed
  4750  	LONG $0x02c08348                           // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4751  	JNE  LBB0_673
  4752  	JMP  LBB0_1064 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4753  
  4754  LBB0_677: // scalar path, unrolled x4; esi and rax are set by the code that jumps here (outside this chunk)
  4755  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi rounded down to a multiple of 4 = loop bound
  4756  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4757  
  4758  LBB0_678: // each iteration: 4 x 64-bit signed integers at [rdx] -> 4 x float32 at [rcx]; xmm0 is zeroed before every cvtsi2ss, which only writes the low lane
  4759  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4760  	LONG $0x2a0f48f3; WORD $0xfa04             // cvtsi2ss    xmm0, qword [rdx + 8*rdi]
  4761  	LONG $0x04110ff3; BYTE $0xb9               // movss    dword [rcx + 4*rdi], xmm0
  4762  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4763  	LONG $0x2a0f48f3; WORD $0xfa44; BYTE $0x08 // cvtsi2ss    xmm0, qword [rdx + 8*rdi + 8]
  4764  	LONG $0x44110ff3; WORD $0x04b9             // movss    dword [rcx + 4*rdi + 4], xmm0
  4765  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4766  	LONG $0x2a0f48f3; WORD $0xfa44; BYTE $0x10 // cvtsi2ss    xmm0, qword [rdx + 8*rdi + 16]
  4767  	LONG $0x44110ff3; WORD $0x08b9             // movss    dword [rcx + 4*rdi + 8], xmm0
  4768  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  4769  	LONG $0x2a0f48f3; WORD $0xfa44; BYTE $0x18 // cvtsi2ss    xmm0, qword [rdx + 8*rdi + 24]
  4770  	LONG $0x44110ff3; WORD $0x0cb9             // movss    dword [rcx + 4*rdi + 12], xmm0
  4771  	LONG $0x04c78348                           // add    rdi, 4
  4772  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  4773  	JNE  LBB0_678
  4774  
  4775  LBB0_679: // remainder: rax = number of leftover elements (computed outside this chunk; rax is free here because the loop above does not use it)
  4776  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4777  	JE   LBB0_1526 // nothing left; LBB0_1526 is outside this chunk (presumably the common exit)
  4778  	LONG $0xb90c8d48         // lea    rcx, [rcx + 4*rdi] ; advance the output pointer past the elements already written
  4779  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; advance the input pointer likewise
  4780  	WORD $0xf631             // xor    esi, esi ; rsi = index within the remainder
  4781  
  4782  LBB0_681: // one element per iteration until rsi == rax
  4783  	WORD $0x570f; BYTE $0xc0       // xorps    xmm0, xmm0
  4784  	LONG $0x2a0f48f3; WORD $0xf204 // cvtsi2ss    xmm0, qword [rdx + 8*rsi]
  4785  	LONG $0x04110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm0
  4786  	LONG $0x01c68348               // add    rsi, 1
  4787  	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
  4788  	JNE  LBB0_681
  4789  	JMP  LBB0_1526
  4790  
  4791  LBB0_682: // scalar path, unrolled x4; esi and r8 are set by the code that jumps here (outside this chunk)
  4792  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi rounded down to a multiple of 4 = loop bound
  4793  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4794  
  4795  LBB0_683: // each iteration: 4 x float32 at [rdx] -> 4 x 64-bit integers at [rcx] via truncating cvttss2si (presumably a float32 -> int64 cast)
  4796  	LONG $0x2c0f48f3; WORD $0xba04             // cvttss2si    rax, dword [rdx + 4*rdi]
  4797  	LONG $0xf9048948                           // mov    qword [rcx + 8*rdi], rax
  4798  	LONG $0x2c0f48f3; WORD $0xba44; BYTE $0x04 // cvttss2si    rax, dword [rdx + 4*rdi + 4]
  4799  	LONG $0xf9448948; BYTE $0x08               // mov    qword [rcx + 8*rdi + 8], rax
  4800  	LONG $0x2c0f48f3; WORD $0xba44; BYTE $0x08 // cvttss2si    rax, dword [rdx + 4*rdi + 8]
  4801  	LONG $0xf9448948; BYTE $0x10               // mov    qword [rcx + 8*rdi + 16], rax
  4802  	LONG $0x2c0f48f3; WORD $0xba44; BYTE $0x0c // cvttss2si    rax, dword [rdx + 4*rdi + 12]
  4803  	LONG $0xf9448948; BYTE $0x18               // mov    qword [rcx + 8*rdi + 24], rax
  4804  	LONG $0x04c78348                           // add    rdi, 4
  4805  	WORD $0x3948; BYTE $0xfe                   // cmp    rsi, rdi
  4806  	JNE  LBB0_683
  4807  
  4808  LBB0_684: // remainder: r8 = number of leftover elements (computed outside this chunk)
  4809  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  4810  	JE   LBB0_1526 // nothing left; LBB0_1526 is outside this chunk (presumably the common exit)
  4811  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; advance the output pointer past the elements already written
  4812  	LONG $0xba148d48         // lea    rdx, [rdx + 4*rdi] ; advance the input pointer likewise
  4813  	WORD $0xf631             // xor    esi, esi ; rsi = index within the remainder
  4814  
  4815  LBB0_686: // one element per iteration until rsi == r8
  4816  	LONG $0x2c0f48f3; WORD $0xb204 // cvttss2si    rax, dword [rdx + 4*rsi]
  4817  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
  4818  	LONG $0x01c68348               // add    rsi, 1
  4819  	WORD $0x3949; BYTE $0xf0       // cmp    r8, rsi
  4820  	JNE  LBB0_686
  4821  	JMP  LBB0_1526
  4822  
  4823  LBB0_696: // group-count setup for a 4-elements-per-group loop whose body (LBB0_894/LBB0_896) lies outside this chunk
  4824  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4825  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4826  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4827  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  4828  	LONG $0x02efc148         // shr    rdi, 2
  4829  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (esi - 4)/4 + 1 = number of 4-element groups
  4830  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  4831  	LONG $0x03e08341         // and    r8d, 3 ; r8d = group count mod 4
  4832  	LONG $0x0cf88348         // cmp    rax, 12
  4833  	JAE  LBB0_894 // esi - 4 >= 12, i.e. at least four groups
  4834  	WORD $0xc031             // xor    eax, eax ; fewer than four groups: enter LBB0_896 with rax = 0
  4835  	JMP  LBB0_896
  4836  
  4837  LBB0_698: // setup for the 2x-unrolled vector loop at LBB0_700 (8 elements per group)
  4838  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4839  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; esi = r9d rounded down to a multiple of 8 (r9d is presumably the element count -- confirm in the prologue)
  4840  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4841  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4842  	LONG $0x03e8c149         // shr    r8, 3
  4843  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 8)/8 + 1 = number of 8-element groups
  4844  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4845  	JE   LBB0_1068 // esi == 8 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4846  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4847  	LONG $0xfee08348         // and    rax, -2
  4848  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4849  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4850  
  4851  LBB0_700: // each iteration: 16 x signed 32-bit integers read from [rdx] -> 16 x float32 written to [rcx] via cvtdq2ps
  4852  	LONG $0xba04100f             // movups    xmm0, oword [rdx + 4*rdi]
  4853  	LONG $0xba4c100f; BYTE $0x10 // movups    xmm1, oword [rdx + 4*rdi + 16]
  4854  	WORD $0x5b0f; BYTE $0xc0     // cvtdq2ps    xmm0, xmm0
  4855  	WORD $0x5b0f; BYTE $0xc9     // cvtdq2ps    xmm1, xmm1
  4856  	LONG $0xb904110f             // movups    oword [rcx + 4*rdi], xmm0
  4857  	LONG $0xb94c110f; BYTE $0x10 // movups    oword [rcx + 4*rdi + 16], xmm1
  4858  	LONG $0xba44100f; BYTE $0x20 // movups    xmm0, oword [rdx + 4*rdi + 32]
  4859  	LONG $0xba4c100f; BYTE $0x30 // movups    xmm1, oword [rdx + 4*rdi + 48]
  4860  	WORD $0x5b0f; BYTE $0xc0     // cvtdq2ps    xmm0, xmm0
  4861  	WORD $0x5b0f; BYTE $0xc9     // cvtdq2ps    xmm1, xmm1
  4862  	LONG $0xb944110f; BYTE $0x20 // movups    oword [rcx + 4*rdi + 32], xmm0
  4863  	LONG $0xb94c110f; BYTE $0x30 // movups    oword [rcx + 4*rdi + 48], xmm1
  4864  	LONG $0x10c78348             // add    rdi, 16 ; two groups of 8 elements consumed
  4865  	LONG $0x02c08348             // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4866  	JNE  LBB0_700
  4867  	JMP  LBB0_1069 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4868  
  4869  LBB0_734: // setup for the 2x-unrolled vector loop at LBB0_736 (4 elements per group)
  4870  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  4871  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r9d rounded down to a multiple of 4 (r9d is presumably the element count -- confirm in the prologue)
  4872  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4873  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4874  	LONG $0x02e8c149         // shr    r8, 2
  4875  	LONG $0x01c08349         // add    r8, 1 ; r8 = (esi - 4)/4 + 1 = number of 4-element groups
  4876  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4877  	JE   LBB0_1073 // esi == 4 (exactly one group): bypass the unrolled loop; target lies outside this chunk
  4878  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4879  	LONG $0xfee08348         // and    rax, -2
  4880  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r8 & -2): negative group counter, stepped by +2 until it reaches zero
  4881  	WORD $0xff31             // xor    edi, edi ; rdi = element index, starts at 0
  4882  
  4883  LBB0_736: // each iteration: 8 x float64 read from [rdx] -> 8 x 32-bit integers written to [rcx] via truncating cvttpd2dq
  4884  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  4885  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  4886  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0 ; 2 x float64 -> 2 x int32 in the low qword
  4887  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4888  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1 ; concatenate the two low qwords into 4 x int32
  4889  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
  4890  	LONG $0x44100f66; WORD $0x20fa // movupd    xmm0, oword [rdx + 8*rdi + 32]
  4891  	LONG $0x4c100f66; WORD $0x30fa // movupd    xmm1, oword [rdx + 8*rdi + 48]
  4892  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  4893  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  4894  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1
  4895  	LONG $0x44110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm0
  4896  	LONG $0x08c78348               // add    rdi, 8 ; two groups of 4 elements consumed
  4897  	LONG $0x02c08348               // add    rax, 2 ; sets ZF when the negative counter reaches zero
  4898  	JNE  LBB0_736
  4899  	JMP  LBB0_1074 // continuation is outside this chunk (presumably the odd-group / tail handling)
  4900  
  4901  LBB0_740: // vector path: 64-bit -> 32-bit integer narrowing (keeps the low dword of each qword), 4 elements per step; loads from rdx, stores to rcx
  4902  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  4903  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  4904  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4905  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4906  	LONG $0x02e8c149         // shr    r8, 2
  4907  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 4-element steps
  4908  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4909  	JE   LBB0_939 // rsi == 4 (a single step): bypass the 2x-unrolled loop
  4910  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4911  	LONG $0xfee08348         // and    rax, -2
  4912  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  4913  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  4914  
  4915  LBB0_742: // each iteration narrows 8 qwords (two 4-element steps)
  4916  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  4917  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4918  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232 ; 0xE8: dwords 0 and 2 (low halves of both qwords) move to the low 64 bits
  4919  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4920  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; join both low halves into 4 dwords
  4921  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  4922  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32] ; second unrolled step
  4923  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4924  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  4925  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  4926  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  4927  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
  4928  	LONG $0x08c78348               // add    rdi, 8 ; advance the element index by 8
  4929  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  4930  	JNE  LBB0_742 // repeat until the counter reaches zero
  4931  	JMP  LBB0_940 // continuation is outside this view
  4932  
  4933  LBB0_743: // vector path: 16-bit -> 32-bit zero-extension (pmovzxwd), 8 elements per step; loads from rdx, stores to rcx
  4934  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  4935  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  4936  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4937  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4938  	LONG $0x03e8c149         // shr    r8, 3
  4939  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 8)/8 + 1 = number of 8-element steps
  4940  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4941  	JE   LBB0_1078 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  4942  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4943  	LONG $0xfee08348         // and    rax, -2
  4944  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  4945  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  4946  
  4947  LBB0_745: // each iteration widens 16 words (two 8-element steps)
  4948  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi] ; 4 words -> 4 zero-extended dwords
  4949  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  4950  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  4951  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  4952  	LONG $0x33380f66; WORD $0x7a44; BYTE $0x10 // pmovzxwd    xmm0, qword [rdx + 2*rdi + 16] ; second unrolled step
  4953  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x18 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 24]
  4954  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  4955  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  4956  	LONG $0x10c78348                           // add    rdi, 16 ; advance the element index by 16
  4957  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  4958  	JNE  LBB0_745 // repeat until the counter reaches zero
  4959  	JMP  LBB0_1079 // continuation is outside this view
  4960  
  4961  LBB0_746: // vector path: 16-bit -> 32-bit sign-extension (pmovsxwd), 8 elements per step; loads from rdx, stores to rcx
  4962  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  4963  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  4964  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4965  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4966  	LONG $0x03e8c149         // shr    r8, 3
  4967  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  4968  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4969  	JE   LBB0_1083 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  4970  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4971  	LONG $0xfee08348         // and    rax, -2
  4972  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  4973  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  4974  
  4975  LBB0_748: // each iteration widens 16 words (two 8-element steps)
  4976  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi] ; 4 words -> 4 sign-extended dwords
  4977  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  4978  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  4979  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  4980  	LONG $0x23380f66; WORD $0x7a44; BYTE $0x10 // pmovsxwd    xmm0, qword [rdx + 2*rdi + 16] ; second unrolled step
  4981  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x18 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 24]
  4982  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  4983  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  4984  	LONG $0x10c78348                           // add    rdi, 16 ; advance the element index by 16
  4985  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  4986  	JNE  LBB0_748 // repeat until the counter reaches zero
  4987  	JMP  LBB0_1084 // continuation is outside this view
  4988  
  4989  LBB0_749: // vector path: 64-bit -> 32-bit integer narrowing (keeps the low dword of each qword), 4 elements per step; loads from rdx, stores to rcx
  4990  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  4991  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  4992  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4993  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  4994  	LONG $0x02e8c149         // shr    r8, 2
  4995  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 4-element steps
  4996  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4997  	JE   LBB0_1088 // rsi == 4 (a single step): bypass the 2x-unrolled loop
  4998  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  4999  	LONG $0xfee08348         // and    rax, -2
  5000  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5001  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5002  
  5003  LBB0_751: // each iteration narrows 8 qwords (two 4-element steps)
  5004  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  5005  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  5006  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232 ; 0xE8: dwords 0 and 2 (low halves of both qwords) move to the low 64 bits
  5007  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  5008  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; join both low halves into 4 dwords
  5009  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  5010  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32] ; second unrolled step
  5011  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  5012  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  5013  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  5014  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  5015  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
  5016  	LONG $0x08c78348               // add    rdi, 8 ; advance the element index by 8
  5017  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5018  	JNE  LBB0_751 // repeat until the counter reaches zero
  5019  	JMP  LBB0_1089 // continuation is outside this view
  5020  
  5021  LBB0_752: // vector path: float32 -> int32 with truncation (cvttps2dq), 8 elements per step; loads from rdx, stores to rcx
  5022  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5023  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5024  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5025  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5026  	LONG $0x03e8c149         // shr    r8, 3
  5027  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5028  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5029  	JE   LBB0_1093 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5030  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5031  	LONG $0xfee08348         // and    rax, -2
  5032  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5033  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5034  
  5035  LBB0_754: // each iteration converts 16 floats (two 8-element steps)
  5036  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  5037  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  5038  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0 ; 4 x float32 -> 4 x int32, truncating
  5039  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  5040  	LONG $0xb904110f               // movups    oword [rcx + 4*rdi], xmm0
  5041  	LONG $0xb94c110f; BYTE $0x10   // movups    oword [rcx + 4*rdi + 16], xmm1
  5042  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32] ; second unrolled step
  5043  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  5044  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  5045  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  5046  	LONG $0x44110f66; WORD $0x20b9 // movupd    oword [rcx + 4*rdi + 32], xmm0 ; movupd here stores the same 16 bytes as the movups form used above
  5047  	LONG $0x4c110f66; WORD $0x30b9 // movupd    oword [rcx + 4*rdi + 48], xmm1
  5048  	LONG $0x10c78348               // add    rdi, 16 ; advance the element index by 16
  5049  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5050  	JNE  LBB0_754 // repeat until the counter reaches zero
  5051  	JMP  LBB0_1094 // continuation is outside this view
  5052  
  5053  LBB0_761: // setup for a vector loop whose body is outside this view; 8 elements per step
  5054  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5055  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5056  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5057  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5058  	LONG $0x03efc148         // shr    rdi, 3
  5059  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (rsi - 8)/8 + 1 = number of 8-element steps
  5060  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5061  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5062  	LONG $0x18f88348         // cmp    rax, 24
  5063  	JAE  LBB0_1098 // taken when rax >= 24 (unsigned), i.e. four or more steps
  5064  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1100
  5065  	JMP  LBB0_1100 // target is outside this view
  5066  
  5067  LBB0_763: // vector path: 8-bit -> 32-bit sign-extension (pmovsxbd), 8 elements per step; loads from rdx, stores to rcx
  5068  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5069  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5070  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5071  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5072  	LONG $0x03e8c149         // shr    r8, 3
  5073  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5074  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5075  	JE   LBB0_1478 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5076  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5077  	LONG $0xfee08348         // and    rax, -2
  5078  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5079  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5080  
  5081  LBB0_765: // each iteration widens 16 bytes (two 8-element steps)
  5082  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi] ; 4 bytes -> 4 sign-extended dwords
  5083  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  5084  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  5085  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  5086  	LONG $0x21380f66; WORD $0x3a44; BYTE $0x08 // pmovsxbd    xmm0, dword [rdx + rdi + 8] ; second unrolled step
  5087  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x0c // pmovsxbd    xmm1, dword [rdx + rdi + 12]
  5088  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  5089  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  5090  	LONG $0x10c78348                           // add    rdi, 16 ; advance the element index by 16
  5091  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  5092  	JNE  LBB0_765 // repeat until the counter reaches zero
  5093  	JMP  LBB0_1479 // continuation is outside this view
  5094  
  5095  LBB0_766: // vector path: 8-bit -> 32-bit zero-extension (pmovzxbd), 8 elements per step; loads from rdx, stores to rcx
  5096  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5097  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5098  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5099  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5100  	LONG $0x03e8c149         // shr    r8, 3
  5101  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5102  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5103  	JE   LBB0_1486 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5104  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5105  	LONG $0xfee08348         // and    rax, -2
  5106  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5107  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5108  
  5109  LBB0_768: // each iteration widens 16 bytes (two 8-element steps)
  5110  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi] ; 4 bytes -> 4 zero-extended dwords
  5111  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  5112  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  5113  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  5114  	LONG $0x31380f66; WORD $0x3a44; BYTE $0x08 // pmovzxbd    xmm0, dword [rdx + rdi + 8] ; second unrolled step
  5115  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x0c // pmovzxbd    xmm1, dword [rdx + rdi + 12]
  5116  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  5117  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  5118  	LONG $0x10c78348                           // add    rdi, 16 ; advance the element index by 16
  5119  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  5120  	JNE  LBB0_768 // repeat until the counter reaches zero
  5121  	JMP  LBB0_1487 // continuation is outside this view
  5122  
  5123  LBB0_769: // setup for a vector loop whose body is outside this view; 8 elements per step
  5124  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5125  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5126  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5127  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5128  	LONG $0x03efc148         // shr    rdi, 3
  5129  	LONG $0x01c78348         // add    rdi, 1 ; rdi = number of 8-element steps
  5130  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5131  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5132  	LONG $0x18f88348         // cmp    rax, 24
  5133  	JAE  LBB0_1108 // taken when rax >= 24 (unsigned), i.e. four or more steps
  5134  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1110
  5135  	JMP  LBB0_1110 // target is outside this view
  5136  
  5137  LBB0_771: // setup for a vector loop whose body is outside this view; 4 elements per step
  5138  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5139  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  5140  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5141  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5142  	LONG $0x02efc148         // shr    rdi, 2
  5143  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (rsi - 4)/4 + 1 = number of 4-element steps
  5144  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5145  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5146  	LONG $0x0cf88348         // cmp    rax, 12
  5147  	JAE  LBB0_1118 // taken when rax >= 12 (unsigned), i.e. four or more steps
  5148  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1120
  5149  	JMP  LBB0_1120 // target is outside this view
  5150  
  5151  LBB0_773: // vector path: 32-bit -> 8-bit narrowing via byte shuffle, 8 elements per step; loads from rdx, stores to rcx
  5152  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5153  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5154  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5155  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5156  	LONG $0x03e8c149         // shr    r8, 3
  5157  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5158  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5159  	JE   LBB0_1494 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5160  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5161  	LONG $0xfee08348         // and    rax, -2
  5162  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5163  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5164  	QUAD $0x000000c0856f0f66 // movdqa    xmm0, oword 192[rbp] /* [rip + .LCPI0_13] */ ; shuffle mask; if rbp holds the LCDATA1 base (TODO confirm), LCDATA1+0xc0 selects bytes 0,4,8,12
  5165  
  5166  LBB0_775: // each iteration narrows 16 dwords (two 8-element steps)
  5167  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  5168  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  5169  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0 ; gather the selected bytes into the low 32 bits
  5170  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5171  	LONG $0x0c7e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm1 ; store 4 result bytes
  5172  	LONG $0x547e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm2
  5173  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32] ; second unrolled step
  5174  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  5175  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5176  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5177  	LONG $0x4c7e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm1
  5178  	LONG $0x547e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm2
  5179  	LONG $0x10c78348               // add    rdi, 16 ; advance the element index by 16
  5180  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5181  	JNE  LBB0_775 // repeat until the counter reaches zero
  5182  	JMP  LBB0_1495 // continuation is outside this view
  5183  
  5184  LBB0_776: // vector path: float64 -> 8-bit (truncating convert to int32, then keep one byte per element), 4 elements per step; loads from rdx, stores to rcx
  5185  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5186  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  5187  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5188  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  5189  	LONG $0x02e8c149             // shr    r8, 2
  5190  	LONG $0x01c08349             // add    r8, 1 ; r8 = number of 4-element steps
  5191  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5192  	JE   LBB0_1502 // rsi == 4 (a single step): bypass the 2x-unrolled loop
  5193  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  5194  	LONG $0xfee08348             // and    rax, -2
  5195  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5196  	WORD $0xff31                 // xor    edi, edi ; rdi = element index = 0
  5197  	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ ; shuffle mask; if rbp holds the LCDATA1 base (TODO confirm), LCDATA1+0x10 selects bytes 0 and 4
  5198  
  5199  LBB0_778: // each iteration converts 8 doubles (two 4-element steps)
  5200  	LONG $0x0c100f66; BYTE $0xfa               // movupd    xmm1, oword [rdx + 8*rdi]
  5201  	LONG $0x54100f66; WORD $0x10fa             // movupd    xmm2, oword [rdx + 8*rdi + 16]
  5202  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1 ; 2 doubles -> 2 int32 in the low 64 bits
  5203  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
  5204  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0 ; gather the selected bytes into the low word
  5205  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 ; store 2 result bytes
  5206  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5207  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  5208  	LONG $0x4c100f66; WORD $0x20fa             // movupd    xmm1, oword [rdx + 8*rdi + 32] ; second unrolled step
  5209  	LONG $0x54100f66; WORD $0x30fa             // movupd    xmm2, oword [rdx + 8*rdi + 48]
  5210  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1
  5211  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
  5212  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5213  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  5214  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5215  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  5216  	LONG $0x08c78348                           // add    rdi, 8 ; advance the element index by 8
  5217  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  5218  	JNE  LBB0_778 // repeat until the counter reaches zero
  5219  	JMP  LBB0_1503 // continuation is outside this view
  5220  
  5221  LBB0_779: // setup for a vector loop whose body is outside this view; 32 elements per step
  5222  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5223  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; rsi = r9d rounded down to a multiple of 32
  5224  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5225  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5226  	LONG $0x05efc148         // shr    rdi, 5
  5227  	LONG $0x01c78348         // add    rdi, 1 ; rdi = (rsi - 32)/32 + 1 = number of 32-element steps
  5228  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5229  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5230  	LONG $0x60f88348         // cmp    rax, 96
  5231  	JAE  LBB0_1128 // taken when rax >= 96 (unsigned), i.e. four or more steps
  5232  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1130
  5233  	JMP  LBB0_1130 // target is outside this view
  5234  
  5235  LBB0_781: // vector path: 64-bit -> 8-bit narrowing via byte shuffle, 4 elements per step; loads from rdx, stores to rcx
  5236  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5237  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  5238  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5239  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  5240  	LONG $0x02e8c149             // shr    r8, 2
  5241  	LONG $0x01c08349             // add    r8, 1 ; r8 = number of 4-element steps
  5242  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5243  	JE   LBB0_1318 // rsi == 4 (a single step): bypass the 2x-unrolled loop
  5244  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  5245  	LONG $0xfee08348             // and    rax, -2
  5246  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5247  	WORD $0xff31                 // xor    edi, edi ; rdi = element index = 0
  5248  	LONG $0x456f0f66; BYTE $0x40 // movdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_5] */ ; shuffle mask; if rbp holds the LCDATA1 base (TODO confirm), LCDATA1+0x40 selects bytes 0 and 8
  5249  
  5250  LBB0_783: // each iteration narrows 8 qwords (two 4-element steps)
  5251  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
  5252  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
  5253  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0 ; gather the selected bytes into the low word
  5254  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 ; store 2 result bytes
  5255  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5256  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  5257  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32] ; second unrolled step
  5258  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
  5259  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5260  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  5261  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5262  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  5263  	LONG $0x08c78348                           // add    rdi, 8 ; advance the element index by 8
  5264  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  5265  	JNE  LBB0_783 // repeat until the counter reaches zero
  5266  	JMP  LBB0_1319 // continuation is outside this view
  5267  
  5268  LBB0_784: // vector path: 16-bit -> 8-bit narrowing via byte shuffle, 16 elements per step; loads from rdx, stores to rcx
  5269  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5270  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16
  5271  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5272  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5273  	LONG $0x04e8c149         // shr    r8, 4
  5274  	LONG $0x01c08349         // add    r8, 1 ; r8 = (rsi - 16)/16 + 1 = number of 16-element steps
  5275  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5276  	JE   LBB0_1326 // rsi == 16 (a single step): bypass the 2x-unrolled loop
  5277  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5278  	LONG $0xfee08348         // and    rax, -2
  5279  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5280  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5281  	QUAD $0x00000100856f0f66 // movdqa    xmm0, oword 256[rbp] /* [rip + .LCPI0_17] */ ; shuffle mask; NOTE(review): presumably selects the low byte of each word - the constant is not visible here, verify
  5282  
  5283  LBB0_786: // each iteration narrows 32 words (two 16-element steps)
  5284  	LONG $0x0c6f0ff3; BYTE $0x7a   // movdqu    xmm1, oword [rdx + 2*rdi]
  5285  	LONG $0x546f0ff3; WORD $0x107a // movdqu    xmm2, oword [rdx + 2*rdi + 16]
  5286  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5287  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5288  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 ; join the low 8 bytes of both shuffled vectors
  5289  	LONG $0x0c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm1 ; store 16 result bytes
  5290  	LONG $0x4c6f0ff3; WORD $0x207a // movdqu    xmm1, oword [rdx + 2*rdi + 32] ; second unrolled step
  5291  	LONG $0x546f0ff3; WORD $0x307a // movdqu    xmm2, oword [rdx + 2*rdi + 48]
  5292  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5293  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5294  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  5295  	LONG $0x4c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm1
  5296  	LONG $0x20c78348               // add    rdi, 32 ; advance the element index by 32
  5297  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5298  	JNE  LBB0_786 // repeat until the counter reaches zero
  5299  	JMP  LBB0_1327 // continuation is outside this view
  5300  
  5301  LBB0_787: // vector path: 16-bit -> 8-bit narrowing via byte shuffle, 16 elements per step; loads from rdx, stores to rcx
  5302  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5303  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = r9d rounded down to a multiple of 16
  5304  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5305  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5306  	LONG $0x04e8c149         // shr    r8, 4
  5307  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 16-element steps
  5308  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5309  	JE   LBB0_1510 // rsi == 16 (a single step): bypass the 2x-unrolled loop
  5310  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5311  	LONG $0xfee08348         // and    rax, -2
  5312  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5313  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5314  	QUAD $0x00000100856f0f66 // movdqa    xmm0, oword 256[rbp] /* [rip + .LCPI0_17] */ ; shuffle mask; NOTE(review): presumably selects the low byte of each word - the constant is not visible here, verify
  5315  
  5316  LBB0_789: // each iteration narrows 32 words (two 16-element steps)
  5317  	LONG $0x0c6f0ff3; BYTE $0x7a   // movdqu    xmm1, oword [rdx + 2*rdi]
  5318  	LONG $0x546f0ff3; WORD $0x107a // movdqu    xmm2, oword [rdx + 2*rdi + 16]
  5319  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5320  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5321  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 ; join the low 8 bytes of both shuffled vectors
  5322  	LONG $0x0c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm1 ; store 16 result bytes
  5323  	LONG $0x4c6f0ff3; WORD $0x207a // movdqu    xmm1, oword [rdx + 2*rdi + 32] ; second unrolled step
  5324  	LONG $0x546f0ff3; WORD $0x307a // movdqu    xmm2, oword [rdx + 2*rdi + 48]
  5325  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5326  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5327  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  5328  	LONG $0x4c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm1
  5329  	LONG $0x20c78348               // add    rdi, 32 ; advance the element index by 32
  5330  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5331  	JNE  LBB0_789 // repeat until the counter reaches zero
  5332  	JMP  LBB0_1511 // continuation is outside this view
  5333  
  5334  LBB0_790: // vector path: 64-bit -> 8-bit narrowing via byte shuffle, 4 elements per step; loads from rdx, stores to rcx
  5335  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5336  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  5337  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5338  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  5339  	LONG $0x02e8c149             // shr    r8, 2
  5340  	LONG $0x01c08349             // add    r8, 1 ; r8 = number of 4-element steps
  5341  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5342  	JE   LBB0_1334 // rsi == 4 (a single step): bypass the 2x-unrolled loop
  5343  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  5344  	LONG $0xfee08348             // and    rax, -2
  5345  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5346  	WORD $0xff31                 // xor    edi, edi ; rdi = element index = 0
  5347  	LONG $0x456f0f66; BYTE $0x40 // movdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_5] */ ; shuffle mask; if rbp holds the LCDATA1 base (TODO confirm), LCDATA1+0x40 selects bytes 0 and 8
  5348  
  5349  LBB0_792: // each iteration narrows 8 qwords (two 4-element steps)
  5350  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
  5351  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
  5352  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0 ; gather the selected bytes into the low word
  5353  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 ; store 2 result bytes
  5354  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5355  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  5356  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32] ; second unrolled step
  5357  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
  5358  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5359  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  5360  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5361  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  5362  	LONG $0x08c78348                           // add    rdi, 8 ; advance the element index by 8
  5363  	LONG $0x02c08348                           // add    rax, 2 ; two steps consumed
  5364  	JNE  LBB0_792 // repeat until the counter reaches zero
  5365  	JMP  LBB0_1335 // continuation is outside this view
  5366  
  5367  LBB0_793: // vector path: float32 -> 8-bit (truncating convert to int32, then signed-saturating packs down to bytes), 8 elements per step; loads from rdx, stores to rcx
  5368  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5369  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5370  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5371  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5372  	LONG $0x03e8c149         // shr    r8, 3
  5373  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5374  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5375  	JE   LBB0_1518 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5376  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5377  	LONG $0xfee08348         // and    rax, -2
  5378  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5379  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5380  
  5381  LBB0_795: // each iteration converts 16 floats (two 8-element steps)
  5382  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  5383  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  5384  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0 ; 4 x float32 -> 4 x int32, truncating
  5385  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0 ; int32 -> int16 with signed saturation
  5386  	LONG $0xc0630f66               // packsswb    xmm0, xmm0 ; int16 -> int8 with signed saturation
  5387  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  5388  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
  5389  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
  5390  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0 ; store 4 result bytes
  5391  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  5392  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32] ; second unrolled step
  5393  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  5394  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  5395  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
  5396  	LONG $0xc0630f66               // packsswb    xmm0, xmm0
  5397  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  5398  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
  5399  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
  5400  	LONG $0x447e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm0
  5401  	LONG $0x4c7e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm1
  5402  	LONG $0x10c78348               // add    rdi, 16 ; advance the element index by 16
  5403  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5404  	JNE  LBB0_795 // repeat until the counter reaches zero
  5405  	JMP  LBB0_1519 // continuation is outside this view
  5406  
  5407  LBB0_796: // setup for a vector loop whose body is outside this view; 32 elements per step
  5408  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5409  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; rsi = r9d rounded down to a multiple of 32
  5410  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5411  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5412  	LONG $0x05efc148         // shr    rdi, 5
  5413  	LONG $0x01c78348         // add    rdi, 1 ; rdi = number of 32-element steps
  5414  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5415  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5416  	LONG $0x60f88348         // cmp    rax, 96
  5417  	JAE  LBB0_1138 // taken when rax >= 96 (unsigned), i.e. four or more steps
  5418  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1140
  5419  	JMP  LBB0_1140 // target is outside this view
  5420  
  5421  LBB0_798: // vector path: 32-bit -> 8-bit narrowing via byte shuffle, 8 elements per step; loads from rdx, stores to rcx
  5422  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5423  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = r9d rounded down to a multiple of 8
  5424  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5425  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5426  	LONG $0x03e8c149         // shr    r8, 3
  5427  	LONG $0x01c08349         // add    r8, 1 ; r8 = number of 8-element steps
  5428  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5429  	JE   LBB0_1342 // rsi == 8 (a single step): bypass the 2x-unrolled loop
  5430  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5431  	LONG $0xfee08348         // and    rax, -2
  5432  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(steps & ~1), counted up to zero by the loop
  5433  	WORD $0xff31             // xor    edi, edi ; rdi = element index = 0
  5434  	QUAD $0x000000c0856f0f66 // movdqa    xmm0, oword 192[rbp] /* [rip + .LCPI0_13] */ ; shuffle mask; if rbp holds the LCDATA1 base (TODO confirm), LCDATA1+0xc0 selects bytes 0,4,8,12
  5435  
  5436  LBB0_800: // each iteration narrows 16 dwords (two 8-element steps)
  5437  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  5438  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  5439  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0 ; gather the selected bytes into the low 32 bits
  5440  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5441  	LONG $0x0c7e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm1 ; store 4 result bytes
  5442  	LONG $0x547e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm2
  5443  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32] ; second unrolled step
  5444  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  5445  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5446  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5447  	LONG $0x4c7e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm1
  5448  	LONG $0x547e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm2
  5449  	LONG $0x10c78348               // add    rdi, 16 ; advance the element index by 16
  5450  	LONG $0x02c08348               // add    rax, 2 ; two steps consumed
  5451  	JNE  LBB0_800 // repeat until the counter reaches zero
  5452  	JMP  LBB0_1343 // continuation is outside this view
  5453  
  5454  LBB0_808: // setup for a vector loop whose body is outside this view; 4 elements per step
  5455  	WORD $0x8944; BYTE $0xce // mov    esi, r9d ; r9d is presumably the element count - confirm at function entry
  5456  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = r9d rounded down to a multiple of 4
  5457  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5458  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5459  	LONG $0x02efc148         // shr    rdi, 2
  5460  	LONG $0x01c78348         // add    rdi, 1 ; rdi = number of 4-element steps
  5461  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5462  	LONG $0x03e08341         // and    r8d, 3 ; r8d = steps mod 4
  5463  	LONG $0x0cf88348         // cmp    rax, 12
  5464  	JAE  LBB0_1148 // taken when rax >= 12 (unsigned), i.e. four or more steps
  5465  	WORD $0xc031             // xor    eax, eax ; fewer than four steps: rax = 0 before joining LBB0_1150
  5466  	JMP  LBB0_1150 // target is outside this view
  5467  
  5468  LBB0_810: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5469  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5470  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5471  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5472  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5473  	LONG $0x02efc148         // shr    rdi, 2
  5474  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5475  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5476  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5477  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5478  	JAE  LBB0_1158 // yes: branch with rax = rsi - 4
  5479  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5480  	JMP  LBB0_1160
  5481  
  5482  LBB0_826: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5483  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5484  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5485  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5486  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5487  	LONG $0x02efc148         // shr    rdi, 2
  5488  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5489  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5490  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5491  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5492  	JAE  LBB0_1168 // yes: branch with rax = rsi - 4
  5493  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5494  	JMP  LBB0_1170
  5495  
  5496  LBB0_828: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5497  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5498  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5499  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5500  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5501  	LONG $0x02efc148         // shr    rdi, 2
  5502  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5503  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5504  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5505  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5506  	JAE  LBB0_1178 // yes: branch with rax = rsi - 4
  5507  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5508  	JMP  LBB0_1180
  5509  
  5510  LBB0_837: // Setup + 2x-unrolled loop: sign-extend 8-bit elements at [rdx] to 16-bit at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5511  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5512  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5513  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5514  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5515  	LONG $0x04e8c149         // shr    r8, 4
  5516  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5517  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5518  	JE   LBB0_1350 // rsi == 16 (a single step): bypass the paired loop
  5519  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5520  	LONG $0xfee08348         // and    rax, -2
  5521  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5522  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5523  
  5524  LBB0_839: // loop body: two 16-element steps per iteration
  5525  	LONG $0x20380f66; WORD $0x3a04             // pmovsxbw    xmm0, qword [rdx + rdi] -- 8 bytes sign-extended to 8 words
  5526  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbw    xmm1, qword [rdx + rdi + 8]
  5527  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  5528  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  5529  	LONG $0x20380f66; WORD $0x3a44; BYTE $0x10 // pmovsxbw    xmm0, qword [rdx + rdi + 16] -- second unrolled step
  5530  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x18 // pmovsxbw    xmm1, qword [rdx + rdi + 24]
  5531  	LONG $0x447f0ff3; WORD $0x2079             // movdqu    oword [rcx + 2*rdi + 32], xmm0
  5532  	LONG $0x4c7f0ff3; WORD $0x3079             // movdqu    oword [rcx + 2*rdi + 48], xmm1
  5533  	LONG $0x20c78348                           // add    rdi, 32 -- 32 elements handled per iteration
  5534  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5535  	JNE  LBB0_839
  5536  	JMP  LBB0_1351
  5537  
  5538  LBB0_840: // Setup + 2x-unrolled loop: sign-extend 8-bit elements at [rdx] to 16-bit at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5539  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5540  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5541  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5542  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5543  	LONG $0x04e8c149         // shr    r8, 4
  5544  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5545  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5546  	JE   LBB0_1358 // rsi == 16 (a single step): bypass the paired loop
  5547  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5548  	LONG $0xfee08348         // and    rax, -2
  5549  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5550  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5551  
  5552  LBB0_842: // loop body: two 16-element steps per iteration
  5553  	LONG $0x20380f66; WORD $0x3a04             // pmovsxbw    xmm0, qword [rdx + rdi] -- 8 bytes sign-extended to 8 words
  5554  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbw    xmm1, qword [rdx + rdi + 8]
  5555  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  5556  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  5557  	LONG $0x20380f66; WORD $0x3a44; BYTE $0x10 // pmovsxbw    xmm0, qword [rdx + rdi + 16] -- second unrolled step
  5558  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x18 // pmovsxbw    xmm1, qword [rdx + rdi + 24]
  5559  	LONG $0x447f0ff3; WORD $0x2079             // movdqu    oword [rcx + 2*rdi + 32], xmm0
  5560  	LONG $0x4c7f0ff3; WORD $0x3079             // movdqu    oword [rcx + 2*rdi + 48], xmm1
  5561  	LONG $0x20c78348                           // add    rdi, 32 -- 32 elements handled per iteration
  5562  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5563  	JNE  LBB0_842
  5564  	JMP  LBB0_1359
  5565  
  5566  LBB0_843: // Step-count setup for a loop handling 16 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5567  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5568  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5569  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5570  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5571  	LONG $0x04efc148         // shr    rdi, 4
  5572  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 16)/16 + 1, the number of 16-element steps
  5573  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5574  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5575  	LONG $0x30f88348         // cmp    rax, 48 -- at least 4 steps?
  5576  	JAE  LBB0_1188 // yes: branch with rax = rsi - 16
  5577  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5578  	JMP  LBB0_1190
  5579  
  5580  LBB0_845: // Step-count setup for a loop handling 16 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5581  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5582  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5583  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5584  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5585  	LONG $0x04efc148         // shr    rdi, 4
  5586  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 16)/16 + 1, the number of 16-element steps
  5587  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5588  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5589  	LONG $0x30f88348         // cmp    rax, 48 -- at least 4 steps?
  5590  	JAE  LBB0_1198 // yes: branch with rax = rsi - 16
  5591  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5592  	JMP  LBB0_1200
  5593  
  5594  LBB0_847: // Step-count setup for a loop handling 16 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5595  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5596  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5597  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5598  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5599  	LONG $0x04efc148         // shr    rdi, 4
  5600  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 16)/16 + 1, the number of 16-element steps
  5601  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5602  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5603  	LONG $0x30f88348         // cmp    rax, 48 -- at least 4 steps?
  5604  	JAE  LBB0_1208 // yes: branch with rax = rsi - 16
  5605  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5606  	JMP  LBB0_1210
  5607  
  5608  LBB0_849: // Step-count setup for a loop handling 16 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5609  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5610  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5611  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5612  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5613  	LONG $0x04efc148         // shr    rdi, 4
  5614  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 16)/16 + 1, the number of 16-element steps
  5615  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5616  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5617  	LONG $0x30f88348         // cmp    rax, 48 -- at least 4 steps?
  5618  	JAE  LBB0_1218 // yes: branch with rax = rsi - 16
  5619  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5620  	JMP  LBB0_1220
  5621  
  5622  LBB0_851: // Setup + 2x-unrolled loop: zero-extend 8-bit elements at [rdx] to 16-bit at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5623  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5624  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5625  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5626  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5627  	LONG $0x04e8c149         // shr    r8, 4
  5628  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5629  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5630  	JE   LBB0_1366 // rsi == 16 (a single step): bypass the paired loop
  5631  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5632  	LONG $0xfee08348         // and    rax, -2
  5633  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5634  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5635  
  5636  LBB0_853: // loop body: two 16-element steps per iteration
  5637  	LONG $0x30380f66; WORD $0x3a04             // pmovzxbw    xmm0, qword [rdx + rdi] -- 8 bytes zero-extended to 8 words
  5638  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x08 // pmovzxbw    xmm1, qword [rdx + rdi + 8]
  5639  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  5640  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  5641  	LONG $0x30380f66; WORD $0x3a44; BYTE $0x10 // pmovzxbw    xmm0, qword [rdx + rdi + 16] -- second unrolled step
  5642  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x18 // pmovzxbw    xmm1, qword [rdx + rdi + 24]
  5643  	LONG $0x447f0ff3; WORD $0x2079             // movdqu    oword [rcx + 2*rdi + 32], xmm0
  5644  	LONG $0x4c7f0ff3; WORD $0x3079             // movdqu    oword [rcx + 2*rdi + 48], xmm1
  5645  	LONG $0x20c78348                           // add    rdi, 32 -- 32 elements handled per iteration
  5646  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5647  	JNE  LBB0_853
  5648  	JMP  LBB0_1367
  5649  
  5650  LBB0_854: // Setup + 2x-unrolled loop: zero-extend 8-bit elements at [rdx] to 16-bit at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5651  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5652  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5653  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5654  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5655  	LONG $0x04e8c149         // shr    r8, 4
  5656  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5657  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5658  	JE   LBB0_1374 // rsi == 16 (a single step): bypass the paired loop
  5659  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5660  	LONG $0xfee08348         // and    rax, -2
  5661  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5662  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5663  
  5664  LBB0_856: // loop body: two 16-element steps per iteration
  5665  	LONG $0x30380f66; WORD $0x3a04             // pmovzxbw    xmm0, qword [rdx + rdi] -- 8 bytes zero-extended to 8 words
  5666  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x08 // pmovzxbw    xmm1, qword [rdx + rdi + 8]
  5667  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  5668  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  5669  	LONG $0x30380f66; WORD $0x3a44; BYTE $0x10 // pmovzxbw    xmm0, qword [rdx + rdi + 16] -- second unrolled step
  5670  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x18 // pmovzxbw    xmm1, qword [rdx + rdi + 24]
  5671  	LONG $0x447f0ff3; WORD $0x2079             // movdqu    oword [rcx + 2*rdi + 32], xmm0
  5672  	LONG $0x4c7f0ff3; WORD $0x3079             // movdqu    oword [rcx + 2*rdi + 48], xmm1
  5673  	LONG $0x20c78348                           // add    rdi, 32 -- 32 elements handled per iteration
  5674  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5675  	JNE  LBB0_856
  5676  	JMP  LBB0_1375
  5677  
  5678  LBB0_864: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5679  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5680  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5681  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5682  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5683  	LONG $0x02efc148         // shr    rdi, 2
  5684  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5685  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5686  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5687  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5688  	JAE  LBB0_1228 // yes: branch with rax = rsi - 4
  5689  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5690  	JMP  LBB0_1230
  5691  
  5692  LBB0_866: // Setup + 2x-unrolled loop: sign-extend 8-bit elements at [rdx] to 32-bit, convert to float32, store at [rcx]; 8 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5693  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5694  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = r9d rounded down to a multiple of 8
  5695  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5696  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5697  	LONG $0x03e8c149         // shr    r8, 3
  5698  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1, the number of 8-element steps
  5699  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5700  	JE   LBB0_1382 // rsi == 8 (a single step): bypass the paired loop
  5701  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5702  	LONG $0xfee08348         // and    rax, -2
  5703  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5704  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5705  
  5706  LBB0_868: // loop body: two 8-element steps per iteration
  5707  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi] -- 4 bytes sign-extended to 4 dwords
  5708  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  5709  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0 -- int32 lanes to float32 lanes
  5710  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  5711  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  5712  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  5713  	LONG $0x21380f66; WORD $0x3a44; BYTE $0x08 // pmovsxbd    xmm0, dword [rdx + rdi + 8] -- second unrolled step
  5714  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x0c // pmovsxbd    xmm1, dword [rdx + rdi + 12]
  5715  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  5716  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  5717  	LONG $0xb944110f; BYTE $0x20               // movups    oword [rcx + 4*rdi + 32], xmm0
  5718  	LONG $0xb94c110f; BYTE $0x30               // movups    oword [rcx + 4*rdi + 48], xmm1
  5719  	LONG $0x10c78348                           // add    rdi, 16 -- 16 elements handled per iteration
  5720  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5721  	JNE  LBB0_868
  5722  	JMP  LBB0_1383
  5723  
  5724  LBB0_869: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5725  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5726  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5727  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5728  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5729  	LONG $0x02efc148         // shr    rdi, 2
  5730  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5731  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5732  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5733  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5734  	JAE  LBB0_1238 // yes: branch with rax = rsi - 4
  5735  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5736  	JMP  LBB0_1240
  5737  
  5738  LBB0_885: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5739  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5740  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5741  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5742  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5743  	LONG $0x02efc148         // shr    rdi, 2
  5744  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5745  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5746  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5747  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5748  	JAE  LBB0_1248 // yes: branch with rax = rsi - 4
  5749  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5750  	JMP  LBB0_1250
  5751  
  5752  LBB0_887: // Step-count setup for a loop handling 8 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5753  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5754  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = r9d rounded down to a multiple of 8
  5755  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5756  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5757  	LONG $0x03efc148         // shr    rdi, 3
  5758  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 8)/8 + 1, the number of 8-element steps
  5759  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5760  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5761  	LONG $0x18f88348         // cmp    rax, 24 -- at least 4 steps?
  5762  	JAE  LBB0_1258 // yes: branch with rax = rsi - 8
  5763  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5764  	JMP  LBB0_1260
  5765  
  5766  LBB0_889: // Step-count setup for a loop handling 4 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5767  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5768  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5769  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5770  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5771  	LONG $0x02efc148         // shr    rdi, 2
  5772  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 4)/4 + 1, the number of 4-element steps
  5773  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5774  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5775  	LONG $0x0cf88348         // cmp    rax, 12 -- at least 4 steps?
  5776  	JAE  LBB0_1268 // yes: branch with rax = rsi - 4
  5777  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5778  	JMP  LBB0_1270
  5779  
  5780  LBB0_891: // Setup + 2x-unrolled loop: zero-extend 8-bit elements at [rdx] to 32-bit, convert to float32, store at [rcx]; 8 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5781  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5782  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = r9d rounded down to a multiple of 8
  5783  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5784  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5785  	LONG $0x03e8c149         // shr    r8, 3
  5786  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1, the number of 8-element steps
  5787  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5788  	JE   LBB0_1390 // rsi == 8 (a single step): bypass the paired loop
  5789  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5790  	LONG $0xfee08348         // and    rax, -2
  5791  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5792  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5793  
  5794  LBB0_893: // loop body: two 8-element steps per iteration
  5795  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi] -- 4 bytes zero-extended to 4 dwords
  5796  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  5797  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0 -- int32 lanes to float32 lanes
  5798  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  5799  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  5800  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  5801  	LONG $0x31380f66; WORD $0x3a44; BYTE $0x08 // pmovzxbd    xmm0, dword [rdx + rdi + 8] -- second unrolled step
  5802  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x0c // pmovzxbd    xmm1, dword [rdx + rdi + 12]
  5803  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  5804  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  5805  	LONG $0xb944110f; BYTE $0x20               // movups    oword [rcx + 4*rdi + 32], xmm0
  5806  	LONG $0xb94c110f; BYTE $0x30               // movups    oword [rcx + 4*rdi + 48], xmm1
  5807  	LONG $0x10c78348                           // add    rdi, 16 -- 16 elements handled per iteration
  5808  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5809  	JNE  LBB0_893
  5810  	JMP  LBB0_1391
  5811  
  5812  LBB0_901: // Setup + 2x-unrolled loop: shuffle 32-bit elements at [rdx] down to bytes stored at [rcx], 8 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5813  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5814  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = r9d rounded down to a multiple of 8
  5815  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5816  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5817  	LONG $0x03e8c149         // shr    r8, 3
  5818  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1, the number of 8-element steps
  5819  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5820  	JE   LBB0_1398 // rsi == 8 (a single step): bypass the paired loop
  5821  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5822  	LONG $0xfee08348         // and    rax, -2
  5823  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5824  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5825  	QUAD $0x000000c0856f0f66 // movdqa    xmm0, oword 192[rbp] /* [rip + .LCPI0_13] */ -- pshufb mask; NOTE(review): if rbp is the LCDATA1 base the mask bytes are 00 04 08 0c (byte 0 of each dword) -- confirm
  5826  
  5827  LBB0_903: // loop body: two 8-element steps per iteration
  5828  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  5829  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  5830  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0 -- pack the selected bytes into the low dword
  5831  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5832  	LONG $0x0c7e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm1 -- store the 4 shuffled bytes
  5833  	LONG $0x547e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm2
  5834  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32] -- second unrolled step
  5835  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  5836  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5837  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5838  	LONG $0x4c7e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm1
  5839  	LONG $0x547e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm2
  5840  	LONG $0x10c78348               // add    rdi, 16 -- 16 elements handled per iteration
  5841  	LONG $0x02c08348               // add    rax, 2 -- two steps done; zero result ends the loop
  5842  	JNE  LBB0_903
  5843  	JMP  LBB0_1399
  5844  
  5845  LBB0_904: // Setup + 2x-unrolled loop: truncate float64 elements at [rdx] to int32, then shuffle down to bytes stored at [rcx]; 4 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5846  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  5847  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5848  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5849  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  5850  	LONG $0x02e8c149             // shr    r8, 2
  5851  	LONG $0x01c08349             // add    r8, 1 -- r8 = (rsi - 4)/4 + 1, the number of 4-element steps
  5852  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5853  	JE   LBB0_1406 // rsi == 4 (a single step): bypass the paired loop
  5854  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  5855  	LONG $0xfee08348             // and    rax, -2
  5856  	WORD $0xf748; BYTE $0xd8     // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5857  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
  5858  	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */ -- pshufb mask; NOTE(review): if rbp is the LCDATA1 base the mask starts 00 04 (byte 0 of dwords 0 and 1) -- confirm
  5859  
  5860  LBB0_906: // loop body: two 4-element steps per iteration
  5861  	LONG $0x0c100f66; BYTE $0xfa               // movupd    xmm1, oword [rdx + 8*rdi]
  5862  	LONG $0x54100f66; WORD $0x10fa             // movupd    xmm2, oword [rdx + 8*rdi + 16]
  5863  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1 -- 2 doubles truncated to 2 int32 in the low qword
  5864  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
  5865  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5866  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 -- store the 2 result bytes
  5867  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5868  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  5869  	LONG $0x4c100f66; WORD $0x20fa             // movupd    xmm1, oword [rdx + 8*rdi + 32] -- second unrolled step
  5870  	LONG $0x54100f66; WORD $0x30fa             // movupd    xmm2, oword [rdx + 8*rdi + 48]
  5871  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1
  5872  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
  5873  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5874  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  5875  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5876  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  5877  	LONG $0x08c78348                           // add    rdi, 8 -- 8 elements handled per iteration
  5878  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5879  	JNE  LBB0_906
  5880  	JMP  LBB0_1407
  5881  
  5882  LBB0_907: // Step-count setup for a loop handling 32 elements per step. NOTE(review): r9d presumably holds the element count; both jump targets lie outside this chunk -- confirm
  5883  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5884  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- rsi = r9d rounded down to a multiple of 32
  5885  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5886  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  5887  	LONG $0x05efc148         // shr    rdi, 5
  5888  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 32)/32 + 1, the number of 32-element steps
  5889  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  5890  	LONG $0x03e08341         // and    r8d, 3 -- r8d = step count mod 4
  5891  	LONG $0x60f88348         // cmp    rax, 96 -- at least 4 steps?
  5892  	JAE  LBB0_1278 // yes: branch with rax = rsi - 32
  5893  	WORD $0xc031             // xor    eax, eax -- no: rax = 0 before jumping
  5894  	JMP  LBB0_1280
  5895  
  5896  LBB0_909: // Setup + 2x-unrolled loop: shuffle 64-bit elements at [rdx] down to bytes stored at [rcx], 4 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5897  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  5898  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5899  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5900  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  5901  	LONG $0x02e8c149             // shr    r8, 2
  5902  	LONG $0x01c08349             // add    r8, 1 -- r8 = (rsi - 4)/4 + 1, the number of 4-element steps
  5903  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5904  	JE   LBB0_1414 // rsi == 4 (a single step): bypass the paired loop
  5905  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  5906  	LONG $0xfee08348             // and    rax, -2
  5907  	WORD $0xf748; BYTE $0xd8     // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5908  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
  5909  	LONG $0x456f0f66; BYTE $0x40 // movdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_5] */ -- pshufb mask; NOTE(review): if rbp is the LCDATA1 base the mask starts 00 08 (byte 0 of each qword) -- confirm
  5910  
  5911  LBB0_911: // loop body: two 4-element steps per iteration
  5912  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
  5913  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
  5914  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5915  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 -- store the 2 result bytes
  5916  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5917  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  5918  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32] -- second unrolled step
  5919  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
  5920  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  5921  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  5922  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  5923  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  5924  	LONG $0x08c78348                           // add    rdi, 8 -- 8 elements handled per iteration
  5925  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  5926  	JNE  LBB0_911
  5927  	JMP  LBB0_1415
  5928  
  5929  LBB0_912: // Setup + 2x-unrolled loop: shuffle 16-bit elements at [rdx] down to bytes stored at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5930  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5931  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5932  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5933  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5934  	LONG $0x04e8c149         // shr    r8, 4
  5935  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5936  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5937  	JE   LBB0_1422 // rsi == 16 (a single step): bypass the paired loop
  5938  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5939  	LONG $0xfee08348         // and    rax, -2
  5940  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5941  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5942  	QUAD $0x00000100856f0f66 // movdqa    xmm0, oword 256[rbp] /* [rip + .LCPI0_17] */ -- pshufb mask; NOTE(review): presumably gathers the low byte of each word into the low qword -- confirm against the LCPI0_17 data
  5943  
  5944  LBB0_914: // loop body: two 16-element steps per iteration
  5945  	LONG $0x0c6f0ff3; BYTE $0x7a   // movdqu    xmm1, oword [rdx + 2*rdi]
  5946  	LONG $0x546f0ff3; WORD $0x107a // movdqu    xmm2, oword [rdx + 2*rdi + 16]
  5947  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5948  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5949  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 -- join the low qwords of both shuffles into one 16-byte result
  5950  	LONG $0x0c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm1
  5951  	LONG $0x4c6f0ff3; WORD $0x207a // movdqu    xmm1, oword [rdx + 2*rdi + 32] -- second unrolled step
  5952  	LONG $0x546f0ff3; WORD $0x307a // movdqu    xmm2, oword [rdx + 2*rdi + 48]
  5953  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5954  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5955  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  5956  	LONG $0x4c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm1
  5957  	LONG $0x20c78348               // add    rdi, 32 -- 32 elements handled per iteration
  5958  	LONG $0x02c08348               // add    rax, 2 -- two steps done; zero result ends the loop
  5959  	JNE  LBB0_914
  5960  	JMP  LBB0_1423
  5961  
  5962  LBB0_915: // Setup + 2x-unrolled loop: shuffle 16-bit elements at [rdx] down to bytes stored at [rcx], 16 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5963  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  5964  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- rsi = r9d rounded down to a multiple of 16
  5965  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  5966  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  5967  	LONG $0x04e8c149         // shr    r8, 4
  5968  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 16)/16 + 1, the number of 16-element steps
  5969  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5970  	JE   LBB0_1430 // rsi == 16 (a single step): bypass the paired loop
  5971  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  5972  	LONG $0xfee08348         // and    rax, -2
  5973  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  5974  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  5975  	QUAD $0x00000100856f0f66 // movdqa    xmm0, oword 256[rbp] /* [rip + .LCPI0_17] */ -- pshufb mask; NOTE(review): presumably gathers the low byte of each word into the low qword -- confirm against the LCPI0_17 data
  5976  
  5977  LBB0_917: // loop body: two 16-element steps per iteration
  5978  	LONG $0x0c6f0ff3; BYTE $0x7a   // movdqu    xmm1, oword [rdx + 2*rdi]
  5979  	LONG $0x546f0ff3; WORD $0x107a // movdqu    xmm2, oword [rdx + 2*rdi + 16]
  5980  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5981  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5982  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2 -- join the low qwords of both shuffles into one 16-byte result
  5983  	LONG $0x0c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm1
  5984  	LONG $0x4c6f0ff3; WORD $0x207a // movdqu    xmm1, oword [rdx + 2*rdi + 32] -- second unrolled step
  5985  	LONG $0x546f0ff3; WORD $0x307a // movdqu    xmm2, oword [rdx + 2*rdi + 48]
  5986  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  5987  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  5988  	LONG $0xca6c0f66               // punpcklqdq    xmm1, xmm2
  5989  	LONG $0x4c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm1
  5990  	LONG $0x20c78348               // add    rdi, 32 -- 32 elements handled per iteration
  5991  	LONG $0x02c08348               // add    rax, 2 -- two steps done; zero result ends the loop
  5992  	JNE  LBB0_917
  5993  	JMP  LBB0_1431
  5994  
  5995  LBB0_918: // Setup + 2x-unrolled loop: shuffle 64-bit elements at [rdx] down to bytes stored at [rcx], 4 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  5996  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
  5997  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- rsi = r9d rounded down to a multiple of 4
  5998  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
  5999  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
  6000  	LONG $0x02e8c149             // shr    r8, 2
  6001  	LONG $0x01c08349             // add    r8, 1 -- r8 = (rsi - 4)/4 + 1, the number of 4-element steps
  6002  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  6003  	JE   LBB0_1438 // rsi == 4 (a single step): bypass the paired loop
  6004  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
  6005  	LONG $0xfee08348             // and    rax, -2
  6006  	WORD $0xf748; BYTE $0xd8     // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  6007  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
  6008  	LONG $0x456f0f66; BYTE $0x40 // movdqa    xmm0, oword 64[rbp] /* [rip + .LCPI0_5] */ -- pshufb mask; NOTE(review): if rbp is the LCDATA1 base the mask starts 00 08 (byte 0 of each qword) -- confirm
  6009  
  6010  LBB0_920: // loop body: two 4-element steps per iteration
  6011  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
  6012  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
  6013  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  6014  	LONG $0x153a0f66; WORD $0x390c; BYTE $0x00 // pextrw    word [rcx + rdi], xmm1, 0 -- store the 2 result bytes
  6015  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  6016  	QUAD $0x00023954153a0f66                   // pextrw    word [rcx + rdi + 2], xmm2, 0
  6017  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32] -- second unrolled step
  6018  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
  6019  	LONG $0x00380f66; BYTE $0xc8               // pshufb    xmm1, xmm0
  6020  	QUAD $0x0004394c153a0f66                   // pextrw    word [rcx + rdi + 4], xmm1, 0
  6021  	LONG $0x00380f66; BYTE $0xd0               // pshufb    xmm2, xmm0
  6022  	QUAD $0x00063954153a0f66                   // pextrw    word [rcx + rdi + 6], xmm2, 0
  6023  	LONG $0x08c78348                           // add    rdi, 8 -- 8 elements handled per iteration
  6024  	LONG $0x02c08348                           // add    rax, 2 -- two steps done; zero result ends the loop
  6025  	JNE  LBB0_920
  6026  	JMP  LBB0_1439
  6027  
  6028  LBB0_921: // Setup + 2x-unrolled loop: truncate float32 elements at [rdx] to int32, narrow with two saturating packs, store bytes at [rcx]; 8 elements per step. NOTE(review): r9d presumably holds the element count -- confirm
  6029  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6030  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = r9d rounded down to a multiple of 8
  6031  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6032  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6033  	LONG $0x03e8c149         // shr    r8, 3
  6034  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1, the number of 8-element steps
  6035  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6036  	JE   LBB0_1446 // rsi == 8 (a single step): bypass the paired loop
  6037  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6038  	LONG $0xfee08348         // and    rax, -2
  6039  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(step count rounded down to even); the loop counts it up to 0
  6040  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  6041  
  6042  LBB0_923: // loop body: two 8-element steps per iteration
  6043  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6044  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6045  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0 -- 4 floats truncated to 4 int32
  6046  	LONG $0x2b380f66; BYTE $0xc0   // packusdw    xmm0, xmm0 -- dwords to words with unsigned saturation
  6047  	LONG $0xc0670f66               // packuswb    xmm0, xmm0 -- words to bytes with unsigned saturation
  6048  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  6049  	LONG $0x2b380f66; BYTE $0xc9   // packusdw    xmm1, xmm1
  6050  	LONG $0xc9670f66               // packuswb    xmm1, xmm1
  6051  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0 -- store the 4 result bytes
  6052  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  6053  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32] -- second unrolled step
  6054  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  6055  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  6056  	LONG $0x2b380f66; BYTE $0xc0   // packusdw    xmm0, xmm0
  6057  	LONG $0xc0670f66               // packuswb    xmm0, xmm0
  6058  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  6059  	LONG $0x2b380f66; BYTE $0xc9   // packusdw    xmm1, xmm1
  6060  	LONG $0xc9670f66               // packuswb    xmm1, xmm1
  6061  	LONG $0x447e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm0
  6062  	LONG $0x4c7e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm1
  6063  	LONG $0x10c78348               // add    rdi, 16 -- 16 elements handled per iteration
  6064  	LONG $0x02c08348               // add    rax, 2 -- two steps done; zero result ends the loop
  6065  	JNE  LBB0_923
  6066  	JMP  LBB0_1447
  6067  
  6068  LBB0_924: // Group-count setup for a kernel that works in 32-element groups; the loops it dispatches to are outside this chunk.
  6069  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6070  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 -- rsi = low 32 bits of r9 rounded down to a multiple of 32
  6071  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  6072  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6073  	LONG $0x05efc148         // shr    rdi, 5
  6074  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 32)/32 + 1 = number of 32-element groups
  6075  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6076  	LONG $0x03e08341         // and    r8d, 3 -- r8 = group count mod 4
  6077  	LONG $0x60f88348         // cmp    rax, 96
  6078  	JAE  LBB0_1288 // rsi - 32 >= 96, i.e. at least four groups (presumably a 4x-unrolled loop; target not visible here)
  6079  	WORD $0xc031             // xor    eax, eax -- fewer than four groups: start at offset 0
  6080  	JMP  LBB0_1290 // presumably the per-group remainder path; target not visible here
  6081  
  6082  LBB0_926: // Setup for the 32-bit -> 8-bit narrowing loop below; rdx is read as the source and rcx is written as the destination.
  6083  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6084  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = low 32 bits of r9 rounded down to a multiple of 8
  6085  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6086  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6087  	LONG $0x03e8c149         // shr    r8, 3
  6088  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1 = number of 8-element groups
  6089  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6090  	JE   LBB0_1454 // rsi == 8 (a single group): bypass the two-groups-per-pass loop; target is outside this chunk
  6091  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6092  	LONG $0xfee08348         // and    rax, -2
  6093  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even); the loop counts it up to zero
  6094  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  6095  	QUAD $0x000000c0856f0f66 // movdqa    xmm0, oword 192[rbp] /* [rip + .LCPI0_13] */ -- pshufb mask; assuming rbp holds the LCDATA1 base (set outside this chunk), offset 0xc0 has low bytes 0,4,8,12, which picks the low byte of each dword
  6096  
  6097  LBB0_928: // Each pass narrows 16 32-bit elements to 16 bytes by shuffling selected bytes into the low dword and storing it.
  6098  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
  6099  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
  6100  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  6101  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  6102  	LONG $0x0c7e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm1 -- only the low 4 bytes are stored
  6103  	LONG $0x547e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm2
  6104  	LONG $0x4c6f0ff3; WORD $0x20ba // movdqu    xmm1, oword [rdx + 4*rdi + 32]
  6105  	LONG $0x546f0ff3; WORD $0x30ba // movdqu    xmm2, oword [rdx + 4*rdi + 48]
  6106  	LONG $0x00380f66; BYTE $0xc8   // pshufb    xmm1, xmm0
  6107  	LONG $0x00380f66; BYTE $0xd0   // pshufb    xmm2, xmm0
  6108  	LONG $0x4c7e0f66; WORD $0x0839 // movd    dword [rcx + rdi + 8], xmm1
  6109  	LONG $0x547e0f66; WORD $0x0c39 // movd    dword [rcx + rdi + 12], xmm2
  6110  	LONG $0x10c78348               // add    rdi, 16 -- 16 elements consumed
  6111  	LONG $0x02c08348               // add    rax, 2 -- two 8-element groups consumed; sets ZF when the counter reaches zero
  6112  	JNE  LBB0_928
  6113  	JMP  LBB0_1455 // continuation is outside this chunk
  6114  
  6115  LBB0_929: // Group-count setup for a kernel that works in 8-element groups; the loops it dispatches to are outside this chunk.
  6116  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6117  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = low 32 bits of r9 rounded down to a multiple of 8
  6118  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6119  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6120  	LONG $0x03efc148         // shr    rdi, 3
  6121  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 8)/8 + 1 = number of 8-element groups
  6122  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6123  	LONG $0x03e08341         // and    r8d, 3 -- r8 = group count mod 4
  6124  	LONG $0x18f88348         // cmp    rax, 24
  6125  	JAE  LBB0_1298 // rsi - 8 >= 24, i.e. at least four groups (presumably a 4x-unrolled loop; target not visible here)
  6126  	WORD $0xc031             // xor    eax, eax -- fewer than four groups: start at offset 0
  6127  	JMP  LBB0_1300 // presumably the per-group remainder path; target not visible here
  6128  
  6129  LBB0_931: // Setup for the 8-bit -> 32-bit sign-extending loop below; rdx is read as the source and rcx is written as the destination.
  6130  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6131  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = low 32 bits of r9 rounded down to a multiple of 8
  6132  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6133  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6134  	LONG $0x03e8c149         // shr    r8, 3
  6135  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1 = number of 8-element groups
  6136  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6137  	JE   LBB0_1462 // rsi == 8 (a single group): bypass the two-groups-per-pass loop; target is outside this chunk
  6138  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6139  	LONG $0xfee08348         // and    rax, -2
  6140  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even); the loop counts it up to zero
  6141  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  6142  
  6143  LBB0_933: // Each pass widens 16 bytes to 16 dwords; every pmovsxbd sign-extends 4 bytes into 4 dwords.
  6144  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi]
  6145  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  6146  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  6147  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  6148  	LONG $0x21380f66; WORD $0x3a44; BYTE $0x08 // pmovsxbd    xmm0, dword [rdx + rdi + 8]
  6149  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x0c // pmovsxbd    xmm1, dword [rdx + rdi + 12]
  6150  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  6151  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  6152  	LONG $0x10c78348                           // add    rdi, 16 -- 16 elements consumed
  6153  	LONG $0x02c08348                           // add    rax, 2 -- two 8-element groups consumed; sets ZF when the counter reaches zero
  6154  	JNE  LBB0_933
  6155  	JMP  LBB0_1463 // continuation is outside this chunk
  6156  
  6157  LBB0_934: // Setup for the 8-bit -> 32-bit zero-extending loop below; rdx is read as the source and rcx is written as the destination.
  6158  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6159  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = low 32 bits of r9 rounded down to a multiple of 8
  6160  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6161  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
  6162  	LONG $0x03e8c149         // shr    r8, 3
  6163  	LONG $0x01c08349         // add    r8, 1 -- r8 = (rsi - 8)/8 + 1 = number of 8-element groups
  6164  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  6165  	JE   LBB0_1470 // rsi == 8 (a single group): bypass the two-groups-per-pass loop; target is outside this chunk
  6166  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
  6167  	LONG $0xfee08348         // and    rax, -2
  6168  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(group count rounded down to even); the loop counts it up to zero
  6169  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
  6170  
  6171  LBB0_936: // Each pass widens 16 bytes to 16 dwords; every pmovzxbd zero-extends 4 bytes into 4 dwords.
  6172  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi]
  6173  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  6174  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  6175  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  6176  	LONG $0x31380f66; WORD $0x3a44; BYTE $0x08 // pmovzxbd    xmm0, dword [rdx + rdi + 8]
  6177  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x0c // pmovzxbd    xmm1, dword [rdx + rdi + 12]
  6178  	LONG $0x447f0ff3; WORD $0x20b9             // movdqu    oword [rcx + 4*rdi + 32], xmm0
  6179  	LONG $0x4c7f0ff3; WORD $0x30b9             // movdqu    oword [rcx + 4*rdi + 48], xmm1
  6180  	LONG $0x10c78348                           // add    rdi, 16 -- 16 elements consumed
  6181  	LONG $0x02c08348                           // add    rax, 2 -- two 8-element groups consumed; sets ZF when the counter reaches zero
  6182  	JNE  LBB0_936
  6183  	JMP  LBB0_1471 // continuation is outside this chunk
  6184  
  6185  LBB0_937: // Group-count setup for a kernel that works in 8-element groups; the loops it dispatches to are outside this chunk.
  6186  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  6187  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = low 32 bits of r9 rounded down to a multiple of 8
  6188  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  6189  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  6190  	LONG $0x03efc148         // shr    rdi, 3
  6191  	LONG $0x01c78348         // add    rdi, 1 -- rdi = (rsi - 8)/8 + 1 = number of 8-element groups
  6192  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
  6193  	LONG $0x03e08341         // and    r8d, 3 -- r8 = group count mod 4
  6194  	LONG $0x18f88348         // cmp    rax, 24
  6195  	JAE  LBB0_1308 // rsi - 8 >= 24, i.e. at least four groups (presumably a 4x-unrolled loop; target not visible here)
  6196  	WORD $0xc031             // xor    eax, eax -- fewer than four groups: start at offset 0
  6197  	JMP  LBB0_1310 // presumably the per-group remainder path; target not visible here
  6198  
  6199  LBB0_801: // 32-bit -> 64-bit zero extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6200  	LONG $0xfce78348         // and    rdi, -4
  6201  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6202  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6203  
  6204  LBB0_802: // Each pass widens 16 dwords to 16 qwords; every pmovzxdq zero-extends 2 dwords into 2 qwords.
  6205  	LONG $0x35380f66; WORD $0x8204             // pmovzxdq    xmm0, qword [rdx + 4*rax]
  6206  	LONG $0x35380f66; WORD $0x824c; BYTE $0x08 // pmovzxdq    xmm1, qword [rdx + 4*rax + 8]
  6207  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6208  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6209  	LONG $0x35380f66; WORD $0x8244; BYTE $0x10 // pmovzxdq    xmm0, qword [rdx + 4*rax + 16]
  6210  	LONG $0x35380f66; WORD $0x824c; BYTE $0x18 // pmovzxdq    xmm1, qword [rdx + 4*rax + 24]
  6211  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6212  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6213  	LONG $0x35380f66; WORD $0x8244; BYTE $0x20 // pmovzxdq    xmm0, qword [rdx + 4*rax + 32]
  6214  	LONG $0x35380f66; WORD $0x824c; BYTE $0x28 // pmovzxdq    xmm1, qword [rdx + 4*rax + 40]
  6215  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6216  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6217  	LONG $0x35380f66; WORD $0x8244; BYTE $0x30 // pmovzxdq    xmm0, qword [rdx + 4*rax + 48]
  6218  	LONG $0x35380f66; WORD $0x824c; BYTE $0x38 // pmovzxdq    xmm1, qword [rdx + 4*rax + 56]
  6219  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6220  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6221  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6222  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6223  	JNE  LBB0_802
  6224  
  6225  LBB0_803: // Remainder: r8 further 4-element groups, one per pass of LBB0_805.
  6226  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6227  	JE   LBB0_806
  6228  	QUAD $0x0000000885048d48 // lea    rax, [4*rax + 8] -- element index -> source byte offset, biased by 8 so the loop can address [rax - 8] and [rax]
  6229  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- counted up to zero by the inc below
  6230  
  6231  LBB0_805: // One 4-element group per pass; the destination offset is 2*rax because each output element is twice as wide.
  6232  	LONG $0x35380f66; WORD $0x0244; BYTE $0xf8 // pmovzxdq    xmm0, qword [rdx + rax - 8]
  6233  	LONG $0x35380f66; WORD $0x020c             // pmovzxdq    xmm1, qword [rdx + rax]
  6234  	LONG $0x447f0ff3; WORD $0xf041             // movdqu    oword [rcx + 2*rax - 16], xmm0
  6235  	LONG $0x0c7f0ff3; BYTE $0x41               // movdqu    oword [rcx + 2*rax], xmm1
  6236  	LONG $0x10c08348                           // add    rax, 16
  6237  	WORD $0xff49; BYTE $0xc0                   // inc    r8
  6238  	JNE  LBB0_805
  6239  
  6240  LBB0_806: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6241  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6242  	JE   LBB0_1526 // common exit label, outside this chunk
  6243  
  6244  LBB0_807: // Scalar tail, one element per pass.
  6245  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi] -- a 32-bit write clears the upper half of rax, giving the zero extension
  6246  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6247  	LONG $0x01c68348         // add    rsi, 1
  6248  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6249  	JNE  LBB0_807
  6250  	JMP  LBB0_1526
  6251  
  6252  LBB0_812: // 16-bit -> 64-bit zero extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6253  	LONG $0xfce78348         // and    rdi, -4
  6254  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6255  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6256  
  6257  LBB0_813: // Each pass widens 16 words to 16 qwords; every pmovzxwq zero-extends 2 words into 2 qwords.
  6258  	LONG $0x34380f66; WORD $0x4204             // pmovzxwq    xmm0, dword [rdx + 2*rax]
  6259  	LONG $0x34380f66; WORD $0x424c; BYTE $0x04 // pmovzxwq    xmm1, dword [rdx + 2*rax + 4]
  6260  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6261  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6262  	LONG $0x34380f66; WORD $0x4244; BYTE $0x08 // pmovzxwq    xmm0, dword [rdx + 2*rax + 8]
  6263  	LONG $0x34380f66; WORD $0x424c; BYTE $0x0c // pmovzxwq    xmm1, dword [rdx + 2*rax + 12]
  6264  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6265  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6266  	LONG $0x34380f66; WORD $0x4244; BYTE $0x10 // pmovzxwq    xmm0, dword [rdx + 2*rax + 16]
  6267  	LONG $0x34380f66; WORD $0x424c; BYTE $0x14 // pmovzxwq    xmm1, dword [rdx + 2*rax + 20]
  6268  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6269  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6270  	LONG $0x34380f66; WORD $0x4244; BYTE $0x18 // pmovzxwq    xmm0, dword [rdx + 2*rax + 24]
  6271  	LONG $0x34380f66; WORD $0x424c; BYTE $0x1c // pmovzxwq    xmm1, dword [rdx + 2*rax + 28]
  6272  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6273  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6274  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6275  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6276  	JNE  LBB0_813
  6277  
  6278  LBB0_814: // Remainder: r8 further 4-element groups, one per pass of LBB0_816.
  6279  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6280  	JE   LBB0_817
  6281  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  6282  	LONG $0x10c78348         // add    rdi, 16 -- rdi = destination cursor, biased by 16 so the loop can address [rdi - 16] and [rdi]
  6283  	LONG $0x42148d4c         // lea    r10, [rdx + 2*rax]
  6284  	LONG $0x04c28349         // add    r10, 4 -- r10 = source cursor base, biased by 4 so the loop can address [.. - 4] and [..]
  6285  	WORD $0xc031             // xor    eax, eax -- rax is reused as the pass counter
  6286  
  6287  LBB0_816: // One 4-element group per pass: the source advances 8 bytes through the 8*rax index, the destination 32 bytes through rdi.
  6288  	QUAD $0xfcc24434380f4166                   // pmovzxwq    xmm0, dword [r10 + 8*rax - 4]
  6289  	LONG $0x380f4166; WORD $0x0c34; BYTE $0xc2 // pmovzxwq    xmm1, dword [r10 + 8*rax]
  6290  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  6291  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  6292  	LONG $0x20c78348                           // add    rdi, 32
  6293  	LONG $0x01c08348                           // add    rax, 1
  6294  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  6295  	JNE  LBB0_816
  6296  
  6297  LBB0_817: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6298  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6299  	JE   LBB0_1526 // common exit label, outside this chunk
  6300  
  6301  LBB0_818: // Scalar tail, one element per pass.
  6302  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi] -- zero-extends; the 32-bit write also clears the upper half of rax
  6303  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6304  	LONG $0x01c68348         // add    rsi, 1
  6305  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6306  	JNE  LBB0_818
  6307  	JMP  LBB0_1526
  6308  
  6309  LBB0_819: // 16-bit -> 64-bit sign extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6310  	LONG $0xfce78348         // and    rdi, -4
  6311  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6312  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6313  
  6314  LBB0_820: // Each pass widens 16 words to 16 qwords; every pmovsxwq sign-extends 2 words into 2 qwords.
  6315  	LONG $0x24380f66; WORD $0x4204             // pmovsxwq    xmm0, dword [rdx + 2*rax]
  6316  	LONG $0x24380f66; WORD $0x424c; BYTE $0x04 // pmovsxwq    xmm1, dword [rdx + 2*rax + 4]
  6317  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6318  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6319  	LONG $0x24380f66; WORD $0x4244; BYTE $0x08 // pmovsxwq    xmm0, dword [rdx + 2*rax + 8]
  6320  	LONG $0x24380f66; WORD $0x424c; BYTE $0x0c // pmovsxwq    xmm1, dword [rdx + 2*rax + 12]
  6321  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6322  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6323  	LONG $0x24380f66; WORD $0x4244; BYTE $0x10 // pmovsxwq    xmm0, dword [rdx + 2*rax + 16]
  6324  	LONG $0x24380f66; WORD $0x424c; BYTE $0x14 // pmovsxwq    xmm1, dword [rdx + 2*rax + 20]
  6325  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6326  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6327  	LONG $0x24380f66; WORD $0x4244; BYTE $0x18 // pmovsxwq    xmm0, dword [rdx + 2*rax + 24]
  6328  	LONG $0x24380f66; WORD $0x424c; BYTE $0x1c // pmovsxwq    xmm1, dword [rdx + 2*rax + 28]
  6329  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6330  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6331  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6332  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6333  	JNE  LBB0_820
  6334  
  6335  LBB0_821: // Remainder: r8 further 4-element groups, one per pass of LBB0_823.
  6336  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6337  	JE   LBB0_824
  6338  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  6339  	LONG $0x10c78348         // add    rdi, 16 -- rdi = destination cursor, biased by 16 so the loop can address [rdi - 16] and [rdi]
  6340  	LONG $0x42148d4c         // lea    r10, [rdx + 2*rax]
  6341  	LONG $0x04c28349         // add    r10, 4 -- r10 = source cursor base, biased by 4 so the loop can address [.. - 4] and [..]
  6342  	WORD $0xc031             // xor    eax, eax -- rax is reused as the pass counter
  6343  
  6344  LBB0_823: // One 4-element group per pass: the source advances 8 bytes through the 8*rax index, the destination 32 bytes through rdi.
  6345  	QUAD $0xfcc24424380f4166                   // pmovsxwq    xmm0, dword [r10 + 8*rax - 4]
  6346  	LONG $0x380f4166; WORD $0x0c24; BYTE $0xc2 // pmovsxwq    xmm1, dword [r10 + 8*rax]
  6347  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  6348  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  6349  	LONG $0x20c78348                           // add    rdi, 32
  6350  	LONG $0x01c08348                           // add    rax, 1
  6351  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  6352  	JNE  LBB0_823
  6353  
  6354  LBB0_824: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6355  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6356  	JE   LBB0_1526 // common exit label, outside this chunk
  6357  
  6358  LBB0_825: // Scalar tail, one element per pass.
  6359  	LONG $0x04bf0f48; BYTE $0x72 // movsx    rax, word [rdx + 2*rsi] -- sign-extends the word to 64 bits
  6360  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  6361  	LONG $0x01c68348             // add    rsi, 1
  6362  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6363  	JNE  LBB0_825
  6364  	JMP  LBB0_1526
  6365  
  6366  LBB0_830: // 32-bit -> 64-bit sign extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6367  	LONG $0xfce78348         // and    rdi, -4
  6368  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6369  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6370  
  6371  LBB0_831: // Each pass widens 16 dwords to 16 qwords; every pmovsxdq sign-extends 2 dwords into 2 qwords.
  6372  	LONG $0x25380f66; WORD $0x8204             // pmovsxdq    xmm0, qword [rdx + 4*rax]
  6373  	LONG $0x25380f66; WORD $0x824c; BYTE $0x08 // pmovsxdq    xmm1, qword [rdx + 4*rax + 8]
  6374  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6375  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6376  	LONG $0x25380f66; WORD $0x8244; BYTE $0x10 // pmovsxdq    xmm0, qword [rdx + 4*rax + 16]
  6377  	LONG $0x25380f66; WORD $0x824c; BYTE $0x18 // pmovsxdq    xmm1, qword [rdx + 4*rax + 24]
  6378  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6379  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6380  	LONG $0x25380f66; WORD $0x8244; BYTE $0x20 // pmovsxdq    xmm0, qword [rdx + 4*rax + 32]
  6381  	LONG $0x25380f66; WORD $0x824c; BYTE $0x28 // pmovsxdq    xmm1, qword [rdx + 4*rax + 40]
  6382  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6383  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6384  	LONG $0x25380f66; WORD $0x8244; BYTE $0x30 // pmovsxdq    xmm0, qword [rdx + 4*rax + 48]
  6385  	LONG $0x25380f66; WORD $0x824c; BYTE $0x38 // pmovsxdq    xmm1, qword [rdx + 4*rax + 56]
  6386  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6387  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6388  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6389  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6390  	JNE  LBB0_831
  6391  
  6392  LBB0_832: // Remainder: r8 further 4-element groups, one per pass of LBB0_834.
  6393  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6394  	JE   LBB0_835
  6395  	QUAD $0x0000000885048d48 // lea    rax, [4*rax + 8] -- element index -> source byte offset, biased by 8 so the loop can address [rax - 8] and [rax]
  6396  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- counted up to zero by the inc below
  6397  
  6398  LBB0_834: // One 4-element group per pass; the destination offset is 2*rax because each output element is twice as wide.
  6399  	LONG $0x25380f66; WORD $0x0244; BYTE $0xf8 // pmovsxdq    xmm0, qword [rdx + rax - 8]
  6400  	LONG $0x25380f66; WORD $0x020c             // pmovsxdq    xmm1, qword [rdx + rax]
  6401  	LONG $0x447f0ff3; WORD $0xf041             // movdqu    oword [rcx + 2*rax - 16], xmm0
  6402  	LONG $0x0c7f0ff3; BYTE $0x41               // movdqu    oword [rcx + 2*rax], xmm1
  6403  	LONG $0x10c08348                           // add    rax, 16
  6404  	WORD $0xff49; BYTE $0xc0                   // inc    r8
  6405  	JNE  LBB0_834
  6406  
  6407  LBB0_835: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6408  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6409  	JE   LBB0_1526 // common exit label, outside this chunk
  6410  
  6411  LBB0_836: // Scalar tail, one element per pass.
  6412  	LONG $0xb2046348         // movsxd    rax, dword [rdx + 4*rsi] -- sign-extends the dword to 64 bits
  6413  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6414  	LONG $0x01c68348         // add    rsi, 1
  6415  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6416  	JNE  LBB0_836
  6417  	JMP  LBB0_1526
  6418  
  6419  LBB0_857: // 32-bit -> 64-bit zero extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6420  	LONG $0xfce78348         // and    rdi, -4
  6421  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6422  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6423  
  6424  LBB0_858: // Each pass widens 16 dwords to 16 qwords; every pmovzxdq zero-extends 2 dwords into 2 qwords.
  6425  	LONG $0x35380f66; WORD $0x8204             // pmovzxdq    xmm0, qword [rdx + 4*rax]
  6426  	LONG $0x35380f66; WORD $0x824c; BYTE $0x08 // pmovzxdq    xmm1, qword [rdx + 4*rax + 8]
  6427  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6428  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6429  	LONG $0x35380f66; WORD $0x8244; BYTE $0x10 // pmovzxdq    xmm0, qword [rdx + 4*rax + 16]
  6430  	LONG $0x35380f66; WORD $0x824c; BYTE $0x18 // pmovzxdq    xmm1, qword [rdx + 4*rax + 24]
  6431  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6432  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6433  	LONG $0x35380f66; WORD $0x8244; BYTE $0x20 // pmovzxdq    xmm0, qword [rdx + 4*rax + 32]
  6434  	LONG $0x35380f66; WORD $0x824c; BYTE $0x28 // pmovzxdq    xmm1, qword [rdx + 4*rax + 40]
  6435  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6436  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6437  	LONG $0x35380f66; WORD $0x8244; BYTE $0x30 // pmovzxdq    xmm0, qword [rdx + 4*rax + 48]
  6438  	LONG $0x35380f66; WORD $0x824c; BYTE $0x38 // pmovzxdq    xmm1, qword [rdx + 4*rax + 56]
  6439  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6440  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6441  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6442  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6443  	JNE  LBB0_858
  6444  
  6445  LBB0_859: // Remainder: r8 further 4-element groups, one per pass of LBB0_861.
  6446  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6447  	JE   LBB0_862
  6448  	QUAD $0x0000000885048d48 // lea    rax, [4*rax + 8] -- element index -> source byte offset, biased by 8 so the loop can address [rax - 8] and [rax]
  6449  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- counted up to zero by the inc below
  6450  
  6451  LBB0_861: // One 4-element group per pass; the destination offset is 2*rax because each output element is twice as wide.
  6452  	LONG $0x35380f66; WORD $0x0244; BYTE $0xf8 // pmovzxdq    xmm0, qword [rdx + rax - 8]
  6453  	LONG $0x35380f66; WORD $0x020c             // pmovzxdq    xmm1, qword [rdx + rax]
  6454  	LONG $0x447f0ff3; WORD $0xf041             // movdqu    oword [rcx + 2*rax - 16], xmm0
  6455  	LONG $0x0c7f0ff3; BYTE $0x41               // movdqu    oword [rcx + 2*rax], xmm1
  6456  	LONG $0x10c08348                           // add    rax, 16
  6457  	WORD $0xff49; BYTE $0xc0                   // inc    r8
  6458  	JNE  LBB0_861
  6459  
  6460  LBB0_862: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6461  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6462  	JE   LBB0_1526 // common exit label, outside this chunk
  6463  
  6464  LBB0_863: // Scalar tail, one element per pass.
  6465  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi] -- a 32-bit write clears the upper half of rax, giving the zero extension
  6466  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6467  	LONG $0x01c68348         // add    rsi, 1
  6468  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6469  	JNE  LBB0_863
  6470  	JMP  LBB0_1526
  6471  
  6472  LBB0_871: // 16-bit -> 64-bit zero extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6473  	LONG $0xfce78348         // and    rdi, -4
  6474  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6475  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6476  
  6477  LBB0_872: // Each pass widens 16 words to 16 qwords; every pmovzxwq zero-extends 2 words into 2 qwords.
  6478  	LONG $0x34380f66; WORD $0x4204             // pmovzxwq    xmm0, dword [rdx + 2*rax]
  6479  	LONG $0x34380f66; WORD $0x424c; BYTE $0x04 // pmovzxwq    xmm1, dword [rdx + 2*rax + 4]
  6480  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6481  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6482  	LONG $0x34380f66; WORD $0x4244; BYTE $0x08 // pmovzxwq    xmm0, dword [rdx + 2*rax + 8]
  6483  	LONG $0x34380f66; WORD $0x424c; BYTE $0x0c // pmovzxwq    xmm1, dword [rdx + 2*rax + 12]
  6484  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6485  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6486  	LONG $0x34380f66; WORD $0x4244; BYTE $0x10 // pmovzxwq    xmm0, dword [rdx + 2*rax + 16]
  6487  	LONG $0x34380f66; WORD $0x424c; BYTE $0x14 // pmovzxwq    xmm1, dword [rdx + 2*rax + 20]
  6488  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6489  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6490  	LONG $0x34380f66; WORD $0x4244; BYTE $0x18 // pmovzxwq    xmm0, dword [rdx + 2*rax + 24]
  6491  	LONG $0x34380f66; WORD $0x424c; BYTE $0x1c // pmovzxwq    xmm1, dword [rdx + 2*rax + 28]
  6492  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6493  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6494  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6495  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6496  	JNE  LBB0_872
  6497  
  6498  LBB0_873: // Remainder: r8 further 4-element groups, one per pass of LBB0_875.
  6499  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6500  	JE   LBB0_876
  6501  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  6502  	LONG $0x10c78348         // add    rdi, 16 -- rdi = destination cursor, biased by 16 so the loop can address [rdi - 16] and [rdi]
  6503  	LONG $0x42148d4c         // lea    r10, [rdx + 2*rax]
  6504  	LONG $0x04c28349         // add    r10, 4 -- r10 = source cursor base, biased by 4 so the loop can address [.. - 4] and [..]
  6505  	WORD $0xc031             // xor    eax, eax -- rax is reused as the pass counter
  6506  
  6507  LBB0_875: // One 4-element group per pass: the source advances 8 bytes through the 8*rax index, the destination 32 bytes through rdi.
  6508  	QUAD $0xfcc24434380f4166                   // pmovzxwq    xmm0, dword [r10 + 8*rax - 4]
  6509  	LONG $0x380f4166; WORD $0x0c34; BYTE $0xc2 // pmovzxwq    xmm1, dword [r10 + 8*rax]
  6510  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  6511  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  6512  	LONG $0x20c78348                           // add    rdi, 32
  6513  	LONG $0x01c08348                           // add    rax, 1
  6514  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  6515  	JNE  LBB0_875
  6516  
  6517  LBB0_876: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6518  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6519  	JE   LBB0_1526 // common exit label, outside this chunk
  6520  
  6521  LBB0_877: // Scalar tail, one element per pass.
  6522  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi] -- zero-extends; the 32-bit write also clears the upper half of rax
  6523  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6524  	LONG $0x01c68348         // add    rsi, 1
  6525  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6526  	JNE  LBB0_877
  6527  	JMP  LBB0_1526
  6528  
  6529  LBB0_878: // 16-bit -> 64-bit sign extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6530  	LONG $0xfce78348         // and    rdi, -4
  6531  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6532  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6533  
  6534  LBB0_879: // Each pass widens 16 words to 16 qwords; every pmovsxwq sign-extends 2 words into 2 qwords.
  6535  	LONG $0x24380f66; WORD $0x4204             // pmovsxwq    xmm0, dword [rdx + 2*rax]
  6536  	LONG $0x24380f66; WORD $0x424c; BYTE $0x04 // pmovsxwq    xmm1, dword [rdx + 2*rax + 4]
  6537  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6538  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6539  	LONG $0x24380f66; WORD $0x4244; BYTE $0x08 // pmovsxwq    xmm0, dword [rdx + 2*rax + 8]
  6540  	LONG $0x24380f66; WORD $0x424c; BYTE $0x0c // pmovsxwq    xmm1, dword [rdx + 2*rax + 12]
  6541  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6542  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6543  	LONG $0x24380f66; WORD $0x4244; BYTE $0x10 // pmovsxwq    xmm0, dword [rdx + 2*rax + 16]
  6544  	LONG $0x24380f66; WORD $0x424c; BYTE $0x14 // pmovsxwq    xmm1, dword [rdx + 2*rax + 20]
  6545  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6546  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6547  	LONG $0x24380f66; WORD $0x4244; BYTE $0x18 // pmovsxwq    xmm0, dword [rdx + 2*rax + 24]
  6548  	LONG $0x24380f66; WORD $0x424c; BYTE $0x1c // pmovsxwq    xmm1, dword [rdx + 2*rax + 28]
  6549  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6550  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6551  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6552  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6553  	JNE  LBB0_879
  6554  
  6555  LBB0_880: // Remainder: r8 further 4-element groups, one per pass of LBB0_882.
  6556  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6557  	JE   LBB0_883
  6558  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  6559  	LONG $0x10c78348         // add    rdi, 16 -- rdi = destination cursor, biased by 16 so the loop can address [rdi - 16] and [rdi]
  6560  	LONG $0x42148d4c         // lea    r10, [rdx + 2*rax]
  6561  	LONG $0x04c28349         // add    r10, 4 -- r10 = source cursor base, biased by 4 so the loop can address [.. - 4] and [..]
  6562  	WORD $0xc031             // xor    eax, eax -- rax is reused as the pass counter
  6563  
  6564  LBB0_882: // One 4-element group per pass: the source advances 8 bytes through the 8*rax index, the destination 32 bytes through rdi.
  6565  	QUAD $0xfcc24424380f4166                   // pmovsxwq    xmm0, dword [r10 + 8*rax - 4]
  6566  	LONG $0x380f4166; WORD $0x0c24; BYTE $0xc2 // pmovsxwq    xmm1, dword [r10 + 8*rax]
  6567  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  6568  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  6569  	LONG $0x20c78348                           // add    rdi, 32
  6570  	LONG $0x01c08348                           // add    rax, 1
  6571  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  6572  	JNE  LBB0_882
  6573  
  6574  LBB0_883: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6575  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6576  	JE   LBB0_1526 // common exit label, outside this chunk
  6577  
  6578  LBB0_884: // Scalar tail, one element per pass.
  6579  	LONG $0x04bf0f48; BYTE $0x72 // movsx    rax, word [rdx + 2*rsi] -- sign-extends the word to 64 bits
  6580  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
  6581  	LONG $0x01c68348             // add    rsi, 1
  6582  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6583  	JNE  LBB0_884
  6584  	JMP  LBB0_1526
  6585  
  6586  LBB0_894: // 32-bit -> 64-bit sign extension, 4x-unrolled path. rdi and r8 arrive from code outside this chunk (presumably group count and group count mod 4).
  6587  	LONG $0xfce78348         // and    rdi, -4
  6588  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(rdi rounded down to a multiple of 4); the loop counts it up to zero
  6589  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  6590  
  6591  LBB0_895: // Each pass widens 16 dwords to 16 qwords; every pmovsxdq sign-extends 2 dwords into 2 qwords.
  6592  	LONG $0x25380f66; WORD $0x8204             // pmovsxdq    xmm0, qword [rdx + 4*rax]
  6593  	LONG $0x25380f66; WORD $0x824c; BYTE $0x08 // pmovsxdq    xmm1, qword [rdx + 4*rax + 8]
  6594  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  6595  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  6596  	LONG $0x25380f66; WORD $0x8244; BYTE $0x10 // pmovsxdq    xmm0, qword [rdx + 4*rax + 16]
  6597  	LONG $0x25380f66; WORD $0x824c; BYTE $0x18 // pmovsxdq    xmm1, qword [rdx + 4*rax + 24]
  6598  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  6599  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  6600  	LONG $0x25380f66; WORD $0x8244; BYTE $0x20 // pmovsxdq    xmm0, qword [rdx + 4*rax + 32]
  6601  	LONG $0x25380f66; WORD $0x824c; BYTE $0x28 // pmovsxdq    xmm1, qword [rdx + 4*rax + 40]
  6602  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  6603  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  6604  	LONG $0x25380f66; WORD $0x8244; BYTE $0x30 // pmovsxdq    xmm0, qword [rdx + 4*rax + 48]
  6605  	LONG $0x25380f66; WORD $0x824c; BYTE $0x38 // pmovsxdq    xmm1, qword [rdx + 4*rax + 56]
  6606  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  6607  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  6608  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements consumed
  6609  	LONG $0x04c78348                           // add    rdi, 4 -- four 4-element groups consumed; sets ZF when the counter reaches zero
  6610  	JNE  LBB0_895
  6611  
  6612  LBB0_896: // Remainder: r8 further 4-element groups, one per pass of LBB0_898.
  6613  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  6614  	JE   LBB0_899
  6615  	QUAD $0x0000000885048d48 // lea    rax, [4*rax + 8] -- element index -> source byte offset, biased by 8 so the loop can address [rax - 8] and [rax]
  6616  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- counted up to zero by the inc below
  6617  
  6618  LBB0_898: // One 4-element group per pass; the destination offset is 2*rax because each output element is twice as wide.
  6619  	LONG $0x25380f66; WORD $0x0244; BYTE $0xf8 // pmovsxdq    xmm0, qword [rdx + rax - 8]
  6620  	LONG $0x25380f66; WORD $0x020c             // pmovsxdq    xmm1, qword [rdx + rax]
  6621  	LONG $0x447f0ff3; WORD $0xf041             // movdqu    oword [rcx + 2*rax - 16], xmm0
  6622  	LONG $0x0c7f0ff3; BYTE $0x41               // movdqu    oword [rcx + 2*rax], xmm1
  6623  	LONG $0x10c08348                           // add    rax, 16
  6624  	WORD $0xff49; BYTE $0xc0                   // inc    r8
  6625  	JNE  LBB0_898
  6626  
  6627  LBB0_899: // The scalar loop below runs indices rsi..r9-1; skip it when rsi already equals r9.
  6628  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6629  	JE   LBB0_1526 // common exit label, outside this chunk
  6630  
  6631  LBB0_900: // Scalar tail, one element per pass.
  6632  	LONG $0xb2046348         // movsxd    rax, dword [rdx + 4*rsi] -- sign-extends the dword to 64 bits
  6633  	LONG $0xf1048948         // mov    qword [rcx + 8*rsi], rax
  6634  	LONG $0x01c68348         // add    rsi, 1
  6635  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6636  	JNE  LBB0_900
  6637  	JMP  LBB0_1526
  6638  
  6639  LBB0_939: // Entry that zeroes the vector index rdi before the 64-bit -> 32-bit truncation tail below.
  6640  	WORD $0xff31 // xor    edi, edi
  6641  
  6642  LBB0_940: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, punpcklqdq joins the two halves. Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6643  	LONG $0x01c0f641               // test    r8b, 1
  6644  	JE   LBB0_942
  6645  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  6646  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6647  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  6648  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  6649  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  6650  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  6651  
  6652  LBB0_942: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6653  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6654  	JE   LBB0_1526
  6655  
  6656  LBB0_943: // Scalar loop over indices rsi up to r9: copy the low 32 bits of each 64-bit source element into the 32-bit destination.
  6657  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  6658  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  6659  	LONG $0x01c68348         // add    rsi, 1
  6660  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6661  	JNE  LBB0_943
  6662  	JMP  LBB0_1526
  6663  
  6664  LBB0_944: // Entry that zeroes the vector index rdi before the 64-bit -> 32-bit truncation tail below.
  6665  	WORD $0xff31 // xor    edi, edi
  6666  
  6667  LBB0_945: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, punpcklqdq joins the two halves. Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6668  	LONG $0x01c0f641               // test    r8b, 1
  6669  	JE   LBB0_947
  6670  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  6671  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6672  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  6673  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  6674  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  6675  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  6676  
  6677  LBB0_947: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6678  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6679  	JE   LBB0_1526
  6680  
  6681  LBB0_948: // Scalar loop over indices rsi up to r9: copy the low 32 bits of each 64-bit source element into the 32-bit destination.
  6682  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  6683  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  6684  	LONG $0x01c68348         // add    rsi, 1
  6685  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6686  	JNE  LBB0_948
  6687  	JMP  LBB0_1526
  6688  
  6689  LBB0_949: // Entry that zeroes the vector index rdi before the 16-bit -> 32-bit zero-extension tail below.
  6690  	WORD $0xff31 // xor    edi, edi
  6691  
  6692  LBB0_950: // If bit 0 of r8 is set, zero-extend one more group of 8 elements at index rdi (two pmovzxwd of 4 words each). Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6693  	LONG $0x01c0f641                           // test    r8b, 1
  6694  	JE   LBB0_952
  6695  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi]
  6696  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  6697  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  6698  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  6699  
  6700  LBB0_952: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6701  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6702  	JE   LBB0_1526
  6703  
  6704  LBB0_953: // Scalar loop over indices rsi up to r9: zero-extend one 16-bit source element to 32-bit and store it.
  6705  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  6706  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  6707  	LONG $0x01c68348         // add    rsi, 1
  6708  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6709  	JNE  LBB0_953
  6710  	JMP  LBB0_1526
  6711  
  6712  LBB0_954: // Entry that zeroes the vector index rdi before the 16-bit -> 32-bit sign-extension tail below.
  6713  	WORD $0xff31 // xor    edi, edi
  6714  
  6715  LBB0_955: // If bit 0 of r8 is set, sign-extend one more group of 8 elements at index rdi (two pmovsxwd of 4 words each). Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6716  	LONG $0x01c0f641                           // test    r8b, 1
  6717  	JE   LBB0_957
  6718  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi]
  6719  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  6720  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  6721  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  6722  
  6723  LBB0_957: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6724  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6725  	JE   LBB0_1526
  6726  
  6727  LBB0_958: // Scalar loop over indices rsi up to r9: sign-extend one 16-bit source element to 32-bit and store it.
  6728  	LONG $0x7204bf0f         // movsx    eax, word [rdx + 2*rsi]
  6729  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  6730  	LONG $0x01c68348         // add    rsi, 1
  6731  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6732  	JNE  LBB0_958
  6733  	JMP  LBB0_1526
  6734  
  6735  LBB0_959: // Entry that zeroes the vector index rdi before the 64-bit -> 32-bit truncation tail below.
  6736  	WORD $0xff31 // xor    edi, edi
  6737  
  6738  LBB0_960: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, punpcklqdq joins the two halves. Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6739  	LONG $0x01c0f641               // test    r8b, 1
  6740  	JE   LBB0_962
  6741  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  6742  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6743  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  6744  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  6745  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  6746  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  6747  
  6748  LBB0_962: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6749  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6750  	JE   LBB0_1526
  6751  
  6752  LBB0_963: // Scalar loop over indices rsi up to r9: copy the low 32 bits of each 64-bit source element into the 32-bit destination.
  6753  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  6754  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  6755  	LONG $0x01c68348         // add    rsi, 1
  6756  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6757  	JNE  LBB0_963
  6758  	JMP  LBB0_1526
  6759  
  6760  LBB0_964: // Entry that zeroes the vector index rdi before the float32 -> 32-bit integer tail below (the fix-up for inputs >= the 32[rbp] threshold suggests an unsigned destination - TODO confirm).
  6761  	WORD $0xff31 // xor    edi, edi
  6762  
  6763  LBB0_965: // If bit 0 of r8 is set, convert one more group of 8 floats at index rdi. Lanes below the threshold in 32[rbp] use cvttps2dq directly; other lanes convert (x - threshold) and XOR the result with 48[rbp]. Assuming rbp is the LCDATA1 base, those constants are 2^31 as float32 and 0x80000000 - TODO confirm.
  6764  	LONG $0x01c0f641             // test    r8b, 1
  6765  	JE   LBB0_967
  6766  	LONG $0xba0c100f             // movups    xmm1, oword [rdx + 4*rdi]
  6767  	LONG $0xba54100f; BYTE $0x10 // movups    xmm2, oword [rdx + 4*rdi + 16]
  6768  	LONG $0x205d280f             // movaps    xmm3, oword 32[rbp] /* [rip + .LCPI0_3] */
  6769  	WORD $0x280f; BYTE $0xc1     // movaps    xmm0, xmm1
  6770  	LONG $0x01c3c20f             // cmpltps    xmm0, xmm3
  6771  	LONG $0xe15b0ff3             // cvttps2dq    xmm4, xmm1
  6772  	WORD $0x5c0f; BYTE $0xcb     // subps    xmm1, xmm3
  6773  	LONG $0xc95b0ff3             // cvttps2dq    xmm1, xmm1
  6774  	LONG $0x306d280f             // movaps    xmm5, oword 48[rbp] /* [rip + .LCPI0_4] */
  6775  	WORD $0x570f; BYTE $0xcd     // xorps    xmm1, xmm5
  6776  	LONG $0x14380f66; BYTE $0xcc // blendvps    xmm1, xmm4, xmm0
  6777  	WORD $0x280f; BYTE $0xc2     // movaps    xmm0, xmm2
  6778  	LONG $0x01c3c20f             // cmpltps    xmm0, xmm3
  6779  	LONG $0xe25b0ff3             // cvttps2dq    xmm4, xmm2
  6780  	WORD $0x5c0f; BYTE $0xd3     // subps    xmm2, xmm3
  6781  	LONG $0xd25b0ff3             // cvttps2dq    xmm2, xmm2
  6782  	WORD $0x570f; BYTE $0xd5     // xorps    xmm2, xmm5
  6783  	LONG $0x14380f66; BYTE $0xd4 // blendvps    xmm2, xmm4, xmm0
  6784  	LONG $0xb90c110f             // movups    oword [rcx + 4*rdi], xmm1
  6785  	LONG $0xb954110f; BYTE $0x10 // movups    oword [rcx + 4*rdi + 16], xmm2
  6786  
  6787  LBB0_967: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6788  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6789  	JE   LBB0_1526
  6790  
  6791  LBB0_968: // Scalar loop over indices rsi up to r9: truncating float32 -> 64-bit integer conversion, of which only the low 32 bits are stored.
  6792  	LONG $0x2c0f48f3; WORD $0xb204 // cvttss2si    rax, dword [rdx + 4*rsi]
  6793  	WORD $0x0489; BYTE $0xb1       // mov    dword [rcx + 4*rsi], eax
  6794  	LONG $0x01c68348               // add    rsi, 1
  6795  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
  6796  	JNE  LBB0_968
  6797  	JMP  LBB0_1526
  6798  
  6799  LBB0_969: // Entry that zeroes the vector index rdi before the 64-bit integer -> float64 tail below (the split into 32-bit halves looks like the usual unsigned conversion - TODO confirm).
  6800  	WORD $0xff31 // xor    edi, edi
  6801  
  6802  LBB0_970: // If bit 0 of r8 is set, convert one more group of 4 elements at index rdi. Each qword is split into low and high 32-bit halves, each half is OR-ed into the mantissa of a constant from 80[rbp] / 96[rbp], the high part has 112[rbp] subtracted, and the two doubles are added. Assuming rbp is the LCDATA1 base, the constants are 2^52, 2^84 and 2^84 + 2^52 - TODO confirm.
  6803  	LONG $0x01c0f641               // test    r8b, 1
  6804  	JE   LBB0_972
  6805  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  6806  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6807  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
  6808  	LONG $0xd86f0f66               // movdqa    xmm3, xmm0
  6809  	LONG $0x0e3a0f66; WORD $0xccda // pblendw    xmm3, xmm2, 204
  6810  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI0_6] */
  6811  	LONG $0xdceb0f66               // por    xmm3, xmm4
  6812  	LONG $0xd0730f66; BYTE $0x20   // psrlq    xmm0, 32
  6813  	LONG $0x6d6f0f66; BYTE $0x60   // movdqa    xmm5, oword 96[rbp] /* [rip + .LCPI0_7] */
  6814  	LONG $0xc5eb0f66               // por    xmm0, xmm5
  6815  	LONG $0x75280f66; BYTE $0x70   // movapd    xmm6, oword 112[rbp] /* [rip + .LCPI0_8] */
  6816  	LONG $0xc65c0f66               // subpd    xmm0, xmm6
  6817  	LONG $0xc3580f66               // addpd    xmm0, xmm3
  6818  	LONG $0x0e3a0f66; WORD $0x33d1 // pblendw    xmm2, xmm1, 51
  6819  	LONG $0xd4eb0f66               // por    xmm2, xmm4
  6820  	LONG $0xd1730f66; BYTE $0x20   // psrlq    xmm1, 32
  6821  	LONG $0xcdeb0f66               // por    xmm1, xmm5
  6822  	LONG $0xce5c0f66               // subpd    xmm1, xmm6
  6823  	LONG $0xca580f66               // addpd    xmm1, xmm2
  6824  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
  6825  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
  6826  
  6827  LBB0_972: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6828  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6829  	JE   LBB0_1526
  6830  
  6831  LBB0_973: // Load the two loop-invariant constants used by the scalar loop once, before entering it.
  6832  	QUAD $0x0000008085280f66 // movapd    xmm0, oword 128[rbp] /* [rip + .LCPI0_9] */
  6833  	QUAD $0x000000908d280f66 // movapd    xmm1, oword 144[rbp] /* [rip + .LCPI0_10] */
  6834  
  6835  LBB0_974: // Scalar loop over indices rsi up to r9: interleave the two 32-bit halves of the source qword with the dwords of xmm0, subtract xmm1, then add the two resulting doubles and store the sum as float64.
  6836  	LONG $0x14100ff2; BYTE $0xf2 // movsd    xmm2, qword [rdx + 8*rsi]
  6837  	WORD $0x140f; BYTE $0xd0     // unpcklps    xmm2, xmm0
  6838  	LONG $0xd15c0f66             // subpd    xmm2, xmm1
  6839  	LONG $0xda280f66             // movapd    xmm3, xmm2
  6840  	LONG $0xda150f66             // unpckhpd    xmm3, xmm2
  6841  	LONG $0xda580ff2             // addsd    xmm3, xmm2
  6842  	LONG $0x1c110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm3
  6843  	LONG $0x01c68348             // add    rsi, 1
  6844  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6845  	JNE  LBB0_974
  6846  	JMP  LBB0_1526
  6847  
  6848  LBB0_975: // Entry that zeroes the vector index rdi before the float32 -> float64 widening tail below.
  6849  	WORD $0xff31 // xor    edi, edi
  6850  
  6851  LBB0_976: // If bit 0 of r8 is set, widen one more group of 4 floats at index rdi (two cvtps2pd of 2 floats each). Presumably the odd leftover of a 2x-unrolled loop outside this view - TODO confirm.
  6852  	LONG $0x01c0f641               // test    r8b, 1
  6853  	JE   LBB0_978
  6854  	LONG $0xba045a0f               // cvtps2pd    xmm0, qword [rdx + 4*rdi]
  6855  	LONG $0xba4c5a0f; BYTE $0x08   // cvtps2pd    xmm1, qword [rdx + 4*rdi + 8]
  6856  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
  6857  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
  6858  
  6859  LBB0_978: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6860  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6861  	JE   LBB0_1526
  6862  
  6863  LBB0_979: // Scalar loop over indices rsi up to r9: widen one float32 to float64 and store it.
  6864  	LONG $0x04100ff3; BYTE $0xb2 // movss    xmm0, dword [rdx + 4*rsi]
  6865  	LONG $0xc05a0ff3             // cvtss2sd    xmm0, xmm0
  6866  	LONG $0x04110ff2; BYTE $0xf1 // movsd    qword [rcx + 8*rsi], xmm0
  6867  	LONG $0x01c68348             // add    rsi, 1
  6868  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6869  	JNE  LBB0_979
  6870  	JMP  LBB0_1526
  6871  
  6872  LBB0_980: // Entry that zeroes the vector index rdi before the 32-bit -> 16-bit truncation tail below.
  6873  	WORD $0xff31 // xor    edi, edi
  6874  
  6875  LBB0_981: // If bit 0 of r8 is set, truncate one more group of 8 elements at index rdi: pshufb with the mask at 176[rbp] packs words into the low half of each register (presumably the low word of every dword - verify against LCDATA1+0xb0), punpcklqdq joins the halves.
  6876  	LONG $0x01c0f641               // test    r8b, 1
  6877  	JE   LBB0_983
  6878  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  6879  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6880  	QUAD $0x000000b0956f0f66       // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI0_12] */
  6881  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  6882  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  6883  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  6884  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  6885  
  6886  LBB0_983: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6887  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6888  	JE   LBB0_1526
  6889  
  6890  LBB0_984: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 32-bit source element into the 16-bit destination.
  6891  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  6892  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  6893  	LONG $0x01c68348         // add    rsi, 1
  6894  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6895  	JNE  LBB0_984
  6896  	JMP  LBB0_1526
  6897  
  6898  LBB0_985: // Entry that zeroes the vector index rdi before the 32-bit -> 16-bit truncation tail below.
  6899  	WORD $0xff31 // xor    edi, edi
  6900  
  6901  LBB0_986: // If bit 0 of r8 is set, truncate one more group of 8 elements at index rdi: pshufb with the mask at 176[rbp] packs words into the low half of each register (presumably the low word of every dword - verify against LCDATA1+0xb0), punpcklqdq joins the halves.
  6902  	LONG $0x01c0f641               // test    r8b, 1
  6903  	JE   LBB0_988
  6904  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  6905  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6906  	QUAD $0x000000b0956f0f66       // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI0_12] */
  6907  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  6908  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  6909  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  6910  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  6911  
  6912  LBB0_988: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6913  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6914  	JE   LBB0_1526
  6915  
  6916  LBB0_989: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 32-bit source element into the 16-bit destination.
  6917  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  6918  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  6919  	LONG $0x01c68348         // add    rsi, 1
  6920  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  6921  	JNE  LBB0_989
  6922  	JMP  LBB0_1526
  6923  
  6924  LBB0_990: // Entry that zeroes the vector index rdi before the float64 -> 16-bit integer tail below.
  6925  	WORD $0xff31 // xor    edi, edi
  6926  
  6927  LBB0_991: // If bit 0 of r8 is set, convert one more group of 4 doubles at index rdi: cvttpd2dq truncates each pair to 32-bit integers, pshuflw 232 packs their low words together, and each movd stores two 16-bit results.
  6928  	LONG $0x01c0f641               // test    r8b, 1
  6929  	JE   LBB0_993
  6930  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  6931  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6932  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  6933  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  6934  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  6935  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  6936  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  6937  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  6938  
  6939  LBB0_993: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6940  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6941  	JE   LBB0_1526
  6942  
  6943  LBB0_994: // Scalar loop over indices rsi up to r9: truncating float64 -> 32-bit integer conversion, of which only the low 16 bits are stored.
  6944  	LONG $0x042c0ff2; BYTE $0xf2 // cvttsd2si    eax, qword [rdx + 8*rsi]
  6945  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  6946  	LONG $0x01c68348             // add    rsi, 1
  6947  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6948  	JNE  LBB0_994
  6949  	JMP  LBB0_1526
  6950  
  6951  LBB0_995: // Entry that zeroes the vector index rdi before the float64 -> 16-bit integer tail below.
  6952  	WORD $0xff31 // xor    edi, edi
  6953  
  6954  LBB0_996: // If bit 0 of r8 is set, convert one more group of 4 doubles at index rdi: cvttpd2dq truncates each pair to 32-bit integers, pshuflw 232 packs their low words together, and each movd stores two 16-bit results.
  6955  	LONG $0x01c0f641               // test    r8b, 1
  6956  	JE   LBB0_998
  6957  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  6958  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6959  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  6960  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  6961  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  6962  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  6963  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  6964  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  6965  
  6966  LBB0_998: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6967  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6968  	JE   LBB0_1526
  6969  
  6970  LBB0_999: // Scalar loop over indices rsi up to r9: truncating float64 -> 32-bit integer conversion, of which only the low 16 bits are stored.
  6971  	LONG $0x042c0ff2; BYTE $0xf2 // cvttsd2si    eax, qword [rdx + 8*rsi]
  6972  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  6973  	LONG $0x01c68348             // add    rsi, 1
  6974  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  6975  	JNE  LBB0_999
  6976  	JMP  LBB0_1526
  6977  
  6978  LBB0_1000: // Entry that zeroes the vector index rdi before the 64-bit -> 16-bit truncation tail below.
  6979  	WORD $0xff31 // xor    edi, edi
  6980  
  6981  LBB0_1001: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, pshuflw 232 then gathers the low word of each of those, and each movd stores two 16-bit results.
  6982  	LONG $0x01c0f641               // test    r8b, 1
  6983  	JE   LBB0_1003
  6984  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  6985  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6986  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  6987  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  6988  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  6989  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  6990  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  6991  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  6992  
  6993  LBB0_1003: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  6994  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  6995  	JE   LBB0_1526
  6996  
  6997  LBB0_1004: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 64-bit source element into the 16-bit destination.
  6998  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  6999  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7000  	LONG $0x01c68348         // add    rsi, 1
  7001  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7002  	JNE  LBB0_1004
  7003  	JMP  LBB0_1526
  7004  
  7005  LBB0_1005: // Entry that zeroes the vector index rdi before the 64-bit -> 16-bit truncation tail below.
  7006  	WORD $0xff31 // xor    edi, edi
  7007  
  7008  LBB0_1006: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, pshuflw 232 then gathers the low word of each of those, and each movd stores two 16-bit results.
  7009  	LONG $0x01c0f641               // test    r8b, 1
  7010  	JE   LBB0_1008
  7011  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  7012  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  7013  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  7014  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  7015  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  7016  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  7017  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  7018  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  7019  
  7020  LBB0_1008: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7021  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7022  	JE   LBB0_1526
  7023  
  7024  LBB0_1009: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 64-bit source element into the 16-bit destination.
  7025  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  7026  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7027  	LONG $0x01c68348         // add    rsi, 1
  7028  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7029  	JNE  LBB0_1009
  7030  	JMP  LBB0_1526
  7031  
  7032  LBB0_1010: // Entry that zeroes the vector index rdi before the 64-bit -> 16-bit truncation tail below.
  7033  	WORD $0xff31 // xor    edi, edi
  7034  
  7035  LBB0_1011: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, pshuflw 232 then gathers the low word of each of those, and each movd stores two 16-bit results.
  7036  	LONG $0x01c0f641               // test    r8b, 1
  7037  	JE   LBB0_1013
  7038  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  7039  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  7040  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  7041  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  7042  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  7043  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  7044  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  7045  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  7046  
  7047  LBB0_1013: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7048  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7049  	JE   LBB0_1526
  7050  
  7051  LBB0_1014: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 64-bit source element into the 16-bit destination.
  7052  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  7053  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7054  	LONG $0x01c68348         // add    rsi, 1
  7055  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7056  	JNE  LBB0_1014
  7057  	JMP  LBB0_1526
  7058  
  7059  LBB0_1015: // Entry that zeroes the vector index rdi before the 64-bit -> 16-bit truncation tail below.
  7060  	WORD $0xff31 // xor    edi, edi
  7061  
  7062  LBB0_1016: // If bit 0 of r8 is set, truncate one more group of 4 elements at index rdi: pshufd 232 gathers the low dword of each qword, pshuflw 232 then gathers the low word of each of those, and each movd stores two 16-bit results.
  7063  	LONG $0x01c0f641               // test    r8b, 1
  7064  	JE   LBB0_1018
  7065  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  7066  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  7067  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  7068  	LONG $0xc0700ff2; BYTE $0xe8   // pshuflw    xmm0, xmm0, 232
  7069  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  7070  	LONG $0xc9700ff2; BYTE $0xe8   // pshuflw    xmm1, xmm1, 232
  7071  	LONG $0x047e0f66; BYTE $0x79   // movd    dword [rcx + 2*rdi], xmm0
  7072  	LONG $0x4c7e0f66; WORD $0x0479 // movd    dword [rcx + 2*rdi + 4], xmm1
  7073  
  7074  LBB0_1018: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7075  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7076  	JE   LBB0_1526
  7077  
  7078  LBB0_1019: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 64-bit source element into the 16-bit destination.
  7079  	LONG $0xf204b70f         // movzx    eax, word [rdx + 8*rsi]
  7080  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7081  	LONG $0x01c68348         // add    rsi, 1
  7082  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7083  	JNE  LBB0_1019
  7084  	JMP  LBB0_1526
  7085  
  7086  LBB0_1020: // Entry that zeroes the vector index rdi before the float32 -> 16-bit integer tail below (packusdw suggests an unsigned destination - TODO confirm).
  7087  	WORD $0xff31 // xor    edi, edi
  7088  
  7089  LBB0_1021: // If bit 0 of r8 is set, convert one more group of 8 floats at index rdi: cvttps2dq truncates to 32-bit integers and packusdw narrows them to 16-bit with unsigned saturation. NOTE(review): the scalar loop below keeps only the low 16 bits instead, so out-of-range inputs give different results on the two paths.
  7090  	LONG $0x01c0f641             // test    r8b, 1
  7091  	JE   LBB0_1023
  7092  	LONG $0xba04100f             // movups    xmm0, oword [rdx + 4*rdi]
  7093  	LONG $0xba4c100f; BYTE $0x10 // movups    xmm1, oword [rdx + 4*rdi + 16]
  7094  	LONG $0xc05b0ff3             // cvttps2dq    xmm0, xmm0
  7095  	LONG $0xc95b0ff3             // cvttps2dq    xmm1, xmm1
  7096  	LONG $0x2b380f66; BYTE $0xc1 // packusdw    xmm0, xmm1
  7097  	LONG $0x047f0ff3; BYTE $0x79 // movdqu    oword [rcx + 2*rdi], xmm0
  7098  
  7099  LBB0_1023: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7100  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7101  	JE   LBB0_1526
  7102  
  7103  LBB0_1024: // Scalar loop over indices rsi up to r9: truncating float32 -> 32-bit integer conversion, of which only the low 16 bits are stored.
  7104  	LONG $0x042c0ff3; BYTE $0xb2 // cvttss2si    eax, dword [rdx + 4*rsi]
  7105  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  7106  	LONG $0x01c68348             // add    rsi, 1
  7107  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7108  	JNE  LBB0_1024
  7109  	JMP  LBB0_1526
  7110  
  7111  LBB0_1025: // Entry that zeroes the vector index rdi before the float32 -> 16-bit integer tail below (packssdw suggests a signed destination - TODO confirm).
  7112  	WORD $0xff31 // xor    edi, edi
  7113  
  7114  LBB0_1026: // If bit 0 of r8 is set, convert one more group of 8 floats at index rdi: cvttps2dq truncates to 32-bit integers and packssdw narrows them to 16-bit with signed saturation. NOTE(review): the scalar loop below keeps only the low 16 bits instead, so out-of-range inputs give different results on the two paths.
  7115  	LONG $0x01c0f641             // test    r8b, 1
  7116  	JE   LBB0_1028
  7117  	LONG $0xba04100f             // movups    xmm0, oword [rdx + 4*rdi]
  7118  	LONG $0xba4c100f; BYTE $0x10 // movups    xmm1, oword [rdx + 4*rdi + 16]
  7119  	LONG $0xc05b0ff3             // cvttps2dq    xmm0, xmm0
  7120  	LONG $0xc95b0ff3             // cvttps2dq    xmm1, xmm1
  7121  	LONG $0xc16b0f66             // packssdw    xmm0, xmm1
  7122  	LONG $0x047f0ff3; BYTE $0x79 // movdqu    oword [rcx + 2*rdi], xmm0
  7123  
  7124  LBB0_1028: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7125  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7126  	JE   LBB0_1526
  7127  
  7128  LBB0_1029: // Scalar loop over indices rsi up to r9: truncating float32 -> 32-bit integer conversion, of which only the low 16 bits are stored.
  7129  	LONG $0x042c0ff3; BYTE $0xb2 // cvttss2si    eax, dword [rdx + 4*rsi]
  7130  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
  7131  	LONG $0x01c68348             // add    rsi, 1
  7132  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7133  	JNE  LBB0_1029
  7134  	JMP  LBB0_1526
  7135  
  7136  LBB0_1030: // Entry that zeroes the vector index rdi before the 32-bit -> 16-bit truncation tail below.
  7137  	WORD $0xff31 // xor    edi, edi
  7138  
  7139  LBB0_1031: // If bit 0 of r8 is set, truncate one more group of 8 elements at index rdi: pshufb with the mask at 176[rbp] packs words into the low half of each register (presumably the low word of every dword - verify against LCDATA1+0xb0), punpcklqdq joins the halves.
  7140  	LONG $0x01c0f641               // test    r8b, 1
  7141  	JE   LBB0_1033
  7142  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  7143  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7144  	QUAD $0x000000b0956f0f66       // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI0_12] */
  7145  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  7146  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  7147  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  7148  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  7149  
  7150  LBB0_1033: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7151  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7152  	JE   LBB0_1526
  7153  
  7154  LBB0_1034: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 32-bit source element into the 16-bit destination.
  7155  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  7156  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7157  	LONG $0x01c68348         // add    rsi, 1
  7158  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7159  	JNE  LBB0_1034
  7160  	JMP  LBB0_1526
  7161  
  7162  LBB0_1035: // Entry that zeroes the vector index rdi before the 32-bit -> 16-bit truncation tail below.
  7163  	WORD $0xff31 // xor    edi, edi
  7164  
  7165  LBB0_1036: // If bit 0 of r8 is set, truncate one more group of 8 elements at index rdi: pshufb with the mask at 176[rbp] packs words into the low half of each register (presumably the low word of every dword - verify against LCDATA1+0xb0), punpcklqdq joins the halves.
  7166  	LONG $0x01c0f641               // test    r8b, 1
  7167  	JE   LBB0_1038
  7168  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  7169  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7170  	QUAD $0x000000b0956f0f66       // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI0_12] */
  7171  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  7172  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  7173  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  7174  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
  7175  
  7176  LBB0_1038: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7177  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7178  	JE   LBB0_1526
  7179  
  7180  LBB0_1039: // Scalar loop over indices rsi up to r9: copy the low 16 bits of each 32-bit source element into the 16-bit destination.
  7181  	LONG $0xb204b70f         // movzx    eax, word [rdx + 4*rsi]
  7182  	LONG $0x71048966         // mov    word [rcx + 2*rsi], ax
  7183  	LONG $0x01c68348         // add    rsi, 1
  7184  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7185  	JNE  LBB0_1039
  7186  	JMP  LBB0_1526
  7187  
  7188  LBB0_1040: // Entry that zeroes the vector index rdi before the 32-bit integer -> float32 tail below (the scalar loop zero-extends its input, so the source is treated as unsigned).
  7189  	WORD $0xff31 // xor    edi, edi
  7190  
  7191  LBB0_1041: // If bit 0 of r8 is set, convert one more group of 8 elements at index rdi. Each dword is split into 16-bit halves that are blended into the mantissas of the constants at 208[rbp] / 224[rbp]; the high part has 240[rbp] subtracted and the two floats are added. Assuming rbp is the LCDATA1 base, the constants are 2^23, 2^39 and 2^39 + 2^23 - TODO confirm.
  7192  	LONG $0x01c0f641                           // test    r8b, 1
  7193  	JE   LBB0_1043
  7194  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7195  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7196  	QUAD $0x000000d0956f0f66                   // movdqa    xmm2, oword 208[rbp] /* [rip + .LCPI0_14] */
  7197  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
  7198  	LONG $0x0e3a0f66; WORD $0xaada             // pblendw    xmm3, xmm2, 170
  7199  	LONG $0xd0720f66; BYTE $0x10               // psrld    xmm0, 16
  7200  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI0_15] */
  7201  	LONG $0x0e3a0f66; WORD $0xaac4             // pblendw    xmm0, xmm4, 170
  7202  	LONG $0xf0ad280f; WORD $0x0000; BYTE $0x00 // movaps    xmm5, oword 240[rbp] /* [rip + .LCPI0_16] */
  7203  	WORD $0x5c0f; BYTE $0xc5                   // subps    xmm0, xmm5
  7204  	WORD $0x580f; BYTE $0xc3                   // addps    xmm0, xmm3
  7205  	LONG $0x0e3a0f66; WORD $0x55d1             // pblendw    xmm2, xmm1, 85
  7206  	LONG $0xd1720f66; BYTE $0x10               // psrld    xmm1, 16
  7207  	LONG $0x0e3a0f66; WORD $0xaacc             // pblendw    xmm1, xmm4, 170
  7208  	WORD $0x5c0f; BYTE $0xcd                   // subps    xmm1, xmm5
  7209  	WORD $0x580f; BYTE $0xca                   // addps    xmm1, xmm2
  7210  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  7211  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  7212  
  7213  LBB0_1043: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7214  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7215  	JE   LBB0_1526
  7216  
  7217  LBB0_1044: // Scalar loop over indices rsi up to r9: the 32-bit load zero-extends into rax, and the 64-bit cvtsi2ss then converts that value to float32; xmm0 is cleared first so the convert does not depend on its previous contents.
  7218  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
  7219  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7220  	LONG $0x2a0f48f3; BYTE $0xc0 // cvtsi2ss    xmm0, rax
  7221  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7222  	LONG $0x01c68348             // add    rsi, 1
  7223  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7224  	JNE  LBB0_1044
  7225  	JMP  LBB0_1526
  7226  
  7227  LBB0_1045: // Entry that zeroes the vector index rdi before the float64 -> float32 narrowing tail below.
  7228  	WORD $0xff31 // xor    edi, edi
  7229  
  7230  LBB0_1046: // If bit 0 of r8 is set, narrow one more group of 4 doubles at index rdi: each cvtpd2ps yields 2 floats in the low 64 bits, and unpcklpd joins both pairs into one 16-byte store.
  7231  	LONG $0x01c0f641               // test    r8b, 1
  7232  	JE   LBB0_1048
  7233  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  7234  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  7235  	LONG $0xc05a0f66               // cvtpd2ps    xmm0, xmm0
  7236  	LONG $0xc95a0f66               // cvtpd2ps    xmm1, xmm1
  7237  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1
  7238  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
  7239  
  7240  LBB0_1048: // Leave via LBB0_1526 (outside this view) when the scalar start index rsi already equals the end index r9.
  7241  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7242  	JE   LBB0_1526
  7243  
  7244  LBB0_1049: // Scalar loop over indices rsi up to r9: narrow one float64 to float32 and store it.
  7245  	LONG $0x04100ff2; BYTE $0xf2 // movsd    xmm0, qword [rdx + 8*rsi]
  7246  	LONG $0xc05a0ff2             // cvtsd2ss    xmm0, xmm0
  7247  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7248  	LONG $0x01c68348             // add    rsi, 1
  7249  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7250  	JNE  LBB0_1049
  7251  	JMP  LBB0_1526
  7252  
  7253  LBB0_1050:
  7254  	WORD $0xff31 // xor    edi, edi
  7255  
  7256  LBB0_1051:
  7257  	LONG $0x01c0f641                           // test    r8b, 1
  7258  	JE   LBB0_1053
  7259  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  7260  	QUAD $0x000000a09d6f0f66                   // movdqa    xmm3, oword 160[rbp] /* [rip + .LCPI0_11] */
  7261  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
  7262  	LONG $0xd06f0f66                           // movdqa    xmm2, xmm0
  7263  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
  7264  	LONG $0xe3db0f66                           // pand    xmm4, xmm3
  7265  	LONG $0xd1730f66; BYTE $0x01               // psrlq    xmm1, 1
  7266  	LONG $0xcceb0f66                           // por    xmm1, xmm4
  7267  	LONG $0x15380f66; BYTE $0xd1               // blendvpd    xmm2, xmm1, xmm0
  7268  	LONG $0x3a0f4866; WORD $0xd016; BYTE $0x01 // pextrq    rax, xmm2, 1
  7269  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
  7270  	LONG $0x2a0f48f3; BYTE $0xe0               // cvtsi2ss    xmm4, rax
  7271  	LONG $0x7e0f4866; BYTE $0xd0               // movq    rax, xmm2
  7272  	WORD $0x570f; BYTE $0xd2                   // xorps    xmm2, xmm2
  7273  	LONG $0x2a0f48f3; BYTE $0xd0               // cvtsi2ss    xmm2, rax
  7274  	LONG $0xedef0f66                           // pxor    xmm5, xmm5
  7275  	LONG $0x37380f66; BYTE $0xe8               // pcmpgtq    xmm5, xmm0
  7276  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  7277  	LONG $0x213a0f66; WORD $0x1cd4             // insertps    xmm2, xmm4, 28
  7278  	WORD $0x280f; BYTE $0xe2                   // movaps    xmm4, xmm2
  7279  	WORD $0x580f; BYTE $0xe2                   // addps    xmm4, xmm2
  7280  	LONG $0xf6ef0f66                           // pxor    xmm6, xmm6
  7281  	LONG $0xc5700f66; BYTE $0xed               // pshufd    xmm0, xmm5, 237
  7282  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0
  7283  	LONG $0xd9db0f66                           // pand    xmm3, xmm1
  7284  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
  7285  	LONG $0xd4730f66; BYTE $0x01               // psrlq    xmm4, 1
  7286  	LONG $0xe3eb0f66                           // por    xmm4, xmm3
  7287  	LONG $0x37380f66; BYTE $0xf1               // pcmpgtq    xmm6, xmm1
  7288  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
  7289  	LONG $0x15380f66; BYTE $0xcc               // blendvpd    xmm1, xmm4, xmm0
  7290  	LONG $0x3a0f4866; WORD $0xc816; BYTE $0x01 // pextrq    rax, xmm1, 1
  7291  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
  7292  	LONG $0x2a0f48f3; BYTE $0xc0               // cvtsi2ss    xmm0, rax
  7293  	LONG $0x7e0f4866; BYTE $0xc8               // movq    rax, xmm1
  7294  	WORD $0x570f; BYTE $0xc9                   // xorps    xmm1, xmm1
  7295  	LONG $0x2a0f48f3; BYTE $0xc8               // cvtsi2ss    xmm1, rax
  7296  	LONG $0x213a0f66; WORD $0x1cc8             // insertps    xmm1, xmm0, 28
  7297  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
  7298  	WORD $0x580f; BYTE $0xd9                   // addps    xmm3, xmm1
  7299  	LONG $0xc6700f66; BYTE $0xed               // pshufd    xmm0, xmm6, 237
  7300  	LONG $0x14380f66; BYTE $0xcb               // blendvps    xmm1, xmm3, xmm0
  7301  	WORD $0x160f; BYTE $0xd1                   // movlhps    xmm2, xmm1
  7302  	LONG $0xb914110f                           // movups    oword [rcx + 4*rdi], xmm2
  7303  
  7304  LBB0_1053:
        	// Entry to the scalar loop that converts 64-bit integers at [rdx + 8*rsi]
        	// into float32 values at [rcx + 4*rsi], treating the source as unsigned:
        	// values with the top bit set take the halve-and-double path in LBB0_1056.
        	// rsi is the current element index; the loop stops once rsi equals r9.
        	// If rsi already equals r9 there is nothing left to convert.
  7305  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7306  	JNE  LBB0_1056
  7307  	JMP  LBB0_1526
  7308  
  7309  LBB0_1054:
        	// Top bit clear: rax can be converted directly with the signed cvtsi2ss.
        	// xmm0 is zeroed first because cvtsi2ss only writes the low lane.
  7310  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7311  	LONG $0x2a0f48f3; BYTE $0xc0 // cvtsi2ss    xmm0, rax
  7312  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7313  	LONG $0x01c68348             // add    rsi, 1
  7314  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7315  	JE   LBB0_1526
  7316  
  7317  LBB0_1056:
        	// Load the next 64-bit element and branch on its sign bit.
  7318  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  7319  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  7320  	JNS  LBB0_1054
        	// Top bit set: a signed convert would read the value as negative.
        	// Compute (value >> 1) | (value & 1), convert that, then double the
        	// result with addss. The low bit is ORed back in, presumably so the
        	// shifted-out bit still influences rounding.
  7321  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
  7322  	WORD $0xd148; BYTE $0xef     // shr    rdi, 1
  7323  	WORD $0xe083; BYTE $0x01     // and    eax, 1
  7324  	WORD $0x0948; BYTE $0xf8     // or    rax, rdi
  7325  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7326  	LONG $0x2a0f48f3; BYTE $0xc0 // cvtsi2ss    xmm0, rax
  7327  	LONG $0xc0580ff3             // addss    xmm0, xmm0
  7328  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7329  	LONG $0x01c68348             // add    rsi, 1
  7330  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7331  	JNE  LBB0_1056
  7332  	JMP  LBB0_1526
  7333  
  7334  LBB0_1058:
        	// 16-bit (zero-extended) -> float32 conversion: optional 8-element
        	// vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7335  	WORD $0xff31 // xor    edi, edi
  7336  
  7337  LBB0_1059:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7338  	LONG $0x01c0f641                           // test    r8b, 1
  7339  	JE   LBB0_1061
        	// Zero-extend 2 x 4 words to dwords, convert to float, store 32 bytes.
  7340  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi]
  7341  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  7342  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  7343  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  7344  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  7345  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  7346  
  7347  LBB0_1061:
        	// Done when the scalar start index rsi already equals r9.
  7348  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7349  	JE   LBB0_1526
  7350  
  7351  LBB0_1062:
        	// Scalar loop: one element per iteration until rsi reaches r9.
        	// xmm0 is zeroed first because cvtsi2ss only writes the low lane.
  7352  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  7353  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7354  	LONG $0xc02a0ff3             // cvtsi2ss    xmm0, eax
  7355  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7356  	LONG $0x01c68348             // add    rsi, 1
  7357  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7358  	JNE  LBB0_1062
  7359  	JMP  LBB0_1526
  7360  
  7361  LBB0_1063:
        	// 16-bit (sign-extended) -> float32 conversion: optional 8-element
        	// vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7362  	WORD $0xff31 // xor    edi, edi
  7363  
  7364  LBB0_1064:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7365  	LONG $0x01c0f641                           // test    r8b, 1
  7366  	JE   LBB0_1066
        	// Sign-extend 2 x 4 words to dwords, convert to float, store 32 bytes.
  7367  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi]
  7368  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  7369  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  7370  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  7371  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  7372  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  7373  
  7374  LBB0_1066:
        	// Done when the scalar start index rsi already equals r9.
  7375  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7376  	JE   LBB0_1526
  7377  
  7378  LBB0_1067:
        	// Scalar loop: one element per iteration until rsi reaches r9.
        	// xmm0 is zeroed first because cvtsi2ss only writes the low lane.
  7379  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
  7380  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7381  	LONG $0xc02a0ff3             // cvtsi2ss    xmm0, eax
  7382  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7383  	LONG $0x01c68348             // add    rsi, 1
  7384  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7385  	JNE  LBB0_1067
  7386  	JMP  LBB0_1526
  7387  
  7388  LBB0_1068:
        	// Signed 32-bit integer -> float32 conversion: optional 8-element
        	// vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7389  	WORD $0xff31 // xor    edi, edi
  7390  
  7391  LBB0_1069:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7392  	LONG $0x01c0f641             // test    r8b, 1
  7393  	JE   LBB0_1071
        	// Load 2 x 4 dwords, convert each lane to float, store 32 bytes.
  7394  	LONG $0xba04100f             // movups    xmm0, oword [rdx + 4*rdi]
  7395  	LONG $0xba4c100f; BYTE $0x10 // movups    xmm1, oword [rdx + 4*rdi + 16]
  7396  	WORD $0x5b0f; BYTE $0xc0     // cvtdq2ps    xmm0, xmm0
  7397  	WORD $0x5b0f; BYTE $0xc9     // cvtdq2ps    xmm1, xmm1
  7398  	LONG $0xb904110f             // movups    oword [rcx + 4*rdi], xmm0
  7399  	LONG $0xb94c110f; BYTE $0x10 // movups    oword [rcx + 4*rdi + 16], xmm1
  7400  
  7401  LBB0_1071:
        	// Done when the scalar start index rsi already equals r9.
  7402  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7403  	JE   LBB0_1526
  7404  
  7405  LBB0_1072:
        	// Scalar loop: convert straight from memory, one element per iteration,
        	// until rsi reaches r9. xmm0 is zeroed first because cvtsi2ss only
        	// writes the low lane.
  7406  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
  7407  	LONG $0x042a0ff3; BYTE $0xb2 // cvtsi2ss    xmm0, dword [rdx + 4*rsi]
  7408  	LONG $0x04110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm0
  7409  	LONG $0x01c68348             // add    rsi, 1
  7410  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7411  	JNE  LBB0_1072
  7412  	JMP  LBB0_1526
  7413  
  7414  LBB0_1073:
        	// float64 -> signed 32-bit integer conversion with truncation:
        	// optional 4-element vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7415  	WORD $0xff31 // xor    edi, edi
  7416  
  7417  LBB0_1074:
        	// Bit 0 of r8 decides whether one more 4-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7418  	LONG $0x01c0f641               // test    r8b, 1
  7419  	JE   LBB0_1076
        	// Each cvttpd2dq leaves two dwords in the low half of its register;
        	// unpcklpd joins the two low halves into one 16-byte store of 4 results.
  7420  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
  7421  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
  7422  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
  7423  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
  7424  	LONG $0xc1140f66               // unpcklpd    xmm0, xmm1
  7425  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
  7426  
  7427  LBB0_1076:
        	// Done when the scalar start index rsi already equals r9.
  7428  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7429  	JE   LBB0_1526
  7430  
  7431  LBB0_1077:
        	// Scalar loop: truncate one double per iteration until rsi reaches r9.
  7432  	LONG $0x042c0ff2; BYTE $0xf2 // cvttsd2si    eax, qword [rdx + 8*rsi]
  7433  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  7434  	LONG $0x01c68348             // add    rsi, 1
  7435  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7436  	JNE  LBB0_1077
  7437  	JMP  LBB0_1526
  7438  
  7439  LBB0_1078:
        	// 16-bit -> 32-bit widening with zero extension: optional 8-element
        	// vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7440  	WORD $0xff31 // xor    edi, edi
  7441  
  7442  LBB0_1079:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7443  	LONG $0x01c0f641                           // test    r8b, 1
  7444  	JE   LBB0_1081
        	// Zero-extend 2 x 4 words to dwords and store 32 bytes.
  7445  	LONG $0x33380f66; WORD $0x7a04             // pmovzxwd    xmm0, qword [rdx + 2*rdi]
  7446  	LONG $0x33380f66; WORD $0x7a4c; BYTE $0x08 // pmovzxwd    xmm1, qword [rdx + 2*rdi + 8]
  7447  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  7448  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  7449  
  7450  LBB0_1081:
        	// Done when the scalar start index rsi already equals r9.
  7451  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7452  	JE   LBB0_1526
  7453  
  7454  LBB0_1082:
        	// Scalar loop: one element per iteration until rsi reaches r9.
  7455  	LONG $0x7204b70f         // movzx    eax, word [rdx + 2*rsi]
  7456  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  7457  	LONG $0x01c68348         // add    rsi, 1
  7458  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7459  	JNE  LBB0_1082
  7460  	JMP  LBB0_1526
  7461  
  7462  LBB0_1083:
        	// 16-bit -> 32-bit widening with sign extension: optional 8-element
        	// vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7463  	WORD $0xff31 // xor    edi, edi
  7464  
  7465  LBB0_1084:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7466  	LONG $0x01c0f641                           // test    r8b, 1
  7467  	JE   LBB0_1086
        	// Sign-extend 2 x 4 words to dwords and store 32 bytes.
  7468  	LONG $0x23380f66; WORD $0x7a04             // pmovsxwd    xmm0, qword [rdx + 2*rdi]
  7469  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
  7470  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  7471  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  7472  
  7473  LBB0_1086:
        	// Done when the scalar start index rsi already equals r9.
  7474  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7475  	JE   LBB0_1526
  7476  
  7477  LBB0_1087:
        	// Scalar loop: one element per iteration until rsi reaches r9.
  7478  	LONG $0x7204bf0f         // movsx    eax, word [rdx + 2*rsi]
  7479  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  7480  	LONG $0x01c68348         // add    rsi, 1
  7481  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7482  	JNE  LBB0_1087
  7483  	JMP  LBB0_1526
  7484  
  7485  LBB0_1088:
        	// 64-bit -> 32-bit narrowing that keeps the low dword of every element:
        	// optional 4-element vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7486  	WORD $0xff31 // xor    edi, edi
  7487  
  7488  LBB0_1089:
        	// Bit 0 of r8 decides whether one more 4-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7489  	LONG $0x01c0f641               // test    r8b, 1
  7490  	JE   LBB0_1091
        	// pshufd with 232 (0xE8) moves dwords 0 and 2 (the low halves of the two
        	// qwords) into the low 64 bits; punpcklqdq then joins both registers'
        	// low halves into one 16-byte store of 4 results.
  7491  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
  7492  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  7493  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
  7494  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
  7495  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  7496  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
  7497  
  7498  LBB0_1091:
        	// Done when the scalar start index rsi already equals r9.
  7499  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7500  	JE   LBB0_1526
  7501  
  7502  LBB0_1092:
        	// Scalar loop: read only the low dword of each 8-byte element and store
        	// it, one element per iteration, until rsi reaches r9.
  7503  	WORD $0x048b; BYTE $0xf2 // mov    eax, dword [rdx + 8*rsi]
  7504  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
  7505  	LONG $0x01c68348         // add    rsi, 1
  7506  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
  7507  	JNE  LBB0_1092
  7508  	JMP  LBB0_1526
  7509  
  7510  LBB0_1093:
        	// float32 -> signed 32-bit integer conversion with truncation:
        	// optional 8-element vector step followed by a scalar loop.
        	// Source is read from rdx, destination written through rcx.
        	// This entry starts the vector step at element index 0.
  7511  	WORD $0xff31 // xor    edi, edi
  7512  
  7513  LBB0_1094:
        	// Bit 0 of r8 decides whether one more 8-element vector step runs.
        	// NOTE(review): r8 is set outside this view; presumably it counts
        	// vector chunks left over from an unrolled loop - confirm.
  7514  	LONG $0x01c0f641               // test    r8b, 1
  7515  	JE   LBB0_1096
        	// Load 2 x 4 floats, truncate each lane to a dword, store 32 bytes.
  7516  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  7517  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  7518  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  7519  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  7520  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
  7521  	LONG $0x4c110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm1
  7522  
  7523  LBB0_1096:
        	// Done when the scalar start index rsi already equals r9.
  7524  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7525  	JE   LBB0_1526
  7526  
  7527  LBB0_1097:
        	// Scalar loop: truncate one float per iteration until rsi reaches r9,
        	// then fall through into the shared return below.
  7528  	LONG $0x042c0ff3; BYTE $0xb2 // cvttss2si    eax, dword [rdx + 4*rsi]
  7529  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
  7530  	LONG $0x01c68348             // add    rsi, 1
  7531  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
  7532  	JNE  LBB0_1097
  7533  
  7534  LBB0_1526:
        	// Shared exit: the conversion and copy loops around here jump to this
        	// label when they have no elements left.
  7535  	RET
  7536  
  7537  LBB0_1098:
        	// Bulk copy from rdx to rcx, indexed in 4-byte elements, used where no
        	// value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1100 in that case - confirm.
  7538  	LONG $0xfce78348         // and    rdi, -4
  7539  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7540  	WORD $0xc031             // xor    eax, eax
  7541  
  7542  LBB0_1099:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes (32 elements) per pass.
  7543  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
  7544  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
  7545  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
  7546  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
  7547  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
  7548  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
  7549  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
  7550  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
  7551  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
  7552  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
  7553  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
  7554  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
  7555  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
  7556  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
  7557  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
  7558  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
  7559  	LONG $0x20c08348               // add    rax, 32
  7560  	LONG $0x04c78348               // add    rdi, 4
  7561  	JNE  LBB0_1099
  7562  
  7563  LBB0_1100:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is turned from an element index into a byte offset biased by 16 so
        	// [base + rax - 16] and [base + rax] address the two halves of a chunk.
  7564  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7565  	JE   LBB0_1103
  7566  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16]
  7567  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7568  
  7569  LBB0_1102:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7570  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7571  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7572  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7573  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7574  	LONG $0x20c08348               // add    rax, 32
  7575  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7576  	JNE  LBB0_1102
  7577  
  7578  LBB0_1103:
        	// Return if rsi equals r9; otherwise continue at LBB0_1104, which lies
        	// outside this section (presumably the element-by-element tail).
  7579  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7580  	JE   LBB0_1526
  7581  	JMP  LBB0_1104
  7582  
  7583  LBB0_1108:
        	// Bulk copy from rdx to rcx, indexed in 4-byte elements, used where no
        	// value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1110 in that case - confirm.
  7584  	LONG $0xfce78348         // and    rdi, -4
  7585  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7586  	WORD $0xc031             // xor    eax, eax
  7587  
  7588  LBB0_1109:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes (32 elements) per pass.
  7589  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
  7590  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
  7591  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
  7592  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
  7593  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
  7594  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
  7595  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
  7596  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
  7597  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
  7598  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
  7599  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
  7600  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
  7601  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
  7602  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
  7603  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
  7604  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
  7605  	LONG $0x20c08348               // add    rax, 32
  7606  	LONG $0x04c78348               // add    rdi, 4
  7607  	JNE  LBB0_1109
  7608  
  7609  LBB0_1110:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is turned from an element index into a byte offset biased by 16 so
        	// [base + rax - 16] and [base + rax] address the two halves of a chunk.
  7610  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7611  	JE   LBB0_1113
  7612  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16]
  7613  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7614  
  7615  LBB0_1112:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7616  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7617  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7618  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7619  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7620  	LONG $0x20c08348               // add    rax, 32
  7621  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7622  	JNE  LBB0_1112
  7623  
  7624  LBB0_1113:
        	// Return if rsi equals r9; otherwise continue at LBB0_1114, which lies
        	// outside this section (presumably the element-by-element tail).
  7625  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7626  	JE   LBB0_1526
  7627  	JMP  LBB0_1114
  7628  
  7629  LBB0_1118:
        	// Bulk copy from rdx to rcx, indexed in 8-byte elements, used where no
        	// value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1120 in that case - confirm.
  7630  	LONG $0xfce78348         // and    rdi, -4
  7631  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7632  	WORD $0xc031             // xor    eax, eax
  7633  
  7634  LBB0_1119:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes (16 elements) per pass.
  7635  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
  7636  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
  7637  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
  7638  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
  7639  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
  7640  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
  7641  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
  7642  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
  7643  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
  7644  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
  7645  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
  7646  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
  7647  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
  7648  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
  7649  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
  7650  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
  7651  	LONG $0x10c08348               // add    rax, 16
  7652  	LONG $0x04c78348               // add    rdi, 4
  7653  	JNE  LBB0_1119
  7654  
  7655  LBB0_1120:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is turned from an element index into a byte offset biased by 16 so
        	// [base + rax - 16] and [base + rax] address the two halves of a chunk.
  7656  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7657  	JE   LBB0_1123
  7658  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16]
  7659  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7660  
  7661  LBB0_1122:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7662  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7663  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7664  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7665  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7666  	LONG $0x20c08348               // add    rax, 32
  7667  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7668  	JNE  LBB0_1122
  7669  
  7670  LBB0_1123:
        	// Return if rsi equals r9; otherwise continue at LBB0_1124, which lies
        	// outside this section (presumably the element-by-element tail).
  7671  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7672  	JE   LBB0_1526
  7673  	JMP  LBB0_1124
  7674  
  7675  LBB0_1128:
        	// Bulk copy from rdx to rcx addressed by a plain byte offset in rax,
        	// used where no value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1130 in that case - confirm.
  7676  	LONG $0xfce78348         // and    rdi, -4
  7677  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7678  	WORD $0xc031             // xor    eax, eax
  7679  
  7680  LBB0_1129:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes per pass.
  7681  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
  7682  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
  7683  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
  7684  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
  7685  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
  7686  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
  7687  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
  7688  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
  7689  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
  7690  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
  7691  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
  7692  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
  7693  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
  7694  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
  7695  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
  7696  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
        	// Advances rax by 128; written as "sub -128" because -128 fits a signed
        	// 8-bit immediate while +128 does not.
  7697  	LONG $0x80e88348               // sub    rax, -128
  7698  	LONG $0x04c78348               // add    rdi, 4
  7699  	JNE  LBB0_1129
  7700  
  7701  LBB0_1130:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is biased by 16 so [base + rax - 16] and [base + rax] address the
        	// two halves of a chunk.
  7702  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7703  	JE   LBB0_1133
  7704  	LONG $0x10c08348         // add    rax, 16
  7705  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7706  
  7707  LBB0_1132:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7708  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7709  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7710  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7711  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7712  	LONG $0x20c08348               // add    rax, 32
  7713  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7714  	JNE  LBB0_1132
  7715  
  7716  LBB0_1133:
        	// Return if rsi equals r9; otherwise continue at LBB0_1134, which lies
        	// outside this section (presumably the element-by-element tail).
  7717  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7718  	JE   LBB0_1526
  7719  	JMP  LBB0_1134
  7720  
  7721  LBB0_1138:
        	// Bulk copy from rdx to rcx addressed by a plain byte offset in rax,
        	// used where no value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1140 in that case - confirm.
  7722  	LONG $0xfce78348         // and    rdi, -4
  7723  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7724  	WORD $0xc031             // xor    eax, eax
  7725  
  7726  LBB0_1139:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes per pass.
  7727  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
  7728  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
  7729  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
  7730  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
  7731  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
  7732  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
  7733  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
  7734  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
  7735  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
  7736  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
  7737  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
  7738  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
  7739  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
  7740  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
  7741  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
  7742  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
        	// Advances rax by 128; written as "sub -128" because -128 fits a signed
        	// 8-bit immediate while +128 does not.
  7743  	LONG $0x80e88348               // sub    rax, -128
  7744  	LONG $0x04c78348               // add    rdi, 4
  7745  	JNE  LBB0_1139
  7746  
  7747  LBB0_1140:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is biased by 16 so [base + rax - 16] and [base + rax] address the
        	// two halves of a chunk.
  7748  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7749  	JE   LBB0_1143
  7750  	LONG $0x10c08348         // add    rax, 16
  7751  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7752  
  7753  LBB0_1142:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7754  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7755  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7756  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7757  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7758  	LONG $0x20c08348               // add    rax, 32
  7759  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7760  	JNE  LBB0_1142
  7761  
  7762  LBB0_1143:
        	// Return if rsi equals r9; otherwise continue at LBB0_1144, which lies
        	// outside this section (presumably the element-by-element tail).
  7763  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7764  	JE   LBB0_1526
  7765  	JMP  LBB0_1144
  7766  
  7767  LBB0_1148:
        	// 8-bit -> 64-bit widening with sign extension, vector part.
        	// Source bytes are read from rdx, 8-byte results written through rcx.
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1150 in that case - confirm.
  7768  	LONG $0xfce78348         // and    rdi, -4
  7769  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7770  	WORD $0xc031             // xor    eax, eax
  7771  
  7772  LBB0_1149:
        	// Each pmovsxbq reads 2 source bytes and yields 2 qwords; 8 of them
        	// per pass convert 16 elements (16 bytes in, 128 bytes out).
  7773  	LONG $0x22380f66; WORD $0x0204             // pmovsxbq    xmm0, word [rdx + rax]
  7774  	LONG $0x22380f66; WORD $0x024c; BYTE $0x02 // pmovsxbq    xmm1, word [rdx + rax + 2]
  7775  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  7776  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  7777  	LONG $0x22380f66; WORD $0x0244; BYTE $0x04 // pmovsxbq    xmm0, word [rdx + rax + 4]
  7778  	LONG $0x22380f66; WORD $0x024c; BYTE $0x06 // pmovsxbq    xmm1, word [rdx + rax + 6]
  7779  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  7780  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  7781  	LONG $0x22380f66; WORD $0x0244; BYTE $0x08 // pmovsxbq    xmm0, word [rdx + rax + 8]
  7782  	LONG $0x22380f66; WORD $0x024c; BYTE $0x0a // pmovsxbq    xmm1, word [rdx + rax + 10]
  7783  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  7784  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  7785  	LONG $0x22380f66; WORD $0x0244; BYTE $0x0c // pmovsxbq    xmm0, word [rdx + rax + 12]
  7786  	LONG $0x22380f66; WORD $0x024c; BYTE $0x0e // pmovsxbq    xmm1, word [rdx + rax + 14]
  7787  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  7788  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  7789  	LONG $0x10c08348                           // add    rax, 16
  7790  	LONG $0x04c78348                           // add    rdi, 4
  7791  	JNE  LBB0_1149
  7792  
  7793  LBB0_1150:
        	// Remainder: the loop below runs r8 times, 4 elements per pass. Skip it
        	// when r8 is zero. Set up a destination cursor rdi = rcx + 8*rax + 16
        	// and a source cursor r10 = rdx + rax + 2, then reuse rax as the pass
        	// counter starting at 0.
  7794  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7795  	JE   LBB0_1153
  7796  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  7797  	LONG $0x10c78348         // add    rdi, 16
  7798  	LONG $0x10148d4c         // lea    r10, [rax + rdx]
  7799  	LONG $0x02c28349         // add    r10, 2
  7800  	WORD $0xc031             // xor    eax, eax
  7801  
  7802  LBB0_1152:
        	// Sign-extend 4 source bytes to 4 qwords, advance the destination
        	// cursor by 32 bytes, and repeat until rax equals r8.
  7803  	QUAD $0xfe824422380f4166                   // pmovsxbq    xmm0, word [r10 + 4*rax - 2]
  7804  	LONG $0x380f4166; WORD $0x0c22; BYTE $0x82 // pmovsxbq    xmm1, word [r10 + 4*rax]
  7805  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  7806  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  7807  	LONG $0x20c78348                           // add    rdi, 32
  7808  	LONG $0x01c08348                           // add    rax, 1
  7809  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  7810  	JNE  LBB0_1152
  7811  
  7812  LBB0_1153:
        	// Return if rsi equals r9; otherwise continue at LBB0_1154, which lies
        	// outside this section (presumably the element-by-element tail).
  7813  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7814  	JE   LBB0_1526
  7815  	JMP  LBB0_1154
  7816  
  7817  LBB0_1158:
        	// Bulk copy from rdx to rcx, indexed in 8-byte elements, used where no
        	// value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1160 in that case - confirm.
  7818  	LONG $0xfce78348         // and    rdi, -4
  7819  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7820  	WORD $0xc031             // xor    eax, eax
  7821  
  7822  LBB0_1159:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes (16 elements) per pass.
  7823  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
  7824  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
  7825  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
  7826  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
  7827  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
  7828  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
  7829  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
  7830  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
  7831  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
  7832  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
  7833  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
  7834  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
  7835  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
  7836  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
  7837  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
  7838  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
  7839  	LONG $0x10c08348               // add    rax, 16
  7840  	LONG $0x04c78348               // add    rdi, 4
  7841  	JNE  LBB0_1159
  7842  
  7843  LBB0_1160:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is turned from an element index into a byte offset biased by 16 so
        	// [base + rax - 16] and [base + rax] address the two halves of a chunk.
  7844  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7845  	JE   LBB0_1163
  7846  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16]
  7847  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7848  
  7849  LBB0_1162:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7850  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7851  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7852  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7853  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7854  	LONG $0x20c08348               // add    rax, 32
  7855  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7856  	JNE  LBB0_1162
  7857  
  7858  LBB0_1163:
        	// Return if rsi equals r9; otherwise continue at LBB0_1164, which lies
        	// outside this section (presumably the element-by-element tail).
  7859  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7860  	JE   LBB0_1526
  7861  	JMP  LBB0_1164
  7862  
  7863  LBB0_1168:
        	// Bulk copy from rdx to rcx, indexed in 8-byte elements, used where no
        	// value conversion is applied (bytes are moved unchanged).
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled
        	// loop can count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1170 in that case - confirm.
  7864  	LONG $0xfce78348         // and    rdi, -4
  7865  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7866  	WORD $0xc031             // xor    eax, eax
  7867  
  7868  LBB0_1169:
        	// Unrolled body: 8 x 16-byte moves = 128 bytes (16 elements) per pass.
  7869  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
  7870  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
  7871  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
  7872  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
  7873  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
  7874  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
  7875  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
  7876  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
  7877  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
  7878  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
  7879  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
  7880  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
  7881  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
  7882  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
  7883  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
  7884  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
  7885  	LONG $0x10c08348               // add    rax, 16
  7886  	LONG $0x04c78348               // add    rdi, 4
  7887  	JNE  LBB0_1169
  7888  
  7889  LBB0_1170:
        	// Remainder: r8 is the number of 32-byte chunks still to copy (the loop
        	// below runs r8 times). Skip it when r8 is zero.
        	// rax is turned from an element index into a byte offset biased by 16 so
        	// [base + rax - 16] and [base + rax] address the two halves of a chunk.
  7890  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7891  	JE   LBB0_1173
  7892  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16]
  7893  	WORD $0xf749; BYTE $0xd8 // neg    r8
  7894  
  7895  LBB0_1172:
        	// Copy one 32-byte chunk per pass; negated r8 counts up to zero.
  7896  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7897  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7898  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7899  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7900  	LONG $0x20c08348               // add    rax, 32
  7901  	WORD $0xff49; BYTE $0xc0       // inc    r8
  7902  	JNE  LBB0_1172
  7903  
  7904  LBB0_1173:
        	// Return if rsi equals r9; otherwise continue at LBB0_1174, which lies
        	// outside this section (presumably the element-by-element tail).
  7905  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7906  	JE   LBB0_1526
  7907  	JMP  LBB0_1174
  7908  
  7909  LBB0_1178:
        	// 8-bit -> 64-bit widening with zero extension, unrolled vector loop.
        	// Source bytes are read from rdx, 8-byte results written through rcx.
        	// rdi is rounded down to a multiple of 4 and negated so the loop can
        	// count it up to zero in steps of 4; rax is the element index.
        	// NOTE(review): if rdi & -4 were 0 the loop below would not stop at
        	// zero; callers presumably enter at LBB0_1180 in that case - confirm.
  7910  	LONG $0xfce78348         // and    rdi, -4
  7911  	WORD $0xf748; BYTE $0xdf // neg    rdi
  7912  	WORD $0xc031             // xor    eax, eax
  7913  
  7914  LBB0_1179:
        	// Each pmovzxbq reads 2 source bytes and yields 2 qwords; 8 of them
        	// per pass convert 16 elements (16 bytes in, 128 bytes out).
  7915  	LONG $0x32380f66; WORD $0x0204             // pmovzxbq    xmm0, word [rdx + rax]
  7916  	LONG $0x32380f66; WORD $0x024c; BYTE $0x02 // pmovzxbq    xmm1, word [rdx + rax + 2]
  7917  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  7918  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  7919  	LONG $0x32380f66; WORD $0x0244; BYTE $0x04 // pmovzxbq    xmm0, word [rdx + rax + 4]
  7920  	LONG $0x32380f66; WORD $0x024c; BYTE $0x06 // pmovzxbq    xmm1, word [rdx + rax + 6]
  7921  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  7922  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  7923  	LONG $0x32380f66; WORD $0x0244; BYTE $0x08 // pmovzxbq    xmm0, word [rdx + rax + 8]
  7924  	LONG $0x32380f66; WORD $0x024c; BYTE $0x0a // pmovzxbq    xmm1, word [rdx + rax + 10]
  7925  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  7926  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  7927  	LONG $0x32380f66; WORD $0x0244; BYTE $0x0c // pmovzxbq    xmm0, word [rdx + rax + 12]
  7928  	LONG $0x32380f66; WORD $0x024c; BYTE $0x0e // pmovzxbq    xmm1, word [rdx + rax + 14]
  7929  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  7930  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  7931  	LONG $0x10c08348                           // add    rax, 16
  7932  	LONG $0x04c78348                           // add    rdi, 4
  7933  	JNE  LBB0_1179
  7934  
  7935  LBB0_1180:
  7936  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  7937  	JE   LBB0_1183
  7938  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax]
  7939  	LONG $0x10c78348         // add    rdi, 16
  7940  	LONG $0x10148d4c         // lea    r10, [rax + rdx]
  7941  	LONG $0x02c28349         // add    r10, 2
  7942  	WORD $0xc031             // xor    eax, eax
  7943  
  7944  LBB0_1182:
  7945  	QUAD $0xfe824432380f4166                   // pmovzxbq    xmm0, word [r10 + 4*rax - 2]
  7946  	LONG $0x380f4166; WORD $0x0c32; BYTE $0x82 // pmovzxbq    xmm1, word [r10 + 4*rax]
  7947  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  7948  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  7949  	LONG $0x20c78348                           // add    rdi, 32
  7950  	LONG $0x01c08348                           // add    rax, 1
  7951  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax
  7952  	JNE  LBB0_1182
  7953  
  7954  LBB0_1183:
  7955  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  7956  	JE   LBB0_1526
  7957  	JMP  LBB0_1184
  7958  
  7959  LBB0_1188: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 2-byte units (rax scaled by 2).
  7960  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  7961  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  7962  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  7963  
  7964  LBB0_1189: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  7965  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
  7966  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
  7967  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
  7968  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
  7969  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
  7970  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
  7971  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
  7972  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
  7973  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
  7974  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
  7975  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
  7976  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
  7977  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
  7978  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
  7979  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
  7980  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
  7981  	LONG $0x40c08348               // add    rax, 64 -- 64 two-byte elements = 128 bytes copied
  7982  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  7983  	JNE  LBB0_1189
  7984  
  7985  LBB0_1190: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  7986  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  7987  	JE   LBB0_1193
  7988  	WORD $0x0148; BYTE $0xc0 // add    rax, rax -- element index -> byte offset (x2)
  7989  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  7990  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  7991  
  7992  LBB0_1192: // Tail loop: copies 32 bytes per iteration.
  7993  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  7994  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  7995  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  7996  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  7997  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  7998  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  7999  	JNE  LBB0_1192
  8000  
  8001  LBB0_1193: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1194 (both labels are defined outside this block).
  8002  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8003  	JE   LBB0_1526
  8004  	JMP  LBB0_1194
  8005  
  8006  LBB0_1198: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 2-byte units (rax scaled by 2).
  8007  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8008  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8009  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8010  
  8011  LBB0_1199: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8012  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
  8013  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
  8014  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
  8015  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
  8016  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
  8017  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
  8018  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
  8019  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
  8020  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
  8021  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
  8022  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
  8023  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
  8024  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
  8025  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
  8026  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
  8027  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
  8028  	LONG $0x40c08348               // add    rax, 64 -- 64 two-byte elements = 128 bytes copied
  8029  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8030  	JNE  LBB0_1199
  8031  
  8032  LBB0_1200: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8033  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8034  	JE   LBB0_1203
  8035  	WORD $0x0148; BYTE $0xc0 // add    rax, rax -- element index -> byte offset (x2)
  8036  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  8037  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8038  
  8039  LBB0_1202: // Tail loop: copies 32 bytes per iteration.
  8040  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8041  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8042  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8043  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8044  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8045  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8046  	JNE  LBB0_1202
  8047  
  8048  LBB0_1203: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1204 (both labels are defined outside this block).
  8049  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8050  	JE   LBB0_1526
  8051  	JMP  LBB0_1204
  8052  
  8053  LBB0_1208: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 2-byte units (rax scaled by 2).
  8054  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8055  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8056  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8057  
  8058  LBB0_1209: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8059  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
  8060  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
  8061  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
  8062  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
  8063  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
  8064  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
  8065  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
  8066  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
  8067  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
  8068  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
  8069  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
  8070  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
  8071  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
  8072  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
  8073  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
  8074  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
  8075  	LONG $0x40c08348               // add    rax, 64 -- 64 two-byte elements = 128 bytes copied
  8076  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8077  	JNE  LBB0_1209
  8078  
  8079  LBB0_1210: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8080  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8081  	JE   LBB0_1213
  8082  	WORD $0x0148; BYTE $0xc0 // add    rax, rax -- element index -> byte offset (x2)
  8083  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  8084  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8085  
  8086  LBB0_1212: // Tail loop: copies 32 bytes per iteration.
  8087  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8088  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8089  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8090  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8091  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8092  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8093  	JNE  LBB0_1212
  8094  
  8095  LBB0_1213: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1214 (both labels are defined outside this block).
  8096  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8097  	JE   LBB0_1526
  8098  	JMP  LBB0_1214
  8099  
  8100  LBB0_1218: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 2-byte units (rax scaled by 2).
  8101  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8102  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8103  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8104  
  8105  LBB0_1219: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8106  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
  8107  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
  8108  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
  8109  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
  8110  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
  8111  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
  8112  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
  8113  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
  8114  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
  8115  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
  8116  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
  8117  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
  8118  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
  8119  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
  8120  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
  8121  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
  8122  	LONG $0x40c08348               // add    rax, 64 -- 64 two-byte elements = 128 bytes copied
  8123  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8124  	JNE  LBB0_1219
  8125  
  8126  LBB0_1220: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8127  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8128  	JE   LBB0_1223
  8129  	WORD $0x0148; BYTE $0xc0 // add    rax, rax -- element index -> byte offset (x2)
  8130  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  8131  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8132  
  8133  LBB0_1222: // Tail loop: copies 32 bytes per iteration.
  8134  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8135  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8136  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8137  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8138  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8139  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8140  	JNE  LBB0_1222
  8141  
  8142  LBB0_1223: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1224 (both labels are defined outside this block).
  8143  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8144  	JE   LBB0_1526
  8145  	JMP  LBB0_1224
  8146  
  8147  LBB0_1228: // Setup for the 4x-unrolled widening loop: source bytes at [rdx] become sign-extended qwords at [rcx].
  8148  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8149  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8150  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8151  
  8152  LBB0_1229: // Main loop: eight pmovsxbq (2 bytes -> 2 qwords each) turn 16 source bytes into 128 destination bytes per iteration.
  8153  	LONG $0x22380f66; WORD $0x0204             // pmovsxbq    xmm0, word [rdx + rax]
  8154  	LONG $0x22380f66; WORD $0x024c; BYTE $0x02 // pmovsxbq    xmm1, word [rdx + rax + 2]
  8155  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  8156  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  8157  	LONG $0x22380f66; WORD $0x0244; BYTE $0x04 // pmovsxbq    xmm0, word [rdx + rax + 4]
  8158  	LONG $0x22380f66; WORD $0x024c; BYTE $0x06 // pmovsxbq    xmm1, word [rdx + rax + 6]
  8159  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  8160  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  8161  	LONG $0x22380f66; WORD $0x0244; BYTE $0x08 // pmovsxbq    xmm0, word [rdx + rax + 8]
  8162  	LONG $0x22380f66; WORD $0x024c; BYTE $0x0a // pmovsxbq    xmm1, word [rdx + rax + 10]
  8163  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  8164  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  8165  	LONG $0x22380f66; WORD $0x0244; BYTE $0x0c // pmovsxbq    xmm0, word [rdx + rax + 12]
  8166  	LONG $0x22380f66; WORD $0x024c; BYTE $0x0e // pmovsxbq    xmm1, word [rdx + rax + 14]
  8167  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  8168  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  8169  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements converted
  8170  	LONG $0x04c78348                           // add    rdi, 4 -- four load/store pairs done; ZF set when the counter reaches 0
  8171  	JNE  LBB0_1229
  8172  
  8173  LBB0_1230: // Remainder: r8 more iterations of 4 elements each (r8 is set outside this block).
  8174  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- anything left over?
  8175  	JE   LBB0_1233
  8176  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax] -- rdi = destination cursor
  8177  	LONG $0x10c78348         // add    rdi, 16 -- biased by 16 for the [rdi-16]/[rdi] stores below
  8178  	LONG $0x10148d4c         // lea    r10, [rax + rdx] -- r10 = source cursor
  8179  	LONG $0x02c28349         // add    r10, 2 -- biased by 2 for the [r10-2]/[r10] loads below
  8180  	WORD $0xc031             // xor    eax, eax -- rax now counts tail iterations
  8181  
  8182  LBB0_1232: // Tail loop: widens 4 source bytes into 4 qwords (32 destination bytes) per iteration.
  8183  	QUAD $0xfe824422380f4166                   // pmovsxbq    xmm0, word [r10 + 4*rax - 2]
  8184  	LONG $0x380f4166; WORD $0x0c22; BYTE $0x82 // pmovsxbq    xmm1, word [r10 + 4*rax]
  8185  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  8186  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  8187  	LONG $0x20c78348                           // add    rdi, 32 -- advance the destination cursor
  8188  	LONG $0x01c08348                           // add    rax, 1
  8189  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax -- loop until rax == r8
  8190  	JNE  LBB0_1232
  8191  
  8192  LBB0_1233: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1234 (both labels are defined outside this block).
  8193  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8194  	JE   LBB0_1526
  8195  	JMP  LBB0_1234
  8196  
  8197  LBB0_1238: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 8-byte units (rax scaled by 8).
  8198  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8199  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8200  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8201  
  8202  LBB0_1239: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8203  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
  8204  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
  8205  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
  8206  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
  8207  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
  8208  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
  8209  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
  8210  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
  8211  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
  8212  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
  8213  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
  8214  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
  8215  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
  8216  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
  8217  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
  8218  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
  8219  	LONG $0x10c08348               // add    rax, 16 -- 16 eight-byte elements = 128 bytes copied
  8220  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8221  	JNE  LBB0_1239
  8222  
  8223  LBB0_1240: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8224  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8225  	JE   LBB0_1243
  8226  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16] -- index -> byte offset, biased by 16 for the [rax-16]/[rax] addressing below
  8227  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8228  
  8229  LBB0_1242: // Tail loop: copies 32 bytes per iteration.
  8230  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8231  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8232  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8233  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8234  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8235  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8236  	JNE  LBB0_1242
  8237  
  8238  LBB0_1243: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1244 (both labels are defined outside this block).
  8239  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8240  	JE   LBB0_1526
  8241  	JMP  LBB0_1244
  8242  
  8243  LBB0_1248: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 8-byte units (rax scaled by 8).
  8244  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8245  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8246  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8247  
  8248  LBB0_1249: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8249  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
  8250  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
  8251  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
  8252  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
  8253  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
  8254  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
  8255  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
  8256  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
  8257  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
  8258  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
  8259  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
  8260  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
  8261  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
  8262  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
  8263  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
  8264  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
  8265  	LONG $0x10c08348               // add    rax, 16 -- 16 eight-byte elements = 128 bytes copied
  8266  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8267  	JNE  LBB0_1249
  8268  
  8269  LBB0_1250: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8270  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8271  	JE   LBB0_1253
  8272  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16] -- index -> byte offset, biased by 16 for the [rax-16]/[rax] addressing below
  8273  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8274  
  8275  LBB0_1252: // Tail loop: copies 32 bytes per iteration.
  8276  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8277  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8278  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8279  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8280  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8281  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8282  	JNE  LBB0_1252
  8283  
  8284  LBB0_1253: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1254 (both labels are defined outside this block).
  8285  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8286  	JE   LBB0_1526
  8287  	JMP  LBB0_1254
  8288  
  8289  LBB0_1258: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 4-byte units (rax scaled by 4).
  8290  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8291  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8292  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8293  
  8294  LBB0_1259: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8295  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
  8296  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
  8297  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
  8298  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
  8299  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
  8300  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
  8301  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
  8302  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
  8303  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
  8304  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
  8305  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
  8306  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
  8307  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
  8308  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
  8309  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
  8310  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
  8311  	LONG $0x20c08348               // add    rax, 32 -- 32 four-byte elements = 128 bytes copied
  8312  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8313  	JNE  LBB0_1259
  8314  
  8315  LBB0_1260: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8316  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8317  	JE   LBB0_1263
  8318  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16] -- index -> byte offset, biased by 16 for the [rax-16]/[rax] addressing below
  8319  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8320  
  8321  LBB0_1262: // Tail loop: copies 32 bytes per iteration.
  8322  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8323  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8324  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8325  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8326  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8327  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8328  	JNE  LBB0_1262
  8329  
  8330  LBB0_1263: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1264 (both labels are defined outside this block).
  8331  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8332  	JE   LBB0_1526
  8333  	JMP  LBB0_1264
  8334  
  8335  LBB0_1268: // Setup for the 4x-unrolled widening loop: source bytes at [rdx] become zero-extended qwords at [rcx].
  8336  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8337  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8338  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8339  
  8340  LBB0_1269: // Main loop: eight pmovzxbq (2 bytes -> 2 qwords each) turn 16 source bytes into 128 destination bytes per iteration.
  8341  	LONG $0x32380f66; WORD $0x0204             // pmovzxbq    xmm0, word [rdx + rax]
  8342  	LONG $0x32380f66; WORD $0x024c; BYTE $0x02 // pmovzxbq    xmm1, word [rdx + rax + 2]
  8343  	LONG $0x047f0ff3; BYTE $0xc1               // movdqu    oword [rcx + 8*rax], xmm0
  8344  	LONG $0x4c7f0ff3; WORD $0x10c1             // movdqu    oword [rcx + 8*rax + 16], xmm1
  8345  	LONG $0x32380f66; WORD $0x0244; BYTE $0x04 // pmovzxbq    xmm0, word [rdx + rax + 4]
  8346  	LONG $0x32380f66; WORD $0x024c; BYTE $0x06 // pmovzxbq    xmm1, word [rdx + rax + 6]
  8347  	LONG $0x447f0ff3; WORD $0x20c1             // movdqu    oword [rcx + 8*rax + 32], xmm0
  8348  	LONG $0x4c7f0ff3; WORD $0x30c1             // movdqu    oword [rcx + 8*rax + 48], xmm1
  8349  	LONG $0x32380f66; WORD $0x0244; BYTE $0x08 // pmovzxbq    xmm0, word [rdx + rax + 8]
  8350  	LONG $0x32380f66; WORD $0x024c; BYTE $0x0a // pmovzxbq    xmm1, word [rdx + rax + 10]
  8351  	LONG $0x447f0ff3; WORD $0x40c1             // movdqu    oword [rcx + 8*rax + 64], xmm0
  8352  	LONG $0x4c7f0ff3; WORD $0x50c1             // movdqu    oword [rcx + 8*rax + 80], xmm1
  8353  	LONG $0x32380f66; WORD $0x0244; BYTE $0x0c // pmovzxbq    xmm0, word [rdx + rax + 12]
  8354  	LONG $0x32380f66; WORD $0x024c; BYTE $0x0e // pmovzxbq    xmm1, word [rdx + rax + 14]
  8355  	LONG $0x447f0ff3; WORD $0x60c1             // movdqu    oword [rcx + 8*rax + 96], xmm0
  8356  	LONG $0x4c7f0ff3; WORD $0x70c1             // movdqu    oword [rcx + 8*rax + 112], xmm1
  8357  	LONG $0x10c08348                           // add    rax, 16 -- 16 elements converted
  8358  	LONG $0x04c78348                           // add    rdi, 4 -- four load/store pairs done; ZF set when the counter reaches 0
  8359  	JNE  LBB0_1269
  8360  
  8361  LBB0_1270: // Remainder: r8 more iterations of 4 elements each (r8 is set outside this block).
  8362  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- anything left over?
  8363  	JE   LBB0_1273
  8364  	LONG $0xc13c8d48         // lea    rdi, [rcx + 8*rax] -- rdi = destination cursor
  8365  	LONG $0x10c78348         // add    rdi, 16 -- biased by 16 for the [rdi-16]/[rdi] stores below
  8366  	LONG $0x10148d4c         // lea    r10, [rax + rdx] -- r10 = source cursor
  8367  	LONG $0x02c28349         // add    r10, 2 -- biased by 2 for the [r10-2]/[r10] loads below
  8368  	WORD $0xc031             // xor    eax, eax -- rax now counts tail iterations
  8369  
  8370  LBB0_1272: // Tail loop: widens 4 source bytes into 4 qwords (32 destination bytes) per iteration.
  8371  	QUAD $0xfe824432380f4166                   // pmovzxbq    xmm0, word [r10 + 4*rax - 2]
  8372  	LONG $0x380f4166; WORD $0x0c32; BYTE $0x82 // pmovzxbq    xmm1, word [r10 + 4*rax]
  8373  	LONG $0x477f0ff3; BYTE $0xf0               // movdqu    oword [rdi - 16], xmm0
  8374  	LONG $0x0f7f0ff3                           // movdqu    oword [rdi], xmm1
  8375  	LONG $0x20c78348                           // add    rdi, 32 -- advance the destination cursor
  8376  	LONG $0x01c08348                           // add    rax, 1
  8377  	WORD $0x3949; BYTE $0xc0                   // cmp    r8, rax -- loop until rax == r8
  8378  	JNE  LBB0_1272
  8379  
  8380  LBB0_1273: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1274 (both labels are defined outside this block).
  8381  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8382  	JE   LBB0_1526
  8383  	JMP  LBB0_1274
  8384  
  8385  LBB0_1278: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in bytes (rax unscaled).
  8386  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8387  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8388  	WORD $0xc031             // xor    eax, eax -- rax = byte offset, starts at 0
  8389  
  8390  LBB0_1279: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8391  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
  8392  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
  8393  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
  8394  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
  8395  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
  8396  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
  8397  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
  8398  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
  8399  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
  8400  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
  8401  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
  8402  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
  8403  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
  8404  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
  8405  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
  8406  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
  8407  	LONG $0x80e88348               // sub    rax, -128 -- rax += 128; -128 fits a sign-extended imm8 where +128 would not
  8408  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8409  	JNE  LBB0_1279
  8410  
  8411  LBB0_1280: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8412  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8413  	JE   LBB0_1283
  8414  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  8415  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8416  
  8417  LBB0_1282: // Tail loop: copies 32 bytes per iteration.
  8418  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8419  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8420  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8421  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8422  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8423  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8424  	JNE  LBB0_1282
  8425  
  8426  LBB0_1283: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1284 (both labels are defined outside this block).
  8427  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8428  	JE   LBB0_1526
  8429  	JMP  LBB0_1284
  8430  
  8431  LBB0_1288: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in bytes (rax unscaled).
  8432  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8433  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8434  	WORD $0xc031             // xor    eax, eax -- rax = byte offset, starts at 0
  8435  
  8436  LBB0_1289: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8437  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
  8438  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
  8439  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
  8440  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
  8441  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
  8442  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
  8443  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
  8444  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
  8445  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
  8446  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
  8447  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
  8448  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
  8449  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
  8450  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
  8451  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
  8452  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
  8453  	LONG $0x80e88348               // sub    rax, -128 -- rax += 128; -128 fits a sign-extended imm8 where +128 would not
  8454  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8455  	JNE  LBB0_1289
  8456  
  8457  LBB0_1290: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8458  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8459  	JE   LBB0_1293
  8460  	LONG $0x10c08348         // add    rax, 16 -- bias by 16 for the [rax-16]/[rax] addressing below
  8461  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8462  
  8463  LBB0_1292: // Tail loop: copies 32 bytes per iteration.
  8464  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8465  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8466  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8467  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8468  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8469  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8470  	JNE  LBB0_1292
  8471  
  8472  LBB0_1293: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1294 (both labels are defined outside this block).
  8473  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8474  	JE   LBB0_1526
  8475  	JMP  LBB0_1294
  8476  
  8477  LBB0_1298: // Setup for the 4x-unrolled block copy from [rdx] to [rcx], indexed in 4-byte units (rax scaled by 4).
  8478  	LONG $0xfce78348         // and    rdi, -4 -- round the trip counter down to a multiple of 4
  8479  	WORD $0xf748; BYTE $0xdf // neg    rdi -- negate so the loop can count up to zero
  8480  	WORD $0xc031             // xor    eax, eax -- rax = element index, starts at 0
  8481  
  8482  LBB0_1299: // Main loop: copies 128 bytes (eight 16-byte unaligned moves) per iteration, data unchanged.
  8483  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
  8484  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
  8485  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
  8486  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
  8487  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
  8488  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
  8489  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
  8490  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
  8491  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
  8492  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
  8493  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
  8494  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
  8495  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
  8496  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
  8497  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
  8498  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
  8499  	LONG $0x20c08348               // add    rax, 32 -- 32 four-byte elements = 128 bytes copied
  8500  	LONG $0x04c78348               // add    rdi, 4 -- four 32-byte pairs done; ZF set when the counter reaches 0
  8501  	JNE  LBB0_1299
  8502  
  8503  LBB0_1300: // Remainder: r8 leftover 32-byte pairs still to copy (r8 is set outside this block).
  8504  	WORD $0x854d; BYTE $0xc0 // test    r8, r8 -- any leftover pairs?
  8505  	JE   LBB0_1303
  8506  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16] -- index -> byte offset, biased by 16 for the [rax-16]/[rax] addressing below
  8507  	WORD $0xf749; BYTE $0xd8 // neg    r8 -- negate so that inc r8 hits zero after the last pair
  8508  
  8509  LBB0_1302: // Tail loop: copies 32 bytes per iteration.
  8510  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8511  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8512  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8513  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8514  	LONG $0x20c08348               // add    rax, 32 -- advance to the next 32-byte pair
  8515  	WORD $0xff49; BYTE $0xc0       // inc    r8 -- ZF set once every leftover pair is done
  8516  	JNE  LBB0_1302
  8517  
  8518  LBB0_1303: // Exit test: rsi == r9 jumps to LBB0_1526, otherwise to LBB0_1304 (both labels are defined outside this block).
  8519  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8520  	JE   LBB0_1526
  8521  	JMP  LBB0_1304
  8522  
  8523  LBB0_1308:
        // Block copy from [rdx] to [rcx], four 32-byte chunks (128 bytes) per main-loop iteration.
        // rdi is rounded down to a multiple of 4 and negated so `add rdi, 4` / JNE ends the loop at zero.
        // NOTE(review): the loop below is entered unconditionally, so rdi is presumably >= 4 on entry — confirm at the jump site.
  8524  	LONG $0xfce78348         // and    rdi, -4
  8525  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rax = index in 4-byte units (addresses below use 4*rax).
  8526  	WORD $0xc031             // xor    eax, eax
  8527  
  8528  LBB0_1309:
        // Eight unaligned 16-byte load/store pairs covering byte offsets 0..127 from 4*rax.
        // movups and movupd are both plain unaligned 128-bit moves here; only the encoding differs.
  8529  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
  8530  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
  8531  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
  8532  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
  8533  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
  8534  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
  8535  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
  8536  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
  8537  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
  8538  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
  8539  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
  8540  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
  8541  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
  8542  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
  8543  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
  8544  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
        	// 32 four-byte units = the 128 bytes just copied.
  8545  	LONG $0x20c08348               // add    rax, 32
  8546  	LONG $0x04c78348               // add    rdi, 4
  8547  	JNE  LBB0_1309 // loop until the negated chunk counter reaches zero
  8548  
  8549  LBB0_1310:
        // Remainder: r8 leftover 32-byte chunks (r8 is set up outside this chunk — presumably the chunk count mod 4; confirm).
  8550  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
  8551  	JE   LBB0_1313 // r8 == 0: nothing left to copy
        	// Turn the 4-byte-unit index into a byte offset, biased by 16 for the -16/+0 addressing below.
  8552  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16]
        	// Negate the count so that `inc r8` reaches zero after the last chunk.
  8553  	WORD $0xf749; BYTE $0xd8 // neg    r8
  8554  
  8555  LBB0_1312:
        // Copy one 32-byte chunk per iteration.
  8556  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
  8557  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
  8558  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
  8559  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
  8560  	LONG $0x20c08348               // add    rax, 32
  8561  	WORD $0xff49; BYTE $0xc0       // inc    r8
  8562  	JNE  LBB0_1312 // loop until the negated counter reaches zero
  8563  
  8564  LBB0_1313:
  8565  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8566  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8567  	JMP  LBB0_1314 // otherwise continue at LBB0_1314 (outside this chunk; presumably the leftover-element path — confirm)
  8568  
  8569  LBB0_1318:
        // Entry that zeroes the element index rdi before falling through to LBB0_1319.
  8570  	WORD $0xff31 // xor    edi, edi
  8571  
  8572  LBB0_1319:
        // Runs only when bit 0 of r8 is set: narrows 4 qwords at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // The pshufb mask at 64[rbp] is LCDATA1+0x40 (bytes 00 08 00 ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it moves the low byte of each qword into bytes 0..1,
        // and pextrw then stores that low word.
  8573  	LONG $0x01c0f641                           // test    r8b, 1
  8574  	JE   LBB0_1321 // bit 0 of r8 clear: no extra vector pair to process
  8575  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  8576  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  8577  	LONG $0x556f0f66; BYTE $0x40               // movdqa    xmm2, oword 64[rbp] /* [rip + .LCPI0_5] */
  8578  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8579  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8580  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8581  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  8582  
  8583  LBB0_1321:
  8584  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8585  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8586  	JMP  LBB0_1322 // otherwise continue at LBB0_1322 (outside this chunk; presumably the leftover-element path — confirm)
  8587  
  8588  LBB0_1326:
        // Entry that zeroes the element index rdi before falling through to LBB0_1327.
  8589  	WORD $0xff31 // xor    edi, edi
  8590  
  8591  LBB0_1327:
        // Runs only when bit 0 of r8 is set: narrows 16 words at [rdx + 2*rdi] to 16 bytes at [rcx + rdi].
        // NOTE(review): the pshufb mask at 256[rbp] (.LCPI0_17) is not visible in this chunk; presumably it packs
        // the low byte of every word into the low qword — confirm. punpcklqdq then joins the two low qwords.
  8592  	LONG $0x01c0f641               // test    r8b, 1
  8593  	JE   LBB0_1329 // bit 0 of r8 clear: no extra vector pair to process
  8594  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
  8595  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  8596  	QUAD $0x00000100956f0f66       // movdqa    xmm2, oword 256[rbp] /* [rip + .LCPI0_17] */
  8597  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8598  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8599  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  8600  	LONG $0x047f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm0
  8601  
  8602  LBB0_1329:
  8603  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8604  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8605  	JMP  LBB0_1330 // otherwise continue at LBB0_1330 (outside this chunk; presumably the leftover-element path — confirm)
  8606  
  8607  LBB0_1334:
        // Entry that zeroes the element index rdi before falling through to LBB0_1335.
  8608  	WORD $0xff31 // xor    edi, edi
  8609  
  8610  LBB0_1335:
        // Runs only when bit 0 of r8 is set: narrows 4 qwords at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // The pshufb mask at 64[rbp] is LCDATA1+0x40 (bytes 00 08 00 ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it moves the low byte of each qword into bytes 0..1,
        // and pextrw then stores that low word.
  8611  	LONG $0x01c0f641                           // test    r8b, 1
  8612  	JE   LBB0_1337 // bit 0 of r8 clear: no extra vector pair to process
  8613  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  8614  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  8615  	LONG $0x556f0f66; BYTE $0x40               // movdqa    xmm2, oword 64[rbp] /* [rip + .LCPI0_5] */
  8616  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8617  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8618  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8619  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  8620  
  8621  LBB0_1337:
  8622  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8623  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8624  	JMP  LBB0_1338 // otherwise continue at LBB0_1338 (outside this chunk; presumably the leftover-element path — confirm)
  8625  
  8626  LBB0_1342:
        // Entry that zeroes the element index rdi before falling through to LBB0_1343.
  8627  	WORD $0xff31 // xor    edi, edi
  8628  
  8629  LBB0_1343:
        // Runs only when bit 0 of r8 is set: narrows 8 dwords at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // The pshufb mask at 192[rbp] is LCDATA1+0xc0 (bytes 00 04 08 0c ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it gathers the low byte of each dword into the low dword,
        // which movd stores.
  8630  	LONG $0x01c0f641               // test    r8b, 1
  8631  	JE   LBB0_1345 // bit 0 of r8 clear: no extra vector pair to process
  8632  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  8633  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  8634  	QUAD $0x000000c0956f0f66       // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI0_13] */
  8635  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8636  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8637  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  8638  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  8639  
  8640  LBB0_1345:
  8641  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8642  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8643  	JMP  LBB0_1346 // otherwise continue at LBB0_1346 (outside this chunk; presumably the leftover-element path — confirm)
  8644  
  8645  LBB0_1350:
        // Entry that zeroes the element index rdi before falling through to LBB0_1351.
  8646  	WORD $0xff31 // xor    edi, edi
  8647  
  8648  LBB0_1351:
        // Runs only when bit 0 of r8 is set: sign-extends 16 bytes at [rdx + rdi] to 16 words at [rcx + 2*rdi]
        // (pmovsxbw widens 8 bytes per instruction).
  8649  	LONG $0x01c0f641                           // test    r8b, 1
  8650  	JE   LBB0_1353 // bit 0 of r8 clear: no extra vector pair to process
  8651  	LONG $0x20380f66; WORD $0x3a04             // pmovsxbw    xmm0, qword [rdx + rdi]
  8652  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbw    xmm1, qword [rdx + rdi + 8]
  8653  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  8654  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  8655  
  8656  LBB0_1353:
  8657  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8658  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8659  	JMP  LBB0_1354 // otherwise continue at LBB0_1354 (outside this chunk; presumably the leftover-element path — confirm)
  8660  
  8661  LBB0_1358:
        // Entry that zeroes the element index rdi before falling through to LBB0_1359.
  8662  	WORD $0xff31 // xor    edi, edi
  8663  
  8664  LBB0_1359:
        // Runs only when bit 0 of r8 is set: sign-extends 16 bytes at [rdx + rdi] to 16 words at [rcx + 2*rdi]
        // (pmovsxbw widens 8 bytes per instruction).
  8665  	LONG $0x01c0f641                           // test    r8b, 1
  8666  	JE   LBB0_1361 // bit 0 of r8 clear: no extra vector pair to process
  8667  	LONG $0x20380f66; WORD $0x3a04             // pmovsxbw    xmm0, qword [rdx + rdi]
  8668  	LONG $0x20380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbw    xmm1, qword [rdx + rdi + 8]
  8669  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  8670  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  8671  
  8672  LBB0_1361:
  8673  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8674  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8675  	JMP  LBB0_1362 // otherwise continue at LBB0_1362 (outside this chunk; presumably the leftover-element path — confirm)
  8676  
  8677  LBB0_1366:
        // Entry that zeroes the element index rdi before falling through to LBB0_1367.
  8678  	WORD $0xff31 // xor    edi, edi
  8679  
  8680  LBB0_1367:
        // Runs only when bit 0 of r8 is set: zero-extends 16 bytes at [rdx + rdi] to 16 words at [rcx + 2*rdi]
        // (pmovzxbw widens 8 bytes per instruction).
  8681  	LONG $0x01c0f641                           // test    r8b, 1
  8682  	JE   LBB0_1369 // bit 0 of r8 clear: no extra vector pair to process
  8683  	LONG $0x30380f66; WORD $0x3a04             // pmovzxbw    xmm0, qword [rdx + rdi]
  8684  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x08 // pmovzxbw    xmm1, qword [rdx + rdi + 8]
  8685  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  8686  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  8687  
  8688  LBB0_1369:
  8689  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8690  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8691  	JMP  LBB0_1370 // otherwise continue at LBB0_1370 (outside this chunk; presumably the leftover-element path — confirm)
  8692  
  8693  LBB0_1374:
        // Entry that zeroes the element index rdi before falling through to LBB0_1375.
  8694  	WORD $0xff31 // xor    edi, edi
  8695  
  8696  LBB0_1375:
        // Runs only when bit 0 of r8 is set: zero-extends 16 bytes at [rdx + rdi] to 16 words at [rcx + 2*rdi]
        // (pmovzxbw widens 8 bytes per instruction).
  8697  	LONG $0x01c0f641                           // test    r8b, 1
  8698  	JE   LBB0_1377 // bit 0 of r8 clear: no extra vector pair to process
  8699  	LONG $0x30380f66; WORD $0x3a04             // pmovzxbw    xmm0, qword [rdx + rdi]
  8700  	LONG $0x30380f66; WORD $0x3a4c; BYTE $0x08 // pmovzxbw    xmm1, qword [rdx + rdi + 8]
  8701  	LONG $0x047f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm0
  8702  	LONG $0x4c7f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm1
  8703  
  8704  LBB0_1377:
  8705  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8706  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8707  	JMP  LBB0_1378 // otherwise continue at LBB0_1378 (outside this chunk; presumably the leftover-element path — confirm)
  8708  
  8709  LBB0_1382:
        // Entry that zeroes the element index rdi before falling through to LBB0_1383.
  8710  	WORD $0xff31 // xor    edi, edi
  8711  
  8712  LBB0_1383:
        // Runs only when bit 0 of r8 is set: converts 8 signed bytes at [rdx + rdi] to 8 float32 at [rcx + 4*rdi].
        // pmovsxbd sign-extends 4 bytes to 4 dwords; cvtdq2ps converts those int32 lanes to single precision.
  8713  	LONG $0x01c0f641                           // test    r8b, 1
  8714  	JE   LBB0_1385 // bit 0 of r8 clear: no extra vector pair to process
  8715  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi]
  8716  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  8717  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  8718  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  8719  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  8720  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  8721  
  8722  LBB0_1385:
  8723  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8724  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8725  	JMP  LBB0_1386 // otherwise continue at LBB0_1386 (outside this chunk; presumably the leftover-element path — confirm)
  8726  
  8727  LBB0_1390:
        // Entry that zeroes the element index rdi before falling through to LBB0_1391.
  8728  	WORD $0xff31 // xor    edi, edi
  8729  
  8730  LBB0_1391:
        // Runs only when bit 0 of r8 is set: converts 8 unsigned bytes at [rdx + rdi] to 8 float32 at [rcx + 4*rdi].
        // pmovzxbd zero-extends 4 bytes to 4 dwords; cvtdq2ps converts those int32 lanes to single precision.
  8731  	LONG $0x01c0f641                           // test    r8b, 1
  8732  	JE   LBB0_1393 // bit 0 of r8 clear: no extra vector pair to process
  8733  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi]
  8734  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  8735  	WORD $0x5b0f; BYTE $0xc0                   // cvtdq2ps    xmm0, xmm0
  8736  	WORD $0x5b0f; BYTE $0xc9                   // cvtdq2ps    xmm1, xmm1
  8737  	LONG $0xb904110f                           // movups    oword [rcx + 4*rdi], xmm0
  8738  	LONG $0xb94c110f; BYTE $0x10               // movups    oword [rcx + 4*rdi + 16], xmm1
  8739  
  8740  LBB0_1393:
  8741  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8742  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8743  	JMP  LBB0_1394 // otherwise continue at LBB0_1394 (outside this chunk; presumably the leftover-element path — confirm)
  8744  
  8745  LBB0_1398:
        // Entry that zeroes the element index rdi before falling through to LBB0_1399.
  8746  	WORD $0xff31 // xor    edi, edi
  8747  
  8748  LBB0_1399:
        // Runs only when bit 0 of r8 is set: narrows 8 dwords at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // The pshufb mask at 192[rbp] is LCDATA1+0xc0 (bytes 00 04 08 0c ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it gathers the low byte of each dword into the low dword,
        // which movd stores.
  8749  	LONG $0x01c0f641               // test    r8b, 1
  8750  	JE   LBB0_1401 // bit 0 of r8 clear: no extra vector pair to process
  8751  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  8752  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  8753  	QUAD $0x000000c0956f0f66       // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI0_13] */
  8754  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8755  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8756  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  8757  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  8758  
  8759  LBB0_1401:
  8760  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8761  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8762  	JMP  LBB0_1402 // otherwise continue at LBB0_1402 (outside this chunk; presumably the leftover-element path — confirm)
  8763  
  8764  LBB0_1406:
        // Entry that zeroes the element index rdi before falling through to LBB0_1407.
  8765  	WORD $0xff31 // xor    edi, edi
  8766  
  8767  LBB0_1407:
        // Runs only when bit 0 of r8 is set: converts 4 doubles at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // cvttpd2dq truncates each pair of doubles to int32 in the low two dwords; the pshufb mask at 16[rbp]
        // is LCDATA1+0x10 (bytes 00 04 00 ...) if rbp holds the LCDATA1 base (set up outside this chunk — confirm),
        // selecting the low byte of those two dwords; pextrw stores the resulting low word.
  8768  	LONG $0x01c0f641                           // test    r8b, 1
  8769  	JE   LBB0_1409 // bit 0 of r8 clear: no extra vector pair to process
  8770  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  8771  	LONG $0xc0e60f66                           // cvttpd2dq    xmm0, xmm0
  8772  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  8773  	LONG $0x556f0f66; BYTE $0x10               // movdqa    xmm2, oword 16[rbp] /* [rip + .LCPI0_1] */
  8774  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1
  8775  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8776  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8777  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8778  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  8779  
  8780  LBB0_1409:
  8781  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8782  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8783  	JMP  LBB0_1410 // otherwise continue at LBB0_1410 (outside this chunk; presumably the leftover-element path — confirm)
  8784  
  8785  LBB0_1414:
        // Entry that zeroes the element index rdi before falling through to LBB0_1415.
  8786  	WORD $0xff31 // xor    edi, edi
  8787  
  8788  LBB0_1415:
        // Runs only when bit 0 of r8 is set: narrows 4 qwords at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // The pshufb mask at 64[rbp] is LCDATA1+0x40 (bytes 00 08 00 ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it moves the low byte of each qword into bytes 0..1,
        // and pextrw then stores that low word.
  8789  	LONG $0x01c0f641                           // test    r8b, 1
  8790  	JE   LBB0_1417 // bit 0 of r8 clear: no extra vector pair to process
  8791  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  8792  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  8793  	LONG $0x556f0f66; BYTE $0x40               // movdqa    xmm2, oword 64[rbp] /* [rip + .LCPI0_5] */
  8794  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8795  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8796  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8797  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  8798  
  8799  LBB0_1417:
  8800  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8801  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8802  	JMP  LBB0_1418 // otherwise continue at LBB0_1418 (outside this chunk; presumably the leftover-element path — confirm)
  8803  
  8804  LBB0_1422:
        // Entry that zeroes the element index rdi before falling through to LBB0_1423.
  8805  	WORD $0xff31 // xor    edi, edi
  8806  
  8807  LBB0_1423:
        // Runs only when bit 0 of r8 is set: narrows 16 words at [rdx + 2*rdi] to 16 bytes at [rcx + rdi].
        // NOTE(review): the pshufb mask at 256[rbp] (.LCPI0_17) is not visible in this chunk; presumably it packs
        // the low byte of every word into the low qword — confirm. punpcklqdq then joins the two low qwords.
  8808  	LONG $0x01c0f641               // test    r8b, 1
  8809  	JE   LBB0_1425 // bit 0 of r8 clear: no extra vector pair to process
  8810  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
  8811  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  8812  	QUAD $0x00000100956f0f66       // movdqa    xmm2, oword 256[rbp] /* [rip + .LCPI0_17] */
  8813  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8814  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8815  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  8816  	LONG $0x047f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm0
  8817  
  8818  LBB0_1425:
  8819  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8820  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8821  	JMP  LBB0_1426 // otherwise continue at LBB0_1426 (outside this chunk; presumably the leftover-element path — confirm)
  8822  
  8823  LBB0_1430:
        // Entry that zeroes the element index rdi before falling through to LBB0_1431.
  8824  	WORD $0xff31 // xor    edi, edi
  8825  
  8826  LBB0_1431:
        // Runs only when bit 0 of r8 is set: narrows 16 words at [rdx + 2*rdi] to 16 bytes at [rcx + rdi].
        // NOTE(review): the pshufb mask at 256[rbp] (.LCPI0_17) is not visible in this chunk; presumably it packs
        // the low byte of every word into the low qword — confirm. punpcklqdq then joins the two low qwords.
  8827  	LONG $0x01c0f641               // test    r8b, 1
  8828  	JE   LBB0_1433 // bit 0 of r8 clear: no extra vector pair to process
  8829  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
  8830  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  8831  	QUAD $0x00000100956f0f66       // movdqa    xmm2, oword 256[rbp] /* [rip + .LCPI0_17] */
  8832  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8833  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8834  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  8835  	LONG $0x047f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm0
  8836  
  8837  LBB0_1433:
  8838  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8839  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8840  	JMP  LBB0_1434 // otherwise continue at LBB0_1434 (outside this chunk; presumably the leftover-element path — confirm)
  8841  
  8842  LBB0_1438:
        // Entry that zeroes the element index rdi before falling through to LBB0_1439.
  8843  	WORD $0xff31 // xor    edi, edi
  8844  
  8845  LBB0_1439:
        // Runs only when bit 0 of r8 is set: narrows 4 qwords at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // The pshufb mask at 64[rbp] is LCDATA1+0x40 (bytes 00 08 00 ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it moves the low byte of each qword into bytes 0..1,
        // and pextrw then stores that low word.
  8846  	LONG $0x01c0f641                           // test    r8b, 1
  8847  	JE   LBB0_1441 // bit 0 of r8 clear: no extra vector pair to process
  8848  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  8849  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  8850  	LONG $0x556f0f66; BYTE $0x40               // movdqa    xmm2, oword 64[rbp] /* [rip + .LCPI0_5] */
  8851  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8852  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8853  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8854  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  8855  
  8856  LBB0_1441:
  8857  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8858  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8859  	JMP  LBB0_1442 // otherwise continue at LBB0_1442 (outside this chunk; presumably the leftover-element path — confirm)
  8860  
  8861  LBB0_1446:
        // Entry that zeroes the element index rdi before falling through to LBB0_1447.
  8862  	WORD $0xff31 // xor    edi, edi
  8863  
  8864  LBB0_1447:
        // Runs only when bit 0 of r8 is set: converts 8 float32 at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // cvttps2dq truncates to int32; packusdw then packuswb narrow dword -> word -> byte with unsigned
        // saturation; movd stores the 4 packed bytes from each register.
  8865  	LONG $0x01c0f641               // test    r8b, 1
  8866  	JE   LBB0_1449 // bit 0 of r8 clear: no extra vector pair to process
  8867  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  8868  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  8869  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  8870  	LONG $0x2b380f66; BYTE $0xc0   // packusdw    xmm0, xmm0
  8871  	LONG $0xc0670f66               // packuswb    xmm0, xmm0
  8872  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  8873  	LONG $0x2b380f66; BYTE $0xc9   // packusdw    xmm1, xmm1
  8874  	LONG $0xc9670f66               // packuswb    xmm1, xmm1
  8875  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  8876  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  8877  
  8878  LBB0_1449:
  8879  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8880  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8881  	JMP  LBB0_1450 // otherwise continue at LBB0_1450 (outside this chunk; presumably the leftover-element path — confirm)
  8882  
  8883  LBB0_1454:
        // Entry that zeroes the element index rdi before falling through to LBB0_1455.
  8884  	WORD $0xff31 // xor    edi, edi
  8885  
  8886  LBB0_1455:
        // Runs only when bit 0 of r8 is set: narrows 8 dwords at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // The pshufb mask at 192[rbp] is LCDATA1+0xc0 (bytes 00 04 08 0c ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it gathers the low byte of each dword into the low dword,
        // which movd stores.
  8887  	LONG $0x01c0f641               // test    r8b, 1
  8888  	JE   LBB0_1457 // bit 0 of r8 clear: no extra vector pair to process
  8889  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  8890  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  8891  	QUAD $0x000000c0956f0f66       // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI0_13] */
  8892  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8893  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8894  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  8895  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  8896  
  8897  LBB0_1457:
  8898  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8899  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8900  	JMP  LBB0_1458 // otherwise continue at LBB0_1458 (outside this chunk; presumably the leftover-element path — confirm)
  8901  
  8902  LBB0_1462:
        // Entry that zeroes the element index rdi before falling through to LBB0_1463.
  8903  	WORD $0xff31 // xor    edi, edi
  8904  
  8905  LBB0_1463:
        // Runs only when bit 0 of r8 is set: sign-extends 8 bytes at [rdx + rdi] to 8 dwords at [rcx + 4*rdi]
        // (pmovsxbd widens 4 bytes per instruction).
  8906  	LONG $0x01c0f641                           // test    r8b, 1
  8907  	JE   LBB0_1465 // bit 0 of r8 clear: no extra vector pair to process
  8908  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi]
  8909  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  8910  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  8911  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  8912  
  8913  LBB0_1465:
  8914  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8915  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8916  	JMP  LBB0_1466 // otherwise continue at LBB0_1466 (outside this chunk; presumably the leftover-element path — confirm)
  8917  
  8918  LBB0_1470:
        // Entry that zeroes the element index rdi before falling through to LBB0_1471.
  8919  	WORD $0xff31 // xor    edi, edi
  8920  
  8921  LBB0_1471:
        // Runs only when bit 0 of r8 is set: zero-extends 8 bytes at [rdx + rdi] to 8 dwords at [rcx + 4*rdi]
        // (pmovzxbd widens 4 bytes per instruction).
  8922  	LONG $0x01c0f641                           // test    r8b, 1
  8923  	JE   LBB0_1473 // bit 0 of r8 clear: no extra vector pair to process
  8924  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi]
  8925  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  8926  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  8927  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  8928  
  8929  LBB0_1473:
  8930  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8931  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8932  	JMP  LBB0_1474 // otherwise continue at LBB0_1474 (outside this chunk; presumably the leftover-element path — confirm)
  8933  
  8934  LBB0_1478:
        // Entry that zeroes the element index rdi before falling through to LBB0_1479.
  8935  	WORD $0xff31 // xor    edi, edi
  8936  
  8937  LBB0_1479:
        // Runs only when bit 0 of r8 is set: sign-extends 8 bytes at [rdx + rdi] to 8 dwords at [rcx + 4*rdi]
        // (pmovsxbd widens 4 bytes per instruction).
  8938  	LONG $0x01c0f641                           // test    r8b, 1
  8939  	JE   LBB0_1481 // bit 0 of r8 clear: no extra vector pair to process
  8940  	LONG $0x21380f66; WORD $0x3a04             // pmovsxbd    xmm0, dword [rdx + rdi]
  8941  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x04 // pmovsxbd    xmm1, dword [rdx + rdi + 4]
  8942  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  8943  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  8944  
  8945  LBB0_1481:
  8946  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8947  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8948  	JMP  LBB0_1482 // otherwise continue at LBB0_1482 (outside this chunk; presumably the leftover-element path — confirm)
  8949  
  8950  LBB0_1486:
        // Entry that zeroes the element index rdi before falling through to LBB0_1487.
  8951  	WORD $0xff31 // xor    edi, edi
  8952  
  8953  LBB0_1487:
        // Runs only when bit 0 of r8 is set: zero-extends 8 bytes at [rdx + rdi] to 8 dwords at [rcx + 4*rdi]
        // (pmovzxbd widens 4 bytes per instruction).
  8954  	LONG $0x01c0f641                           // test    r8b, 1
  8955  	JE   LBB0_1489 // bit 0 of r8 clear: no extra vector pair to process
  8956  	LONG $0x31380f66; WORD $0x3a04             // pmovzxbd    xmm0, dword [rdx + rdi]
  8957  	LONG $0x31380f66; WORD $0x3a4c; BYTE $0x04 // pmovzxbd    xmm1, dword [rdx + rdi + 4]
  8958  	LONG $0x047f0ff3; BYTE $0xb9               // movdqu    oword [rcx + 4*rdi], xmm0
  8959  	LONG $0x4c7f0ff3; WORD $0x10b9             // movdqu    oword [rcx + 4*rdi + 16], xmm1
  8960  
  8961  LBB0_1489:
  8962  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8963  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8964  	JMP  LBB0_1490 // otherwise continue at LBB0_1490 (outside this chunk; presumably the leftover-element path — confirm)
  8965  
  8966  LBB0_1494:
        // Entry that zeroes the element index rdi before falling through to LBB0_1495.
  8967  	WORD $0xff31 // xor    edi, edi
  8968  
  8969  LBB0_1495:
        // Runs only when bit 0 of r8 is set: narrows 8 dwords at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // The pshufb mask at 192[rbp] is LCDATA1+0xc0 (bytes 00 04 08 0c ...) if rbp holds the LCDATA1 base
        // (set up outside this chunk — confirm): it gathers the low byte of each dword into the low dword,
        // which movd stores.
  8970  	LONG $0x01c0f641               // test    r8b, 1
  8971  	JE   LBB0_1497 // bit 0 of r8 clear: no extra vector pair to process
  8972  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
  8973  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  8974  	QUAD $0x000000c0956f0f66       // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI0_13] */
  8975  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  8976  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  8977  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  8978  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  8979  
  8980  LBB0_1497:
  8981  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  8982  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  8983  	JMP  LBB0_1498 // otherwise continue at LBB0_1498 (outside this chunk; presumably the leftover-element path — confirm)
  8984  
  8985  LBB0_1502:
        // Entry that zeroes the element index rdi before falling through to LBB0_1503.
  8986  	WORD $0xff31 // xor    edi, edi
  8987  
  8988  LBB0_1503:
        // Runs only when bit 0 of r8 is set: converts 4 doubles at [rdx + 8*rdi] to 4 bytes at [rcx + rdi].
        // cvttpd2dq truncates each pair of doubles to int32 in the low two dwords; the pshufb mask at 16[rbp]
        // is LCDATA1+0x10 (bytes 00 04 00 ...) if rbp holds the LCDATA1 base (set up outside this chunk — confirm),
        // selecting the low byte of those two dwords; pextrw stores the resulting low word.
  8989  	LONG $0x01c0f641                           // test    r8b, 1
  8990  	JE   LBB0_1505 // bit 0 of r8 clear: no extra vector pair to process
  8991  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  8992  	LONG $0xc0e60f66                           // cvttpd2dq    xmm0, xmm0
  8993  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  8994  	LONG $0x556f0f66; BYTE $0x10               // movdqa    xmm2, oword 16[rbp] /* [rip + .LCPI0_1] */
  8995  	LONG $0xc9e60f66                           // cvttpd2dq    xmm1, xmm1
  8996  	LONG $0x00380f66; BYTE $0xc2               // pshufb    xmm0, xmm2
  8997  	LONG $0x153a0f66; WORD $0x3904; BYTE $0x00 // pextrw    word [rcx + rdi], xmm0, 0
  8998  	LONG $0x00380f66; BYTE $0xca               // pshufb    xmm1, xmm2
  8999  	QUAD $0x0002394c153a0f66                   // pextrw    word [rcx + rdi + 2], xmm1, 0
  9000  
  9001  LBB0_1505:
  9002  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9003  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  9004  	JMP  LBB0_1506 // otherwise continue at LBB0_1506 (outside this chunk; presumably the leftover-element path — confirm)
  9005  
  9006  LBB0_1510:
        // Entry that zeroes the element index rdi before falling through to LBB0_1511.
  9007  	WORD $0xff31 // xor    edi, edi
  9008  
  9009  LBB0_1511:
        // Runs only when bit 0 of r8 is set: narrows 16 words at [rdx + 2*rdi] to 16 bytes at [rcx + rdi].
        // NOTE(review): the pshufb mask at 256[rbp] (.LCPI0_17) is not visible in this chunk; presumably it packs
        // the low byte of every word into the low qword — confirm. punpcklqdq then joins the two low qwords.
  9010  	LONG $0x01c0f641               // test    r8b, 1
  9011  	JE   LBB0_1513 // bit 0 of r8 clear: no extra vector pair to process
  9012  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
  9013  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  9014  	QUAD $0x00000100956f0f66       // movdqa    xmm2, oword 256[rbp] /* [rip + .LCPI0_17] */
  9015  	LONG $0x00380f66; BYTE $0xc2   // pshufb    xmm0, xmm2
  9016  	LONG $0x00380f66; BYTE $0xca   // pshufb    xmm1, xmm2
  9017  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
  9018  	LONG $0x047f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm0
  9019  
  9020  LBB0_1513:
  9021  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9022  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  9023  	JMP  LBB0_1514 // otherwise continue at LBB0_1514 (outside this chunk; presumably the leftover-element path — confirm)
  9024  
  9025  LBB0_1518:
        // Entry that zeroes the element index rdi before falling through to LBB0_1519.
  9026  	WORD $0xff31 // xor    edi, edi
  9027  
  9028  LBB0_1519:
        // Runs only when bit 0 of r8 is set: converts 8 float32 at [rdx + 4*rdi] to 8 bytes at [rcx + rdi].
        // cvttps2dq truncates to int32; packssdw then packsswb narrow dword -> word -> byte with signed
        // saturation; movd stores the 4 packed bytes from each register.
  9029  	LONG $0x01c0f641               // test    r8b, 1
  9030  	JE   LBB0_1521 // bit 0 of r8 clear: no extra vector pair to process
  9031  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  9032  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  9033  	LONG $0xc05b0ff3               // cvttps2dq    xmm0, xmm0
  9034  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
  9035  	LONG $0xc0630f66               // packsswb    xmm0, xmm0
  9036  	LONG $0xc95b0ff3               // cvttps2dq    xmm1, xmm1
  9037  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
  9038  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
  9039  	LONG $0x047e0f66; BYTE $0x39   // movd    dword [rcx + rdi], xmm0
  9040  	LONG $0x4c7e0f66; WORD $0x0439 // movd    dword [rcx + rdi + 4], xmm1
  9041  
  9042  LBB0_1521:
  9043  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
  9044  	JE   LBB0_1526 // rsi == r9: go to LBB0_1526 (outside this chunk)
  9045  	JMP  LBB0_1522 // otherwise continue at LBB0_1522 (outside this chunk; presumably the leftover-element path — confirm)