github.com/apache/arrow/go/v16@v16.1.0/arrow/compute/internal/kernels/base_arithmetic_sse4_amd64.s (about)

     1  //+build !noasm !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x00ff00ff00ff00ff // bytes 0-7 of the constant: every 16-bit lane holds 0x00ff
     5  DATA LCDATA1<>+0x008(SB)/8, $0x00ff00ff00ff00ff // bytes 8-15: same pattern, completing one 16-byte (XMM-width) value
     6  GLOBL LCDATA1<>(SB), 8, $16 // file-local (<>) 16-byte symbol; flag 8 is RODATA in Go's textflag.h. NOTE(review): looks like a low-byte mask for 16-bit lanes -- confirm against the code that reads it via BP
     7  
     8  TEXT ·_arithmetic_binary_sse4(SB), $0-48
     9  
    10  	MOVQ typ+0(FP), DI
    11  	MOVQ op+8(FP), SI
    12  	MOVQ inLeft+16(FP), DX
    13  	MOVQ inRight+24(FP), CX
    14  	MOVQ out+32(FP), R8
    15  	MOVQ len+40(FP), R9
    16  	LEAQ LCDATA1<>(SB), BP
    17  
    18  	LONG $0x14fe8040         // cmp    sil, 20
    19  	JG   LBB0_11
    20  	WORD $0x8440; BYTE $0xf6 // test    sil, sil
    21  	JE   LBB0_21
    22  	LONG $0x01fe8040         // cmp    sil, 1
    23  	JE   LBB0_367
    24  	LONG $0x02fe8040         // cmp    sil, 2
    25  	JNE  LBB0_1013
    26  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    27  	JG   LBB0_719
    28  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
    29  	JLE  LBB0_6
    30  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
    31  	JE   LBB0_760
    32  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
    33  	JE   LBB0_776
    34  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    35  	JNE  LBB0_1013
    36  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
    37  	JLE  LBB0_1013
    38  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
    39  	LONG $0x08f98341         // cmp    r9d, 8
    40  	JAE  LBB0_792
    41  	WORD $0xf631             // xor    esi, esi
    42  
    43  LBB0_801:
    44  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
    45  	WORD $0xf749; BYTE $0xd1 // not    r9
    46  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
    47  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
    48  	LONG $0x03e78348         // and    rdi, 3
    49  	JE   LBB0_803
    50  
    51  LBB0_802:
    52  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
    53  	LONG $0xb204af0f         // imul    eax, dword [rdx + 4*rsi]
    54  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
    55  	LONG $0x01c68348         // add    rsi, 1
    56  	LONG $0xffc78348         // add    rdi, -1
    57  	JNE  LBB0_802
    58  
    59  LBB0_803:
    60  	LONG $0x03f98349 // cmp    r9, 3
    61  	JB   LBB0_1013
    62  
    63  LBB0_804:
    64  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
    65  	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
    66  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
    67  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
    68  	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
    69  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
    70  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
    71  	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
    72  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
    73  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
    74  	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
    75  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
    76  	LONG $0x04c68348             // add    rsi, 4
    77  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
    78  	JNE  LBB0_804
    79  	JMP  LBB0_1013
    80  
    81  LBB0_11:
    82  	LONG $0x15fe8040         // cmp    sil, 21
    83  	JE   LBB0_194
    84  	LONG $0x16fe8040         // cmp    sil, 22
    85  	JE   LBB0_540
    86  	LONG $0x17fe8040         // cmp    sil, 23
    87  	JNE  LBB0_1013
    88  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    89  	JG   LBB0_869
    90  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
    91  	JLE  LBB0_16
    92  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
    93  	JE   LBB0_910
    94  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
    95  	JE   LBB0_926
    96  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
    97  	JNE  LBB0_1013
    98  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
    99  	JLE  LBB0_1013
   100  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   101  	LONG $0x08f98341         // cmp    r9d, 8
   102  	JAE  LBB0_942
   103  	WORD $0xf631             // xor    esi, esi
   104  
   105  LBB0_951:
   106  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   107  	WORD $0xf749; BYTE $0xd1 // not    r9
   108  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   109  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   110  	LONG $0x03e78348         // and    rdi, 3
   111  	JE   LBB0_953
   112  
   113  LBB0_952:
   114  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
   115  	LONG $0xb204af0f         // imul    eax, dword [rdx + 4*rsi]
   116  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
   117  	LONG $0x01c68348         // add    rsi, 1
   118  	LONG $0xffc78348         // add    rdi, -1
   119  	JNE  LBB0_952
   120  
   121  LBB0_953:
   122  	LONG $0x03f98349 // cmp    r9, 3
   123  	JB   LBB0_1013
   124  
   125  LBB0_954:
   126  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
   127  	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
   128  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
   129  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
   130  	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
   131  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
   132  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
   133  	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
   134  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
   135  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
   136  	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
   137  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
   138  	LONG $0x04c68348             // add    rsi, 4
   139  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   140  	JNE  LBB0_954
   141  	JMP  LBB0_1013
   142  
   143  LBB0_21:
   144  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   145  	JG   LBB0_34
   146  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   147  	JLE  LBB0_23
   148  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
   149  	JE   LBB0_75
   150  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
   151  	JE   LBB0_91
   152  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   153  	JNE  LBB0_1013
   154  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   155  	JLE  LBB0_1013
   156  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   157  	LONG $0x08f98341         // cmp    r9d, 8
   158  	JAE  LBB0_107
   159  	WORD $0xf631             // xor    esi, esi
   160  
   161  LBB0_116:
   162  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   163  	WORD $0xf749; BYTE $0xd1 // not    r9
   164  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   165  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   166  	LONG $0x03e78348         // and    rdi, 3
   167  	JE   LBB0_118
   168  
   169  LBB0_117:
   170  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
   171  	WORD $0x0403; BYTE $0xb2 // add    eax, dword [rdx + 4*rsi]
   172  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
   173  	LONG $0x01c68348         // add    rsi, 1
   174  	LONG $0xffc78348         // add    rdi, -1
   175  	JNE  LBB0_117
   176  
   177  LBB0_118:
   178  	LONG $0x03f98349 // cmp    r9, 3
   179  	JB   LBB0_1013
   180  
   181  LBB0_119:
   182  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
   183  	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
   184  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
   185  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
   186  	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
   187  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
   188  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
   189  	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
   190  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
   191  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
   192  	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
   193  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
   194  	LONG $0x04c68348             // add    rsi, 4
   195  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   196  	JNE  LBB0_119
   197  	JMP  LBB0_1013
   198  
   199  LBB0_367:
   200  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   201  	JG   LBB0_380
   202  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   203  	JLE  LBB0_369
   204  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
   205  	JE   LBB0_421
   206  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
   207  	JE   LBB0_437
   208  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   209  	JNE  LBB0_1013
   210  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   211  	JLE  LBB0_1013
   212  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   213  	LONG $0x08f98341         // cmp    r9d, 8
   214  	JAE  LBB0_453
   215  	WORD $0xf631             // xor    esi, esi
   216  
   217  LBB0_462:
   218  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   219  	WORD $0xf749; BYTE $0xd1 // not    r9
   220  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   221  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   222  	LONG $0x03e78348         // and    rdi, 3
   223  	JE   LBB0_464
   224  
   225  LBB0_463:
   226  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   227  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
   228  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
   229  	LONG $0x01c68348         // add    rsi, 1
   230  	LONG $0xffc78348         // add    rdi, -1
   231  	JNE  LBB0_463
   232  
   233  LBB0_464:
   234  	LONG $0x03f98349 // cmp    r9, 3
   235  	JB   LBB0_1013
   236  
   237  LBB0_465:
   238  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
   239  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
   240  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
   241  	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
   242  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
   243  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
   244  	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
   245  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
   246  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
   247  	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
   248  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
   249  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
   250  	LONG $0x04c68348             // add    rsi, 4
   251  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   252  	JNE  LBB0_465
   253  	JMP  LBB0_1013
   254  
   255  LBB0_194:
   256  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   257  	JG   LBB0_207
   258  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   259  	JLE  LBB0_196
   260  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
   261  	JE   LBB0_248
   262  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
   263  	JE   LBB0_264
   264  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   265  	JNE  LBB0_1013
   266  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   267  	JLE  LBB0_1013
   268  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   269  	LONG $0x08f98341         // cmp    r9d, 8
   270  	JAE  LBB0_280
   271  	WORD $0xf631             // xor    esi, esi
   272  
   273  LBB0_289:
   274  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   275  	WORD $0xf749; BYTE $0xd1 // not    r9
   276  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   277  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   278  	LONG $0x03e78348         // and    rdi, 3
   279  	JE   LBB0_291
   280  
   281  LBB0_290:
   282  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
   283  	WORD $0x0403; BYTE $0xb2 // add    eax, dword [rdx + 4*rsi]
   284  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
   285  	LONG $0x01c68348         // add    rsi, 1
   286  	LONG $0xffc78348         // add    rdi, -1
   287  	JNE  LBB0_290
   288  
   289  LBB0_291:
   290  	LONG $0x03f98349 // cmp    r9, 3
   291  	JB   LBB0_1013
   292  
   293  LBB0_292:
   294  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
   295  	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
   296  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
   297  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
   298  	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
   299  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
   300  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
   301  	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
   302  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
   303  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
   304  	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
   305  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
   306  	LONG $0x04c68348             // add    rsi, 4
   307  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   308  	JNE  LBB0_292
   309  	JMP  LBB0_1013
   310  
   311  LBB0_540:
   312  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   313  	JG   LBB0_553
   314  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   315  	JLE  LBB0_542
   316  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
   317  	JE   LBB0_594
   318  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
   319  	JE   LBB0_610
   320  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
   321  	JNE  LBB0_1013
   322  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   323  	JLE  LBB0_1013
   324  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   325  	LONG $0x08f98341         // cmp    r9d, 8
   326  	JAE  LBB0_626
   327  	WORD $0xf631             // xor    esi, esi
   328  
   329  LBB0_635:
   330  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   331  	WORD $0xf749; BYTE $0xd1 // not    r9
   332  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   333  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   334  	LONG $0x03e78348         // and    rdi, 3
   335  	JE   LBB0_637
   336  
   337  LBB0_636:
   338  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
   339  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
   340  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
   341  	LONG $0x01c68348         // add    rsi, 1
   342  	LONG $0xffc78348         // add    rdi, -1
   343  	JNE  LBB0_636
   344  
   345  LBB0_637:
   346  	LONG $0x03f98349 // cmp    r9, 3
   347  	JB   LBB0_1013
   348  
   349  LBB0_638:
   350  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
   351  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
   352  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
   353  	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
   354  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
   355  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
   356  	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
   357  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
   358  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
   359  	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
   360  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
   361  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
   362  	LONG $0x04c68348             // add    rsi, 4
   363  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   364  	JNE  LBB0_638
   365  	JMP  LBB0_1013
   366  
   367  LBB0_719:
   368  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   369  	JLE  LBB0_720
   370  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   371  	JE   LBB0_826
   372  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   373  	JE   LBB0_834
   374  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   375  	JNE  LBB0_1013
   376  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   377  	JLE  LBB0_1013
   378  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   379  	LONG $0x04f98341         // cmp    r9d, 4
   380  	JAE  LBB0_850
   381  	WORD $0xf631             // xor    esi, esi
   382  
   383  LBB0_859:
   384  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   385  	WORD $0xf748; BYTE $0xd0 // not    rax
   386  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   387  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   388  	LONG $0x03e78348         // and    rdi, 3
   389  	JE   LBB0_861
   390  
   391  LBB0_860:
   392  	LONG $0x04100ff2; BYTE $0xf1   // movsd    xmm0, qword [rcx + 8*rsi]
   393  	LONG $0x04590ff2; BYTE $0xf2   // mulsd    xmm0, qword [rdx + 8*rsi]
   394  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   395  	LONG $0x01c68348               // add    rsi, 1
   396  	LONG $0xffc78348               // add    rdi, -1
   397  	JNE  LBB0_860
   398  
   399  LBB0_861:
   400  	LONG $0x03f88348 // cmp    rax, 3
   401  	JB   LBB0_1013
   402  
   403  LBB0_862:
   404  	LONG $0x04100ff2; BYTE $0xf1               // movsd    xmm0, qword [rcx + 8*rsi]
   405  	LONG $0x04590ff2; BYTE $0xf2               // mulsd    xmm0, qword [rdx + 8*rsi]
   406  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   407  	LONG $0x44100ff2; WORD $0x08f1             // movsd    xmm0, qword [rcx + 8*rsi + 8]
   408  	LONG $0x44590ff2; WORD $0x08f2             // mulsd    xmm0, qword [rdx + 8*rsi + 8]
   409  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   410  	LONG $0x44100ff2; WORD $0x10f1             // movsd    xmm0, qword [rcx + 8*rsi + 16]
   411  	LONG $0x44590ff2; WORD $0x10f2             // mulsd    xmm0, qword [rdx + 8*rsi + 16]
   412  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   413  	LONG $0x44100ff2; WORD $0x18f1             // movsd    xmm0, qword [rcx + 8*rsi + 24]
   414  	LONG $0x44590ff2; WORD $0x18f2             // mulsd    xmm0, qword [rdx + 8*rsi + 24]
   415  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   416  	LONG $0x04c68348                           // add    rsi, 4
   417  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   418  	JNE  LBB0_862
   419  	JMP  LBB0_1013
   420  
   421  LBB0_869:
   422  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   423  	JLE  LBB0_870
   424  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   425  	JE   LBB0_976
   426  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   427  	JE   LBB0_984
   428  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   429  	JNE  LBB0_1013
   430  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   431  	JLE  LBB0_1013
   432  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   433  	LONG $0x04f98341         // cmp    r9d, 4
   434  	JAE  LBB0_1000
   435  	WORD $0xf631             // xor    esi, esi
   436  
   437  LBB0_1009:
   438  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   439  	WORD $0xf748; BYTE $0xd0 // not    rax
   440  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   441  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   442  	LONG $0x03e78348         // and    rdi, 3
   443  	JE   LBB0_1011
   444  
   445  LBB0_1010:
   446  	LONG $0x04100ff2; BYTE $0xf1   // movsd    xmm0, qword [rcx + 8*rsi]
   447  	LONG $0x04590ff2; BYTE $0xf2   // mulsd    xmm0, qword [rdx + 8*rsi]
   448  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   449  	LONG $0x01c68348               // add    rsi, 1
   450  	LONG $0xffc78348               // add    rdi, -1
   451  	JNE  LBB0_1010
   452  
   453  LBB0_1011:
   454  	LONG $0x03f88348 // cmp    rax, 3
   455  	JB   LBB0_1013
   456  
   457  LBB0_1012:
   458  	LONG $0x04100ff2; BYTE $0xf1               // movsd    xmm0, qword [rcx + 8*rsi]
   459  	LONG $0x04590ff2; BYTE $0xf2               // mulsd    xmm0, qword [rdx + 8*rsi]
   460  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   461  	LONG $0x44100ff2; WORD $0x08f1             // movsd    xmm0, qword [rcx + 8*rsi + 8]
   462  	LONG $0x44590ff2; WORD $0x08f2             // mulsd    xmm0, qword [rdx + 8*rsi + 8]
   463  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   464  	LONG $0x44100ff2; WORD $0x10f1             // movsd    xmm0, qword [rcx + 8*rsi + 16]
   465  	LONG $0x44590ff2; WORD $0x10f2             // mulsd    xmm0, qword [rdx + 8*rsi + 16]
   466  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   467  	LONG $0x44100ff2; WORD $0x18f1             // movsd    xmm0, qword [rcx + 8*rsi + 24]
   468  	LONG $0x44590ff2; WORD $0x18f2             // mulsd    xmm0, qword [rdx + 8*rsi + 24]
   469  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   470  	LONG $0x04c68348                           // add    rsi, 4
   471  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   472  	JNE  LBB0_1012
   473  	JMP  LBB0_1013
   474  
   475  LBB0_34:
   476  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   477  	JLE  LBB0_35
   478  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   479  	JE   LBB0_149
   480  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   481  	JE   LBB0_165
   482  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   483  	JNE  LBB0_1013
   484  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   485  	JLE  LBB0_1013
   486  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   487  	LONG $0x04f98341         // cmp    r9d, 4
   488  	JAE  LBB0_181
   489  	WORD $0xf631             // xor    esi, esi
   490  
   491  LBB0_190:
   492  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   493  	WORD $0xf748; BYTE $0xd0 // not    rax
   494  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   495  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   496  	LONG $0x03e78348         // and    rdi, 3
   497  	JE   LBB0_192
   498  
   499  LBB0_191:
   500  	LONG $0x04100ff2; BYTE $0xf1   // movsd    xmm0, qword [rcx + 8*rsi]
   501  	LONG $0x04580ff2; BYTE $0xf2   // addsd    xmm0, qword [rdx + 8*rsi]
   502  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   503  	LONG $0x01c68348               // add    rsi, 1
   504  	LONG $0xffc78348               // add    rdi, -1
   505  	JNE  LBB0_191
   506  
   507  LBB0_192:
   508  	LONG $0x03f88348 // cmp    rax, 3
   509  	JB   LBB0_1013
   510  
   511  LBB0_193:
   512  	LONG $0x04100ff2; BYTE $0xf1               // movsd    xmm0, qword [rcx + 8*rsi]
   513  	LONG $0x04580ff2; BYTE $0xf2               // addsd    xmm0, qword [rdx + 8*rsi]
   514  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   515  	LONG $0x44100ff2; WORD $0x08f1             // movsd    xmm0, qword [rcx + 8*rsi + 8]
   516  	LONG $0x44580ff2; WORD $0x08f2             // addsd    xmm0, qword [rdx + 8*rsi + 8]
   517  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   518  	LONG $0x44100ff2; WORD $0x10f1             // movsd    xmm0, qword [rcx + 8*rsi + 16]
   519  	LONG $0x44580ff2; WORD $0x10f2             // addsd    xmm0, qword [rdx + 8*rsi + 16]
   520  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   521  	LONG $0x44100ff2; WORD $0x18f1             // movsd    xmm0, qword [rcx + 8*rsi + 24]
   522  	LONG $0x44580ff2; WORD $0x18f2             // addsd    xmm0, qword [rdx + 8*rsi + 24]
   523  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   524  	LONG $0x04c68348                           // add    rsi, 4
   525  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   526  	JNE  LBB0_193
   527  	JMP  LBB0_1013
   528  
   529  LBB0_380:
   530  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   531  	JLE  LBB0_381
   532  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   533  	JE   LBB0_495
   534  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   535  	JE   LBB0_511
   536  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   537  	JNE  LBB0_1013
   538  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   539  	JLE  LBB0_1013
   540  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   541  	LONG $0x04f98341         // cmp    r9d, 4
   542  	JAE  LBB0_527
   543  	WORD $0xf631             // xor    esi, esi
   544  
   545  LBB0_536:
   546  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   547  	WORD $0xf748; BYTE $0xd0 // not    rax
   548  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   549  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   550  	LONG $0x03e78348         // and    rdi, 3
   551  	JE   LBB0_538
   552  
   553  LBB0_537:
   554  	LONG $0x04100ff2; BYTE $0xf2   // movsd    xmm0, qword [rdx + 8*rsi]
   555  	LONG $0x045c0ff2; BYTE $0xf1   // subsd    xmm0, qword [rcx + 8*rsi]
   556  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   557  	LONG $0x01c68348               // add    rsi, 1
   558  	LONG $0xffc78348               // add    rdi, -1
   559  	JNE  LBB0_537
   560  
   561  LBB0_538:
   562  	LONG $0x03f88348 // cmp    rax, 3
   563  	JB   LBB0_1013
   564  
   565  LBB0_539:
   566  	LONG $0x04100ff2; BYTE $0xf2               // movsd    xmm0, qword [rdx + 8*rsi]
   567  	LONG $0x045c0ff2; BYTE $0xf1               // subsd    xmm0, qword [rcx + 8*rsi]
   568  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   569  	LONG $0x44100ff2; WORD $0x08f2             // movsd    xmm0, qword [rdx + 8*rsi + 8]
   570  	LONG $0x445c0ff2; WORD $0x08f1             // subsd    xmm0, qword [rcx + 8*rsi + 8]
   571  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   572  	LONG $0x44100ff2; WORD $0x10f2             // movsd    xmm0, qword [rdx + 8*rsi + 16]
   573  	LONG $0x445c0ff2; WORD $0x10f1             // subsd    xmm0, qword [rcx + 8*rsi + 16]
   574  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   575  	LONG $0x44100ff2; WORD $0x18f2             // movsd    xmm0, qword [rdx + 8*rsi + 24]
   576  	LONG $0x445c0ff2; WORD $0x18f1             // subsd    xmm0, qword [rcx + 8*rsi + 24]
   577  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   578  	LONG $0x04c68348                           // add    rsi, 4
   579  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   580  	JNE  LBB0_539
   581  	JMP  LBB0_1013
   582  
   583  LBB0_207:
   584  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   585  	JLE  LBB0_208
   586  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   587  	JE   LBB0_322
   588  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   589  	JE   LBB0_338
   590  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   591  	JNE  LBB0_1013
   592  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   593  	JLE  LBB0_1013
   594  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   595  	LONG $0x04f98341         // cmp    r9d, 4
   596  	JAE  LBB0_354
   597  	WORD $0xf631             // xor    esi, esi
   598  
   599  LBB0_363:
   600  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   601  	WORD $0xf748; BYTE $0xd0 // not    rax
   602  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   603  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   604  	LONG $0x03e78348         // and    rdi, 3
   605  	JE   LBB0_365
   606  
   607  LBB0_364:
   608  	LONG $0x04100ff2; BYTE $0xf1   // movsd    xmm0, qword [rcx + 8*rsi]
   609  	LONG $0x04580ff2; BYTE $0xf2   // addsd    xmm0, qword [rdx + 8*rsi]
   610  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   611  	LONG $0x01c68348               // add    rsi, 1
   612  	LONG $0xffc78348               // add    rdi, -1
   613  	JNE  LBB0_364
   614  
   615  LBB0_365:
   616  	LONG $0x03f88348 // cmp    rax, 3
   617  	JB   LBB0_1013
   618  
   619  LBB0_366:
   620  	LONG $0x04100ff2; BYTE $0xf1               // movsd    xmm0, qword [rcx + 8*rsi]
   621  	LONG $0x04580ff2; BYTE $0xf2               // addsd    xmm0, qword [rdx + 8*rsi]
   622  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   623  	LONG $0x44100ff2; WORD $0x08f1             // movsd    xmm0, qword [rcx + 8*rsi + 8]
   624  	LONG $0x44580ff2; WORD $0x08f2             // addsd    xmm0, qword [rdx + 8*rsi + 8]
   625  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   626  	LONG $0x44100ff2; WORD $0x10f1             // movsd    xmm0, qword [rcx + 8*rsi + 16]
   627  	LONG $0x44580ff2; WORD $0x10f2             // addsd    xmm0, qword [rdx + 8*rsi + 16]
   628  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   629  	LONG $0x44100ff2; WORD $0x18f1             // movsd    xmm0, qword [rcx + 8*rsi + 24]
   630  	LONG $0x44580ff2; WORD $0x18f2             // addsd    xmm0, qword [rdx + 8*rsi + 24]
   631  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   632  	LONG $0x04c68348                           // add    rsi, 4
   633  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   634  	JNE  LBB0_366
   635  	JMP  LBB0_1013
   636  
   637  LBB0_553:
   638  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   639  	JLE  LBB0_554
   640  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
   641  	JE   LBB0_668
   642  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
   643  	JE   LBB0_684
   644  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
   645  	JNE  LBB0_1013
   646  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   647  	JLE  LBB0_1013
   648  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   649  	LONG $0x04f98341         // cmp    r9d, 4
   650  	JAE  LBB0_700
   651  	WORD $0xf631             // xor    esi, esi
   652  
   653  LBB0_709: // scalar float64 subtract over indices rsi .. r10-1; first compute rax = r10 - rsi - 1 and rdi = r10 & 3
   654  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   655  	WORD $0xf748; BYTE $0xd0 // not    rax
   656  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
   657  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   658  	LONG $0x03e78348         // and    rdi, 3
   659  	JE   LBB0_711 // r10 & 3 == 0: skip the one-element loop
   660  
   661  LBB0_710: // one element per pass, rdi passes: qword [r8 + 8*rsi] = qword [rdx + 8*rsi] - qword [rcx + 8*rsi]
   662  	LONG $0x04100ff2; BYTE $0xf2   // movsd    xmm0, qword [rdx + 8*rsi]
   663  	LONG $0x045c0ff2; BYTE $0xf1   // subsd    xmm0, qword [rcx + 8*rsi]
   664  	LONG $0x110f41f2; WORD $0xf004 // movsd    qword [r8 + 8*rsi], xmm0
   665  	LONG $0x01c68348               // add    rsi, 1
   666  	LONG $0xffc78348               // add    rdi, -1
   667  	JNE  LBB0_710 // repeat until rdi reaches 0
   668  
   669  LBB0_711: // rax < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_709
   670  	LONG $0x03f88348 // cmp    rax, 3
   671  	JB   LBB0_1013 // skip the unrolled loop; LBB0_1013 is outside this chunk
   672  
   673  LBB0_712: // same subtract, four elements per pass, until rsi == r10
   674  	LONG $0x04100ff2; BYTE $0xf2               // movsd    xmm0, qword [rdx + 8*rsi]
   675  	LONG $0x045c0ff2; BYTE $0xf1               // subsd    xmm0, qword [rcx + 8*rsi]
   676  	LONG $0x110f41f2; WORD $0xf004             // movsd    qword [r8 + 8*rsi], xmm0
   677  	LONG $0x44100ff2; WORD $0x08f2             // movsd    xmm0, qword [rdx + 8*rsi + 8]
   678  	LONG $0x445c0ff2; WORD $0x08f1             // subsd    xmm0, qword [rcx + 8*rsi + 8]
   679  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm0
   680  	LONG $0x44100ff2; WORD $0x10f2             // movsd    xmm0, qword [rdx + 8*rsi + 16]
   681  	LONG $0x445c0ff2; WORD $0x10f1             // subsd    xmm0, qword [rcx + 8*rsi + 16]
   682  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm0
   683  	LONG $0x44100ff2; WORD $0x18f2             // movsd    xmm0, qword [rdx + 8*rsi + 24]
   684  	LONG $0x445c0ff2; WORD $0x18f1             // subsd    xmm0, qword [rcx + 8*rsi + 24]
   685  	LONG $0x110f41f2; WORD $0xf044; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm0
   686  	LONG $0x04c68348                           // add    rsi, 4
   687  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
   688  	JNE  LBB0_712 // loop while rsi != r10
   689  	JMP  LBB0_1013
   690  
   691  LBB0_6: // the function-entry dispatch jumps here when sil (op) == 2 and edi (typ) <= 3; at entry DX=inLeft, CX=inRight, R8=out, R9=len
   692  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   693  	JE   LBB0_731 // edi == 2 is handled at LBB0_731 (outside this chunk)
   694  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   695  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   696  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   697  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   698  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   699  	LONG $0x20f98341         // cmp    r9d, 32
   700  	JAE  LBB0_747 // r9d >= 32: taken to LBB0_747 (outside this chunk)
   701  	WORD $0xff31             // xor    edi, edi
   702  
   703  LBB0_756: // rdi = current index, r10 = element count; compute r9 = r10 - rdi - 1 and rsi = r10 & 3
   704  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
   705  	WORD $0xf749; BYTE $0xd1 // not    r9
   706  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   707  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
   708  	LONG $0x03e68348         // and    rsi, 3
   709  	JE   LBB0_758 // r10 & 3 == 0: skip the one-element loop
   710  
   711  LBB0_757: // one element per pass, rsi passes: byte [r8 + rdi] = low 8 bits of byte [rcx + rdi] * byte [rdx + rdi]
   712  	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
   713  	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
   714  	LONG $0x38048841         // mov    byte [r8 + rdi], al
   715  	LONG $0x01c78348         // add    rdi, 1
   716  	LONG $0xffc68348         // add    rsi, -1
   717  	JNE  LBB0_757 // repeat until rsi reaches 0
   718  
   719  LBB0_758: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_756
   720  	LONG $0x03f98349 // cmp    r9, 3
   721  	JB   LBB0_1013 // skip the unrolled loop
   722  
   723  LBB0_759: // same 8-bit multiply, four elements per pass, until rdi == r10
   724  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
   725  	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
   726  	LONG $0x38048841             // mov    byte [r8 + rdi], al
   727  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
   728  	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
   729  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
   730  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
   731  	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
   732  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
   733  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
   734  	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
   735  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
   736  	LONG $0x04c78348             // add    rdi, 4
   737  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
   738  	JNE  LBB0_759 // loop while rdi != r10
   739  	JMP  LBB0_1013
   740  
   741  LBB0_16: // dispatch on edi: 2 -> LBB0_881, 3 -> 8-bit multiply loops below, anything else -> LBB0_1013
   742  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   743  	JE   LBB0_881 // edi == 2 is handled at LBB0_881 (outside this chunk)
   744  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   745  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   746  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   747  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   748  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   749  	LONG $0x20f98341         // cmp    r9d, 32
   750  	JAE  LBB0_897 // r9d >= 32: taken to LBB0_897 (outside this chunk)
   751  	WORD $0xff31             // xor    edi, edi
   752  
   753  LBB0_906: // rdi = current index, r10 = element count; compute r9 = r10 - rdi - 1 and rsi = r10 & 3
   754  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
   755  	WORD $0xf749; BYTE $0xd1 // not    r9
   756  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   757  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
   758  	LONG $0x03e68348         // and    rsi, 3
   759  	JE   LBB0_908 // r10 & 3 == 0: skip the one-element loop
   760  
   761  LBB0_907: // one element per pass, rsi passes: byte [r8 + rdi] = low 8 bits of byte [rcx + rdi] * byte [rdx + rdi]
   762  	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
   763  	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
   764  	LONG $0x38048841         // mov    byte [r8 + rdi], al
   765  	LONG $0x01c78348         // add    rdi, 1
   766  	LONG $0xffc68348         // add    rsi, -1
   767  	JNE  LBB0_907 // repeat until rsi reaches 0
   768  
   769  LBB0_908: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_906
   770  	LONG $0x03f98349 // cmp    r9, 3
   771  	JB   LBB0_1013 // skip the unrolled loop
   772  
   773  LBB0_909: // same 8-bit multiply, four elements per pass, until rdi == r10
   774  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
   775  	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
   776  	LONG $0x38048841             // mov    byte [r8 + rdi], al
   777  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
   778  	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
   779  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
   780  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
   781  	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
   782  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
   783  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
   784  	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
   785  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
   786  	LONG $0x04c78348             // add    rdi, 4
   787  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
   788  	JNE  LBB0_909 // loop while rdi != r10
   789  	JMP  LBB0_1013
   790  
   791  LBB0_23: // dispatch on edi: 2 -> LBB0_46, 3 -> 8-bit add loops below, anything else -> LBB0_1013
   792  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   793  	JE   LBB0_46 // edi == 2 is handled at LBB0_46 (outside this chunk)
   794  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   795  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   796  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   797  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   798  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   799  	LONG $0x20f98341         // cmp    r9d, 32
   800  	JAE  LBB0_62 // r9d >= 32: taken to LBB0_62 (outside this chunk)
   801  	WORD $0xf631             // xor    esi, esi
   802  
   803  LBB0_71: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
   804  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   805  	WORD $0xf749; BYTE $0xd1 // not    r9
   806  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   807  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   808  	LONG $0x03e78348         // and    rdi, 3
   809  	JE   LBB0_73 // r10 & 3 == 0: skip the one-element loop
   810  
   811  LBB0_72: // one element per pass, rdi passes: byte [r8 + rsi] = byte [rcx + rsi] + byte [rdx + rsi] (8-bit add)
   812  	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
   813  	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
   814  	LONG $0x30048841         // mov    byte [r8 + rsi], al
   815  	LONG $0x01c68348         // add    rsi, 1
   816  	LONG $0xffc78348         // add    rdi, -1
   817  	JNE  LBB0_72 // repeat until rdi reaches 0
   818  
   819  LBB0_73: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_71
   820  	LONG $0x03f98349 // cmp    r9, 3
   821  	JB   LBB0_1013 // skip the unrolled loop
   822  
   823  LBB0_74: // same 8-bit add, four elements per pass, until rsi == r10
   824  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
   825  	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
   826  	LONG $0x30048841             // mov    byte [r8 + rsi], al
   827  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
   828  	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
   829  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
   830  	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
   831  	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
   832  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
   833  	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
   834  	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
   835  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
   836  	LONG $0x04c68348             // add    rsi, 4
   837  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   838  	JNE  LBB0_74 // loop while rsi != r10
   839  	JMP  LBB0_1013
   840  
   841  LBB0_369: // dispatch on edi: 2 -> LBB0_392, 3 -> 8-bit subtract loops below, anything else -> LBB0_1013
   842  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   843  	JE   LBB0_392 // edi == 2 is handled at LBB0_392 (outside this chunk)
   844  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   845  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   846  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   847  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   848  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   849  	LONG $0x20f98341         // cmp    r9d, 32
   850  	JAE  LBB0_408 // r9d >= 32: taken to LBB0_408 (outside this chunk)
   851  	WORD $0xf631             // xor    esi, esi
   852  
   853  LBB0_417: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
   854  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   855  	WORD $0xf749; BYTE $0xd1 // not    r9
   856  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   857  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   858  	LONG $0x03e78348         // and    rdi, 3
   859  	JE   LBB0_419 // r10 & 3 == 0: skip the one-element loop
   860  
   861  LBB0_418: // one element per pass, rdi passes: byte [r8 + rsi] = byte [rdx + rsi] - byte [rcx + rsi] (8-bit subtract)
   862  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
   863  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
   864  	LONG $0x30048841         // mov    byte [r8 + rsi], al
   865  	LONG $0x01c68348         // add    rsi, 1
   866  	LONG $0xffc78348         // add    rdi, -1
   867  	JNE  LBB0_418 // repeat until rdi reaches 0
   868  
   869  LBB0_419: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_417
   870  	LONG $0x03f98349 // cmp    r9, 3
   871  	JB   LBB0_1013 // skip the unrolled loop
   872  
   873  LBB0_420: // same 8-bit subtract, four elements per pass, until rsi == r10
   874  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   875  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
   876  	LONG $0x30048841             // mov    byte [r8 + rsi], al
   877  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   878  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
   879  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
   880  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   881  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
   882  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
   883  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   884  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
   885  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
   886  	LONG $0x04c68348             // add    rsi, 4
   887  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   888  	JNE  LBB0_420 // loop while rsi != r10
   889  	JMP  LBB0_1013
   890  
   891  LBB0_196: // dispatch on edi: 2 -> LBB0_219, 3 -> 8-bit add loops below, anything else -> LBB0_1013
   892  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   893  	JE   LBB0_219 // edi == 2 is handled at LBB0_219 (outside this chunk)
   894  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   895  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   896  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   897  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   898  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   899  	LONG $0x20f98341         // cmp    r9d, 32
   900  	JAE  LBB0_235 // r9d >= 32: taken to LBB0_235 (outside this chunk)
   901  	WORD $0xf631             // xor    esi, esi
   902  
   903  LBB0_244: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
   904  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   905  	WORD $0xf749; BYTE $0xd1 // not    r9
   906  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   907  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   908  	LONG $0x03e78348         // and    rdi, 3
   909  	JE   LBB0_246 // r10 & 3 == 0: skip the one-element loop
   910  
   911  LBB0_245: // one element per pass, rdi passes: byte [r8 + rsi] = byte [rcx + rsi] + byte [rdx + rsi] (8-bit add)
   912  	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
   913  	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
   914  	LONG $0x30048841         // mov    byte [r8 + rsi], al
   915  	LONG $0x01c68348         // add    rsi, 1
   916  	LONG $0xffc78348         // add    rdi, -1
   917  	JNE  LBB0_245 // repeat until rdi reaches 0
   918  
   919  LBB0_246: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_244
   920  	LONG $0x03f98349 // cmp    r9, 3
   921  	JB   LBB0_1013 // skip the unrolled loop
   922  
   923  LBB0_247: // same 8-bit add, four elements per pass, until rsi == r10
   924  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
   925  	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
   926  	LONG $0x30048841             // mov    byte [r8 + rsi], al
   927  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
   928  	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
   929  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
   930  	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
   931  	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
   932  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
   933  	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
   934  	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
   935  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
   936  	LONG $0x04c68348             // add    rsi, 4
   937  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   938  	JNE  LBB0_247 // loop while rsi != r10
   939  	JMP  LBB0_1013
   940  
   941  LBB0_542: // dispatch on edi: 2 -> LBB0_565, 3 -> 8-bit subtract loops below, anything else -> LBB0_1013
   942  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
   943  	JE   LBB0_565 // edi == 2 is handled at LBB0_565 (outside this chunk)
   944  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
   945  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   946  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   947  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
   948  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   949  	LONG $0x20f98341         // cmp    r9d, 32
   950  	JAE  LBB0_581 // r9d >= 32: taken to LBB0_581 (outside this chunk)
   951  	WORD $0xf631             // xor    esi, esi
   952  
   953  LBB0_590: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
   954  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
   955  	WORD $0xf749; BYTE $0xd1 // not    r9
   956  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
   957  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
   958  	LONG $0x03e78348         // and    rdi, 3
   959  	JE   LBB0_592 // r10 & 3 == 0: skip the one-element loop
   960  
   961  LBB0_591: // one element per pass, rdi passes: byte [r8 + rsi] = byte [rdx + rsi] - byte [rcx + rsi] (8-bit subtract)
   962  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
   963  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
   964  	LONG $0x30048841         // mov    byte [r8 + rsi], al
   965  	LONG $0x01c68348         // add    rsi, 1
   966  	LONG $0xffc78348         // add    rdi, -1
   967  	JNE  LBB0_591 // repeat until rdi reaches 0
   968  
   969  LBB0_592: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_590
   970  	LONG $0x03f98349 // cmp    r9, 3
   971  	JB   LBB0_1013 // skip the unrolled loop
   972  
   973  LBB0_593: // same 8-bit subtract, four elements per pass, until rsi == r10
   974  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
   975  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
   976  	LONG $0x30048841             // mov    byte [r8 + rsi], al
   977  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
   978  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
   979  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
   980  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
   981  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
   982  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
   983  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
   984  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
   985  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
   986  	LONG $0x04c68348             // add    rsi, 4
   987  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
   988  	JNE  LBB0_593 // loop while rsi != r10
   989  	JMP  LBB0_1013
   990  
   991  LBB0_720: // dispatch on edi: 7 -> LBB0_805, 8 -> count setup below, anything else -> LBB0_1013
   992  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
   993  	JE   LBB0_805 // edi == 7 is handled at LBB0_805 (outside this chunk)
   994  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
   995  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
   996  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
   997  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the work
   998  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
   999  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  1000  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  1001  	LONG $0x03e18341         // and    r9d, 3
  1002  	LONG $0x03ff8348         // cmp    rdi, 3
  1003  	JAE  LBB0_821 // rsi - 1 >= 3, i.e. at least 4 elements: continue at LBB0_821 (outside this chunk)
  1004  	WORD $0xff31             // xor    edi, edi
  1005  	JMP  LBB0_823 // fewer than 4 elements: continue at LBB0_823 with rdi = 0, rsi = count, r9d = count & 3
  1006  
  1007  LBB0_870: // dispatch on edi: 7 -> LBB0_955, 8 -> count setup below, anything else -> LBB0_1013
  1008  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  1009  	JE   LBB0_955 // edi == 7 is handled at LBB0_955 (outside this chunk)
  1010  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  1011  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
  1012  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1013  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the work
  1014  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  1015  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  1016  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  1017  	LONG $0x03e18341         // and    r9d, 3
  1018  	LONG $0x03ff8348         // cmp    rdi, 3
  1019  	JAE  LBB0_971 // rsi - 1 >= 3, i.e. at least 4 elements: continue at LBB0_971 (outside this chunk)
  1020  	WORD $0xff31             // xor    edi, edi
  1021  	JMP  LBB0_973 // fewer than 4 elements: continue at LBB0_973 with rdi = 0, rsi = count, r9d = count & 3
  1022  
  1023  LBB0_35: // dispatch on edi: 7 -> LBB0_120, 8 -> 64-bit add loops below, anything else -> LBB0_1013
  1024  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  1025  	JE   LBB0_120 // edi == 7 is handled at LBB0_120 (outside this chunk)
  1026  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  1027  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
  1028  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1029  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
  1030  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1031  	LONG $0x04f98341         // cmp    r9d, 4
  1032  	JAE  LBB0_136 // r9d >= 4: taken to LBB0_136 (outside this chunk)
  1033  	WORD $0xf631             // xor    esi, esi
  1034  
  1035  LBB0_145: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1036  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1037  	WORD $0xf749; BYTE $0xd1 // not    r9
  1038  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1039  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1040  	LONG $0x03e78348         // and    rdi, 3
  1041  	JE   LBB0_147 // r10 & 3 == 0: skip the one-element loop
  1042  
  1043  LBB0_146: // one element per pass, rdi passes: qword [r8 + 8*rsi] = qword [rcx + 8*rsi] + qword [rdx + 8*rsi]
  1044  	LONG $0xf1048b48 // mov    rax, qword [rcx + 8*rsi]
  1045  	LONG $0xf2040348 // add    rax, qword [rdx + 8*rsi]
  1046  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  1047  	LONG $0x01c68348 // add    rsi, 1
  1048  	LONG $0xffc78348 // add    rdi, -1
  1049  	JNE  LBB0_146 // repeat until rdi reaches 0
  1050  
  1051  LBB0_147: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_145
  1052  	LONG $0x03f98349 // cmp    r9, 3
  1053  	JB   LBB0_1013 // skip the unrolled loop
  1054  
  1055  LBB0_148: // same 64-bit add, four elements per pass, until rsi == r10
  1056  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
  1057  	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
  1058  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  1059  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
  1060  	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
  1061  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  1062  	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
  1063  	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
  1064  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  1065  	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
  1066  	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
  1067  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  1068  	LONG $0x04c68348             // add    rsi, 4
  1069  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  1070  	JNE  LBB0_148 // loop while rsi != r10
  1071  	JMP  LBB0_1013
  1072  
  1073  LBB0_381: // dispatch on edi: 7 -> LBB0_466, 8 -> 64-bit subtract loops below, anything else -> LBB0_1013
  1074  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  1075  	JE   LBB0_466 // edi == 7 is handled at LBB0_466 (outside this chunk)
  1076  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  1077  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
  1078  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1079  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
  1080  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1081  	LONG $0x04f98341         // cmp    r9d, 4
  1082  	JAE  LBB0_482 // r9d >= 4: taken to LBB0_482 (outside this chunk)
  1083  	WORD $0xf631             // xor    esi, esi
  1084  
  1085  LBB0_491: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1086  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1087  	WORD $0xf749; BYTE $0xd1 // not    r9
  1088  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1089  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1090  	LONG $0x03e78348         // and    rdi, 3
  1091  	JE   LBB0_493 // r10 & 3 == 0: skip the one-element loop
  1092  
  1093  LBB0_492: // one element per pass, rdi passes: qword [r8 + 8*rsi] = qword [rdx + 8*rsi] - qword [rcx + 8*rsi]
  1094  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1095  	LONG $0xf1042b48 // sub    rax, qword [rcx + 8*rsi]
  1096  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  1097  	LONG $0x01c68348 // add    rsi, 1
  1098  	LONG $0xffc78348 // add    rdi, -1
  1099  	JNE  LBB0_492 // repeat until rdi reaches 0
  1100  
  1101  LBB0_493: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_491
  1102  	LONG $0x03f98349 // cmp    r9, 3
  1103  	JB   LBB0_1013 // skip the unrolled loop
  1104  
  1105  LBB0_494: // same 64-bit subtract, four elements per pass, until rsi == r10
  1106  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1107  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
  1108  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  1109  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1110  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
  1111  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  1112  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1113  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
  1114  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  1115  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1116  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
  1117  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  1118  	LONG $0x04c68348             // add    rsi, 4
  1119  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  1120  	JNE  LBB0_494 // loop while rsi != r10
  1121  	JMP  LBB0_1013
  1122  
  1123  LBB0_208: // dispatch on edi: 7 -> LBB0_293, 8 -> 64-bit add loops below, anything else -> LBB0_1013
  1124  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  1125  	JE   LBB0_293 // edi == 7 is handled at LBB0_293 (outside this chunk)
  1126  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  1127  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
  1128  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1129  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
  1130  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1131  	LONG $0x04f98341         // cmp    r9d, 4
  1132  	JAE  LBB0_309 // r9d >= 4: taken to LBB0_309 (outside this chunk)
  1133  	WORD $0xf631             // xor    esi, esi
  1134  
  1135  LBB0_318: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1136  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1137  	WORD $0xf749; BYTE $0xd1 // not    r9
  1138  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1139  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1140  	LONG $0x03e78348         // and    rdi, 3
  1141  	JE   LBB0_320 // r10 & 3 == 0: skip the one-element loop
  1142  
  1143  LBB0_319: // one element per pass, rdi passes: qword [r8 + 8*rsi] = qword [rcx + 8*rsi] + qword [rdx + 8*rsi]
  1144  	LONG $0xf1048b48 // mov    rax, qword [rcx + 8*rsi]
  1145  	LONG $0xf2040348 // add    rax, qword [rdx + 8*rsi]
  1146  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  1147  	LONG $0x01c68348 // add    rsi, 1
  1148  	LONG $0xffc78348 // add    rdi, -1
  1149  	JNE  LBB0_319 // repeat until rdi reaches 0
  1150  
  1151  LBB0_320: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_318
  1152  	LONG $0x03f98349 // cmp    r9, 3
  1153  	JB   LBB0_1013 // skip the unrolled loop
  1154  
  1155  LBB0_321: // same 64-bit add, four elements per pass, until rsi == r10
  1156  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
  1157  	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
  1158  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  1159  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
  1160  	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
  1161  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  1162  	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
  1163  	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
  1164  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  1165  	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
  1166  	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
  1167  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  1168  	LONG $0x04c68348             // add    rsi, 4
  1169  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  1170  	JNE  LBB0_321 // loop while rsi != r10
  1171  	JMP  LBB0_1013
  1172  
  1173  LBB0_554: // dispatch on edi: 7 -> LBB0_639, 8 -> 64-bit subtract loops below, anything else -> LBB0_1013
  1174  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  1175  	JE   LBB0_639 // edi == 7 is handled at LBB0_639 (outside this chunk)
  1176  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  1177  	JNE  LBB0_1013 // any other edi value: jump to LBB0_1013 (outside this chunk)
  1178  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1179  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops
  1180  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1181  	LONG $0x04f98341         // cmp    r9d, 4
  1182  	JAE  LBB0_655 // r9d >= 4: taken to LBB0_655 (outside this chunk)
  1183  	WORD $0xf631             // xor    esi, esi
  1184  
  1185  LBB0_664: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1186  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1187  	WORD $0xf749; BYTE $0xd1 // not    r9
  1188  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1189  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1190  	LONG $0x03e78348         // and    rdi, 3
  1191  	JE   LBB0_666 // r10 & 3 == 0: skip the one-element loop
  1192  
  1193  LBB0_665: // one element per pass, rdi passes: qword [r8 + 8*rsi] = qword [rdx + 8*rsi] - qword [rcx + 8*rsi]
  1194  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  1195  	LONG $0xf1042b48 // sub    rax, qword [rcx + 8*rsi]
  1196  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  1197  	LONG $0x01c68348 // add    rsi, 1
  1198  	LONG $0xffc78348 // add    rdi, -1
  1199  	JNE  LBB0_665 // repeat until rdi reaches 0
  1200  
  1201  LBB0_666: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_664
  1202  	LONG $0x03f98349 // cmp    r9, 3
  1203  	JB   LBB0_1013 // skip the unrolled loop
  1204  
  1205  LBB0_667: // same 64-bit subtract, four elements per pass, until rsi == r10
  1206  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  1207  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
  1208  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  1209  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  1210  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
  1211  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  1212  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  1213  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
  1214  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  1215  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  1216  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
  1217  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  1218  	LONG $0x04c68348             // add    rsi, 4
  1219  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  1220  	JNE  LBB0_667 // loop while rsi != r10
  1221  	JMP  LBB0_1013
  1222  
  1223  LBB0_760: // the function-entry dispatch jumps here when sil (op) == 2 and edi (typ) == 4; 16-bit multiply loops
  1224  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1225  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops (LBB0_1013 is outside this chunk)
  1226  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1227  	LONG $0x10f98341         // cmp    r9d, 16
  1228  	JAE  LBB0_763 // r9d >= 16: taken to LBB0_763 (outside this chunk)
  1229  	WORD $0xf631             // xor    esi, esi
  1230  
  1231  LBB0_772: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1232  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1233  	WORD $0xf749; BYTE $0xd1 // not    r9
  1234  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1235  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1236  	LONG $0x03e78348         // and    rdi, 3
  1237  	JE   LBB0_774 // r10 & 3 == 0: skip the one-element loop
  1238  
  1239  LBB0_773: // one element per pass, rdi passes: word [r8 + 2*rsi] = low 16 bits of word [rcx + 2*rsi] * word [rdx + 2*rsi]
  1240  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1241  	LONG $0x04af0f66; BYTE $0x72 // imul    ax, word [rdx + 2*rsi]
  1242  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1243  	LONG $0x01c68348             // add    rsi, 1
  1244  	LONG $0xffc78348             // add    rdi, -1
  1245  	JNE  LBB0_773 // repeat until rdi reaches 0
  1246  
  1247  LBB0_774: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_772
  1248  	LONG $0x03f98349 // cmp    r9, 3
  1249  	JB   LBB0_1013 // skip the unrolled loop
  1250  
  1251  LBB0_775: // same 16-bit multiply, four elements per pass, until rsi == r10
  1252  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1253  	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
  1254  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1255  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1256  	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
  1257  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1258  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1259  	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
  1260  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1261  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1262  	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
  1263  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1264  	LONG $0x04c68348               // add    rsi, 4
  1265  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1266  	JNE  LBB0_775 // loop while rsi != r10
  1267  	JMP  LBB0_1013
  1268  
  1269  LBB0_776: // the function-entry dispatch jumps here when sil (op) == 2 and edi (typ) == 5; 16-bit multiply loops
  1270  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1271  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops (LBB0_1013 is outside this chunk)
  1272  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1273  	LONG $0x10f98341         // cmp    r9d, 16
  1274  	JAE  LBB0_779 // r9d >= 16: taken to LBB0_779 (outside this chunk)
  1275  	WORD $0xf631             // xor    esi, esi
  1276  
  1277  LBB0_788: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1278  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1279  	WORD $0xf749; BYTE $0xd1 // not    r9
  1280  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1281  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1282  	LONG $0x03e78348         // and    rdi, 3
  1283  	JE   LBB0_790 // r10 & 3 == 0: skip the one-element loop
  1284  
  1285  LBB0_789: // one element per pass, rdi passes: word [r8 + 2*rsi] = low 16 bits of word [rcx + 2*rsi] * word [rdx + 2*rsi]
  1286  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1287  	LONG $0x04af0f66; BYTE $0x72 // imul    ax, word [rdx + 2*rsi]
  1288  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1289  	LONG $0x01c68348             // add    rsi, 1
  1290  	LONG $0xffc78348             // add    rdi, -1
  1291  	JNE  LBB0_789 // repeat until rdi reaches 0
  1292  
  1293  LBB0_790: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_788
  1294  	LONG $0x03f98349 // cmp    r9, 3
  1295  	JB   LBB0_1013 // skip the unrolled loop
  1296  
  1297  LBB0_791: // same 16-bit multiply, four elements per pass, until rsi == r10
  1298  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1299  	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
  1300  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1301  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1302  	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
  1303  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1304  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1305  	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
  1306  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1307  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1308  	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
  1309  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1310  	LONG $0x04c68348               // add    rsi, 4
  1311  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1312  	JNE  LBB0_791 // loop while rsi != r10
  1313  	JMP  LBB0_1013
  1314  
  1315  LBB0_910: // 16-bit multiply loops; the jump that reaches this label is outside this chunk
  1316  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1317  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops (LBB0_1013 is outside this chunk)
  1318  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1319  	LONG $0x10f98341         // cmp    r9d, 16
  1320  	JAE  LBB0_913 // r9d >= 16: taken to LBB0_913 (outside this chunk)
  1321  	WORD $0xf631             // xor    esi, esi
  1322  
  1323  LBB0_922: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1324  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1325  	WORD $0xf749; BYTE $0xd1 // not    r9
  1326  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1327  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1328  	LONG $0x03e78348         // and    rdi, 3
  1329  	JE   LBB0_924 // r10 & 3 == 0: skip the one-element loop
  1330  
  1331  LBB0_923: // one element per pass, rdi passes: word [r8 + 2*rsi] = low 16 bits of word [rcx + 2*rsi] * word [rdx + 2*rsi]
  1332  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1333  	LONG $0x04af0f66; BYTE $0x72 // imul    ax, word [rdx + 2*rsi]
  1334  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1335  	LONG $0x01c68348             // add    rsi, 1
  1336  	LONG $0xffc78348             // add    rdi, -1
  1337  	JNE  LBB0_923 // repeat until rdi reaches 0
  1338  
  1339  LBB0_924: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_922
  1340  	LONG $0x03f98349 // cmp    r9, 3
  1341  	JB   LBB0_1013 // skip the unrolled loop
  1342  
  1343  LBB0_925: // same 16-bit multiply, four elements per pass, until rsi == r10
  1344  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1345  	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
  1346  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1347  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1348  	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
  1349  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1350  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1351  	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
  1352  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1353  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1354  	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
  1355  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1356  	LONG $0x04c68348               // add    rsi, 4
  1357  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1358  	JNE  LBB0_925 // loop while rsi != r10
  1359  	JMP  LBB0_1013
  1360  
  1361  LBB0_926: // 16-bit multiply loops; the jump that reaches this label is outside this chunk
  1362  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1363  	JLE  LBB0_1013 // r9d <= 0 as a signed 32-bit value: skip the loops (LBB0_1013 is outside this chunk)
  1364  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  1365  	LONG $0x10f98341         // cmp    r9d, 16
  1366  	JAE  LBB0_929 // r9d >= 16: taken to LBB0_929 (outside this chunk)
  1367  	WORD $0xf631             // xor    esi, esi
  1368  
  1369  LBB0_938: // rsi = current index, r10 = element count; compute r9 = r10 - rsi - 1 and rdi = r10 & 3
  1370  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1371  	WORD $0xf749; BYTE $0xd1 // not    r9
  1372  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  1373  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1374  	LONG $0x03e78348         // and    rdi, 3
  1375  	JE   LBB0_940 // r10 & 3 == 0: skip the one-element loop
  1376  
  1377  LBB0_939: // one element per pass, rdi passes: word [r8 + 2*rsi] = low 16 bits of word [rcx + 2*rsi] * word [rdx + 2*rsi]
  1378  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1379  	LONG $0x04af0f66; BYTE $0x72 // imul    ax, word [rdx + 2*rsi]
  1380  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1381  	LONG $0x01c68348             // add    rsi, 1
  1382  	LONG $0xffc78348             // add    rdi, -1
  1383  	JNE  LBB0_939 // repeat until rdi reaches 0
  1384  
  1385  LBB0_940: // r9 < 3 (unsigned) means only 1 to 3 elements were left on entry to LBB0_938
  1386  	LONG $0x03f98349 // cmp    r9, 3
  1387  	JB   LBB0_1013 // skip the unrolled loop
  1388  
  1389  LBB0_941: // same 16-bit multiply, four elements per pass, until rsi == r10
  1390  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1391  	LONG $0x04af0f66; BYTE $0x72   // imul    ax, word [rdx + 2*rsi]
  1392  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1393  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1394  	LONG $0x44af0f66; WORD $0x0272 // imul    ax, word [rdx + 2*rsi + 2]
  1395  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1396  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1397  	LONG $0x44af0f66; WORD $0x0472 // imul    ax, word [rdx + 2*rsi + 4]
  1398  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1399  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1400  	LONG $0x44af0f66; WORD $0x0672 // imul    ax, word [rdx + 2*rsi + 6]
  1401  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1402  	LONG $0x04c68348               // add    rsi, 4
  1403  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1404  	JNE  LBB0_941 // loop while rsi != r10
  1405  	JMP  LBB0_1013
  1406  
  1407  LBB0_75: // 16-bit element-wise add, scalar path: word [r8+2*i] = word [rcx+2*i] + word [rdx+2*i] (wraps at 16 bits). Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1408  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1409  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1410  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1411  	LONG $0x10f98341         // cmp    r9d, 16
  1412  	JAE  LBB0_78 // len >= 16: handled at LBB0_78 (not in this chunk; presumably the SIMD loop -- confirm)
  1413  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1414  
  1415  LBB0_87: // remainder setup: rsi = index of next element, r10 = element count
  1416  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1417  	WORD $0xf749; BYTE $0xd1 // not    r9
  1418  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1419  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1420  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1421  	JE   LBB0_89 // r10 mod 4 == 0: skip the one-element loop
  1422  
  1423  LBB0_88: // one element per iteration, rdi times
  1424  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1425  	LONG $0x72040366             // add    ax, word [rdx + 2*rsi]
  1426  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1427  	LONG $0x01c68348             // add    rsi, 1
  1428  	LONG $0xffc78348             // add    rdi, -1
  1429  	JNE  LBB0_88 // repeat until rdi reaches 0
  1430  
  1431  LBB0_89:
  1432  	LONG $0x03f98349 // cmp    r9, 3
  1433  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_87 (rsi = 0 on the fall-through path), so the loop above covered them
  1434  
  1435  LBB0_90: // four elements per iteration
  1436  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1437  	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
  1438  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1439  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1440  	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
  1441  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1442  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1443  	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
  1444  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1445  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1446  	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
  1447  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1448  	LONG $0x04c68348               // add    rsi, 4
  1449  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1450  	JNE  LBB0_90 // continue until rsi == r10
  1451  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1452  
  1453  LBB0_91: // 16-bit element-wise add, scalar path: word [r8+2*i] = word [rcx+2*i] + word [rdx+2*i] (wraps at 16 bits). Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1454  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1455  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1456  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1457  	LONG $0x10f98341         // cmp    r9d, 16
  1458  	JAE  LBB0_94 // len >= 16: handled at LBB0_94 (not in this chunk; presumably the SIMD loop -- confirm)
  1459  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1460  
  1461  LBB0_103: // remainder setup: rsi = index of next element, r10 = element count
  1462  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1463  	WORD $0xf749; BYTE $0xd1 // not    r9
  1464  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1465  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1466  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1467  	JE   LBB0_105 // r10 mod 4 == 0: skip the one-element loop
  1468  
  1469  LBB0_104: // one element per iteration, rdi times
  1470  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1471  	LONG $0x72040366             // add    ax, word [rdx + 2*rsi]
  1472  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1473  	LONG $0x01c68348             // add    rsi, 1
  1474  	LONG $0xffc78348             // add    rdi, -1
  1475  	JNE  LBB0_104 // repeat until rdi reaches 0
  1476  
  1477  LBB0_105:
  1478  	LONG $0x03f98349 // cmp    r9, 3
  1479  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_103 (rsi = 0 on the fall-through path), so the loop above covered them
  1480  
  1481  LBB0_106: // four elements per iteration
  1482  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1483  	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
  1484  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1485  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1486  	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
  1487  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1488  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1489  	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
  1490  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1491  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1492  	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
  1493  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1494  	LONG $0x04c68348               // add    rsi, 4
  1495  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1496  	JNE  LBB0_106 // continue until rsi == r10
  1497  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1498  
  1499  LBB0_421: // 16-bit element-wise subtract, scalar path: word [r8+2*i] = word [rdx+2*i] - word [rcx+2*i] (wraps at 16 bits). Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  1500  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1501  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1502  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1503  	LONG $0x10f98341         // cmp    r9d, 16
  1504  	JAE  LBB0_424 // len >= 16: handled at LBB0_424 (not in this chunk; presumably the SIMD loop -- confirm)
  1505  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1506  
  1507  LBB0_433: // remainder setup: rsi = index of next element, r10 = element count
  1508  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1509  	WORD $0xf749; BYTE $0xd1 // not    r9
  1510  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1511  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1512  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1513  	JE   LBB0_435 // r10 mod 4 == 0: skip the one-element loop
  1514  
  1515  LBB0_434: // one element per iteration, rdi times
  1516  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1517  	LONG $0x71042b66             // sub    ax, word [rcx + 2*rsi]
  1518  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1519  	LONG $0x01c68348             // add    rsi, 1
  1520  	LONG $0xffc78348             // add    rdi, -1
  1521  	JNE  LBB0_434 // repeat until rdi reaches 0
  1522  
  1523  LBB0_435:
  1524  	LONG $0x03f98349 // cmp    r9, 3
  1525  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_433 (rsi = 0 on the fall-through path), so the loop above covered them
  1526  
  1527  LBB0_436: // four elements per iteration
  1528  	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
  1529  	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
  1530  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1531  	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
  1532  	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
  1533  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1534  	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
  1535  	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
  1536  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1537  	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
  1538  	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
  1539  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1540  	LONG $0x04c68348               // add    rsi, 4
  1541  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1542  	JNE  LBB0_436 // continue until rsi == r10
  1543  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1544  
  1545  LBB0_437: // 16-bit element-wise subtract, scalar path: word [r8+2*i] = word [rdx+2*i] - word [rcx+2*i] (wraps at 16 bits). Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  1546  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1547  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1548  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1549  	LONG $0x10f98341         // cmp    r9d, 16
  1550  	JAE  LBB0_440 // len >= 16: handled at LBB0_440 (not in this chunk; presumably the SIMD loop -- confirm)
  1551  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1552  
  1553  LBB0_449: // remainder setup: rsi = index of next element, r10 = element count
  1554  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1555  	WORD $0xf749; BYTE $0xd1 // not    r9
  1556  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1557  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1558  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1559  	JE   LBB0_451 // r10 mod 4 == 0: skip the one-element loop
  1560  
  1561  LBB0_450: // one element per iteration, rdi times
  1562  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1563  	LONG $0x71042b66             // sub    ax, word [rcx + 2*rsi]
  1564  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1565  	LONG $0x01c68348             // add    rsi, 1
  1566  	LONG $0xffc78348             // add    rdi, -1
  1567  	JNE  LBB0_450 // repeat until rdi reaches 0
  1568  
  1569  LBB0_451:
  1570  	LONG $0x03f98349 // cmp    r9, 3
  1571  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_449 (rsi = 0 on the fall-through path), so the loop above covered them
  1572  
  1573  LBB0_452: // four elements per iteration
  1574  	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
  1575  	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
  1576  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1577  	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
  1578  	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
  1579  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1580  	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
  1581  	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
  1582  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1583  	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
  1584  	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
  1585  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1586  	LONG $0x04c68348               // add    rsi, 4
  1587  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1588  	JNE  LBB0_452 // continue until rsi == r10
  1589  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1590  
  1591  LBB0_248: // 16-bit element-wise add, scalar path: word [r8+2*i] = word [rcx+2*i] + word [rdx+2*i] (wraps at 16 bits). Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1592  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1593  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1594  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1595  	LONG $0x10f98341         // cmp    r9d, 16
  1596  	JAE  LBB0_251 // len >= 16: handled at LBB0_251 (not in this chunk; presumably the SIMD loop -- confirm)
  1597  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1598  
  1599  LBB0_260: // remainder setup: rsi = index of next element, r10 = element count
  1600  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1601  	WORD $0xf749; BYTE $0xd1 // not    r9
  1602  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1603  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1604  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1605  	JE   LBB0_262 // r10 mod 4 == 0: skip the one-element loop
  1606  
  1607  LBB0_261: // one element per iteration, rdi times
  1608  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1609  	LONG $0x72040366             // add    ax, word [rdx + 2*rsi]
  1610  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1611  	LONG $0x01c68348             // add    rsi, 1
  1612  	LONG $0xffc78348             // add    rdi, -1
  1613  	JNE  LBB0_261 // repeat until rdi reaches 0
  1614  
  1615  LBB0_262:
  1616  	LONG $0x03f98349 // cmp    r9, 3
  1617  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_260 (rsi = 0 on the fall-through path), so the loop above covered them
  1618  
  1619  LBB0_263: // four elements per iteration
  1620  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1621  	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
  1622  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1623  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1624  	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
  1625  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1626  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1627  	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
  1628  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1629  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1630  	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
  1631  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1632  	LONG $0x04c68348               // add    rsi, 4
  1633  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1634  	JNE  LBB0_263 // continue until rsi == r10
  1635  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1636  
  1637  LBB0_264: // 16-bit element-wise add, scalar path: word [r8+2*i] = word [rcx+2*i] + word [rdx+2*i] (wraps at 16 bits). Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1638  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1639  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1640  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1641  	LONG $0x10f98341         // cmp    r9d, 16
  1642  	JAE  LBB0_267 // len >= 16: handled at LBB0_267 (not in this chunk; presumably the SIMD loop -- confirm)
  1643  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1644  
  1645  LBB0_276: // remainder setup: rsi = index of next element, r10 = element count
  1646  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1647  	WORD $0xf749; BYTE $0xd1 // not    r9
  1648  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1649  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1650  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1651  	JE   LBB0_278 // r10 mod 4 == 0: skip the one-element loop
  1652  
  1653  LBB0_277: // one element per iteration, rdi times
  1654  	LONG $0x7104b70f             // movzx    eax, word [rcx + 2*rsi]
  1655  	LONG $0x72040366             // add    ax, word [rdx + 2*rsi]
  1656  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1657  	LONG $0x01c68348             // add    rsi, 1
  1658  	LONG $0xffc78348             // add    rdi, -1
  1659  	JNE  LBB0_277 // repeat until rdi reaches 0
  1660  
  1661  LBB0_278:
  1662  	LONG $0x03f98349 // cmp    r9, 3
  1663  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_276 (rsi = 0 on the fall-through path), so the loop above covered them
  1664  
  1665  LBB0_279: // four elements per iteration
  1666  	LONG $0x7104b70f               // movzx    eax, word [rcx + 2*rsi]
  1667  	LONG $0x72040366               // add    ax, word [rdx + 2*rsi]
  1668  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1669  	LONG $0x7144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rsi + 2]
  1670  	LONG $0x72440366; BYTE $0x02   // add    ax, word [rdx + 2*rsi + 2]
  1671  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1672  	LONG $0x7144b70f; BYTE $0x04   // movzx    eax, word [rcx + 2*rsi + 4]
  1673  	LONG $0x72440366; BYTE $0x04   // add    ax, word [rdx + 2*rsi + 4]
  1674  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1675  	LONG $0x7144b70f; BYTE $0x06   // movzx    eax, word [rcx + 2*rsi + 6]
  1676  	LONG $0x72440366; BYTE $0x06   // add    ax, word [rdx + 2*rsi + 6]
  1677  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1678  	LONG $0x04c68348               // add    rsi, 4
  1679  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1680  	JNE  LBB0_279 // continue until rsi == r10
  1681  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1682  
  1683  LBB0_594: // 16-bit element-wise subtract, scalar path: word [r8+2*i] = word [rdx+2*i] - word [rcx+2*i] (wraps at 16 bits). Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  1684  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1685  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1686  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1687  	LONG $0x10f98341         // cmp    r9d, 16
  1688  	JAE  LBB0_597 // len >= 16: handled at LBB0_597 (not in this chunk; presumably the SIMD loop -- confirm)
  1689  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1690  
  1691  LBB0_606: // remainder setup: rsi = index of next element, r10 = element count
  1692  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1693  	WORD $0xf749; BYTE $0xd1 // not    r9
  1694  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1695  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1696  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1697  	JE   LBB0_608 // r10 mod 4 == 0: skip the one-element loop
  1698  
  1699  LBB0_607: // one element per iteration, rdi times
  1700  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1701  	LONG $0x71042b66             // sub    ax, word [rcx + 2*rsi]
  1702  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1703  	LONG $0x01c68348             // add    rsi, 1
  1704  	LONG $0xffc78348             // add    rdi, -1
  1705  	JNE  LBB0_607 // repeat until rdi reaches 0
  1706  
  1707  LBB0_608:
  1708  	LONG $0x03f98349 // cmp    r9, 3
  1709  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_606 (rsi = 0 on the fall-through path), so the loop above covered them
  1710  
  1711  LBB0_609: // four elements per iteration
  1712  	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
  1713  	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
  1714  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1715  	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
  1716  	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
  1717  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1718  	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
  1719  	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
  1720  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1721  	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
  1722  	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
  1723  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1724  	LONG $0x04c68348               // add    rsi, 4
  1725  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1726  	JNE  LBB0_609 // continue until rsi == r10
  1727  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1728  
  1729  LBB0_610: // 16-bit element-wise subtract, scalar path: word [r8+2*i] = word [rdx+2*i] - word [rcx+2*i] (wraps at 16 bits). Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  1730  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1731  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1732  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1733  	LONG $0x10f98341         // cmp    r9d, 16
  1734  	JAE  LBB0_613 // len >= 16: handled at LBB0_613 (not in this chunk; presumably the SIMD loop -- confirm)
  1735  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1736  
  1737  LBB0_622: // remainder setup: rsi = index of next element, r10 = element count
  1738  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1739  	WORD $0xf749; BYTE $0xd1 // not    r9
  1740  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1741  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1742  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1743  	JE   LBB0_624 // r10 mod 4 == 0: skip the one-element loop
  1744  
  1745  LBB0_623: // one element per iteration, rdi times
  1746  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
  1747  	LONG $0x71042b66             // sub    ax, word [rcx + 2*rsi]
  1748  	LONG $0x04894166; BYTE $0x70 // mov    word [r8 + 2*rsi], ax
  1749  	LONG $0x01c68348             // add    rsi, 1
  1750  	LONG $0xffc78348             // add    rdi, -1
  1751  	JNE  LBB0_623 // repeat until rdi reaches 0
  1752  
  1753  LBB0_624:
  1754  	LONG $0x03f98349 // cmp    r9, 3
  1755  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_622 (rsi = 0 on the fall-through path), so the loop above covered them
  1756  
  1757  LBB0_625: // four elements per iteration
  1758  	LONG $0x7204b70f               // movzx    eax, word [rdx + 2*rsi]
  1759  	LONG $0x71042b66               // sub    ax, word [rcx + 2*rsi]
  1760  	LONG $0x04894166; BYTE $0x70   // mov    word [r8 + 2*rsi], ax
  1761  	LONG $0x7244b70f; BYTE $0x02   // movzx    eax, word [rdx + 2*rsi + 2]
  1762  	LONG $0x71442b66; BYTE $0x02   // sub    ax, word [rcx + 2*rsi + 2]
  1763  	LONG $0x44894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], ax
  1764  	LONG $0x7244b70f; BYTE $0x04   // movzx    eax, word [rdx + 2*rsi + 4]
  1765  	LONG $0x71442b66; BYTE $0x04   // sub    ax, word [rcx + 2*rsi + 4]
  1766  	LONG $0x44894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], ax
  1767  	LONG $0x7244b70f; BYTE $0x06   // movzx    eax, word [rdx + 2*rsi + 6]
  1768  	LONG $0x71442b66; BYTE $0x06   // sub    ax, word [rcx + 2*rsi + 6]
  1769  	LONG $0x44894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], ax
  1770  	LONG $0x04c68348               // add    rsi, 4
  1771  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  1772  	JNE  LBB0_625 // continue until rsi == r10
  1773  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1774  
  1775  LBB0_826: // Length check and loop-count setup only. The loop bodies (LBB0_829 / LBB0_831) are outside this chunk, so the per-element operation is not visible here. r9 = len per the function prologue.
  1776  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1777  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1778  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  (rsi = len, zero-extended)
  1779  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  1780  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  1781  	LONG $0x03e18341         // and    r9d, 3  (r9d = len mod 4)
  1782  	LONG $0x03ff8348         // cmp    rdi, 3
  1783  	JAE  LBB0_829 // len - 1 >= 3, i.e. len >= 4: continue at LBB0_829
  1784  	WORD $0xff31             // xor    edi, edi
  1785  	JMP  LBB0_831 // len < 4: continue at LBB0_831 with edi = 0
  1786  
  1787  LBB0_834: // float32 element-wise multiply, scalar path: dword [r8+4*i] = dword [rcx+4*i] * dword [rdx+4*i] using mulss. Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1788  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1789  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1790  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1791  	LONG $0x08f98341         // cmp    r9d, 8
  1792  	JAE  LBB0_837 // len >= 8: handled at LBB0_837 (not in this chunk; presumably the SIMD loop -- confirm)
  1793  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1794  
  1795  LBB0_846: // remainder setup: rsi = index of next element, r10 = element count
  1796  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  1797  	WORD $0xf748; BYTE $0xd0 // not    rax
  1798  	WORD $0x014c; BYTE $0xd0 // add    rax, r10  (rax = r10 - rsi - 1)
  1799  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1800  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1801  	JE   LBB0_848 // r10 mod 4 == 0: skip the one-element loop
  1802  
  1803  LBB0_847: // one element per iteration, rdi times
  1804  	LONG $0x04100ff3; BYTE $0xb1   // movss    xmm0, dword [rcx + 4*rsi]
  1805  	LONG $0x04590ff3; BYTE $0xb2   // mulss    xmm0, dword [rdx + 4*rsi]
  1806  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  1807  	LONG $0x01c68348               // add    rsi, 1
  1808  	LONG $0xffc78348               // add    rdi, -1
  1809  	JNE  LBB0_847 // repeat until rdi reaches 0
  1810  
  1811  LBB0_848:
  1812  	LONG $0x03f88348 // cmp    rax, 3
  1813  	JB   LBB0_1013 // rax < 3: at most 3 elements remained at LBB0_846 (rsi = 0 on the fall-through path), so the loop above covered them
  1814  
  1815  LBB0_849: // four elements per iteration
  1816  	LONG $0x04100ff3; BYTE $0xb1               // movss    xmm0, dword [rcx + 4*rsi]
  1817  	LONG $0x04590ff3; BYTE $0xb2               // mulss    xmm0, dword [rdx + 4*rsi]
  1818  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  1819  	LONG $0x44100ff3; WORD $0x04b1             // movss    xmm0, dword [rcx + 4*rsi + 4]
  1820  	LONG $0x44590ff3; WORD $0x04b2             // mulss    xmm0, dword [rdx + 4*rsi + 4]
  1821  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  1822  	LONG $0x44100ff3; WORD $0x08b1             // movss    xmm0, dword [rcx + 4*rsi + 8]
  1823  	LONG $0x44590ff3; WORD $0x08b2             // mulss    xmm0, dword [rdx + 4*rsi + 8]
  1824  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  1825  	LONG $0x44100ff3; WORD $0x0cb1             // movss    xmm0, dword [rcx + 4*rsi + 12]
  1826  	LONG $0x44590ff3; WORD $0x0cb2             // mulss    xmm0, dword [rdx + 4*rsi + 12]
  1827  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  1828  	LONG $0x04c68348                           // add    rsi, 4
  1829  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  1830  	JNE  LBB0_849 // continue until rsi == r10
  1831  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1832  
  1833  LBB0_976: // Length check and loop-count setup only. The loop bodies (LBB0_979 / LBB0_981) are outside this chunk, so the per-element operation is not visible here. r9 = len per the function prologue.
  1834  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1835  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1836  	WORD $0x8944; BYTE $0xce // mov    esi, r9d  (rsi = len, zero-extended)
  1837  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  1838  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  1839  	LONG $0x03e18341         // and    r9d, 3  (r9d = len mod 4)
  1840  	LONG $0x03ff8348         // cmp    rdi, 3
  1841  	JAE  LBB0_979 // len - 1 >= 3, i.e. len >= 4: continue at LBB0_979
  1842  	WORD $0xff31             // xor    edi, edi
  1843  	JMP  LBB0_981 // len < 4: continue at LBB0_981 with edi = 0
  1844  
  1845  LBB0_984: // float32 element-wise multiply, scalar path: dword [r8+4*i] = dword [rcx+4*i] * dword [rdx+4*i] using mulss. Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1846  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1847  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1848  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1849  	LONG $0x08f98341         // cmp    r9d, 8
  1850  	JAE  LBB0_987 // len >= 8: handled at LBB0_987 (not in this chunk; presumably the SIMD loop -- confirm)
  1851  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1852  
  1853  LBB0_996: // remainder setup: rsi = index of next element, r10 = element count
  1854  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  1855  	WORD $0xf748; BYTE $0xd0 // not    rax
  1856  	WORD $0x014c; BYTE $0xd0 // add    rax, r10  (rax = r10 - rsi - 1)
  1857  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1858  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1859  	JE   LBB0_998 // r10 mod 4 == 0: skip the one-element loop
  1860  
  1861  LBB0_997: // one element per iteration, rdi times
  1862  	LONG $0x04100ff3; BYTE $0xb1   // movss    xmm0, dword [rcx + 4*rsi]
  1863  	LONG $0x04590ff3; BYTE $0xb2   // mulss    xmm0, dword [rdx + 4*rsi]
  1864  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  1865  	LONG $0x01c68348               // add    rsi, 1
  1866  	LONG $0xffc78348               // add    rdi, -1
  1867  	JNE  LBB0_997 // repeat until rdi reaches 0
  1868  
  1869  LBB0_998:
  1870  	LONG $0x03f88348 // cmp    rax, 3
  1871  	JB   LBB0_1013 // rax < 3: at most 3 elements remained at LBB0_996 (rsi = 0 on the fall-through path), so the loop above covered them
  1872  
  1873  LBB0_999: // four elements per iteration
  1874  	LONG $0x04100ff3; BYTE $0xb1               // movss    xmm0, dword [rcx + 4*rsi]
  1875  	LONG $0x04590ff3; BYTE $0xb2               // mulss    xmm0, dword [rdx + 4*rsi]
  1876  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  1877  	LONG $0x44100ff3; WORD $0x04b1             // movss    xmm0, dword [rcx + 4*rsi + 4]
  1878  	LONG $0x44590ff3; WORD $0x04b2             // mulss    xmm0, dword [rdx + 4*rsi + 4]
  1879  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  1880  	LONG $0x44100ff3; WORD $0x08b1             // movss    xmm0, dword [rcx + 4*rsi + 8]
  1881  	LONG $0x44590ff3; WORD $0x08b2             // mulss    xmm0, dword [rdx + 4*rsi + 8]
  1882  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  1883  	LONG $0x44100ff3; WORD $0x0cb1             // movss    xmm0, dword [rcx + 4*rsi + 12]
  1884  	LONG $0x44590ff3; WORD $0x0cb2             // mulss    xmm0, dword [rdx + 4*rsi + 12]
  1885  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  1886  	LONG $0x04c68348                           // add    rsi, 4
  1887  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  1888  	JNE  LBB0_999 // continue until rsi == r10
  1889  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1890  
  1891  LBB0_149: // 64-bit element-wise add, scalar path: qword [r8+8*i] = qword [rcx+8*i] + qword [rdx+8*i] (wraps at 64 bits). Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1892  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1893  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1894  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1895  	LONG $0x04f98341         // cmp    r9d, 4
  1896  	JAE  LBB0_152 // len >= 4: handled at LBB0_152 (not in this chunk; presumably the SIMD loop -- confirm)
  1897  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1898  
  1899  LBB0_161: // remainder setup: rsi = index of next element, r10 = element count
  1900  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1901  	WORD $0xf749; BYTE $0xd1 // not    r9
  1902  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1903  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1904  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1905  	JE   LBB0_163 // r10 mod 4 == 0: skip the one-element loop
  1906  
  1907  LBB0_162: // one element per iteration, rdi times
  1908  	LONG $0xf1048b48 // mov    rax, qword [rcx + 8*rsi]
  1909  	LONG $0xf2040348 // add    rax, qword [rdx + 8*rsi]
  1910  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  1911  	LONG $0x01c68348 // add    rsi, 1
  1912  	LONG $0xffc78348 // add    rdi, -1
  1913  	JNE  LBB0_162 // repeat until rdi reaches 0
  1914  
  1915  LBB0_163:
  1916  	LONG $0x03f98349 // cmp    r9, 3
  1917  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_161 (rsi = 0 on the fall-through path), so the loop above covered them
  1918  
  1919  LBB0_164: // four elements per iteration
  1920  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
  1921  	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
  1922  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  1923  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
  1924  	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
  1925  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  1926  	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
  1927  	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
  1928  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  1929  	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
  1930  	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
  1931  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  1932  	LONG $0x04c68348             // add    rsi, 4
  1933  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  1934  	JNE  LBB0_164 // continue until rsi == r10
  1935  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1936  
  1937  LBB0_165: // float32 element-wise add, scalar path: dword [r8+4*i] = dword [rcx+4*i] + dword [rdx+4*i] using addss. Per the function prologue rcx=inRight, rdx=inLeft, r8=out, r9=len; the dispatch that reaches this label is outside this chunk.
  1938  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1939  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1940  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1941  	LONG $0x08f98341         // cmp    r9d, 8
  1942  	JAE  LBB0_168 // len >= 8: handled at LBB0_168 (not in this chunk; presumably the SIMD loop -- confirm)
  1943  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1944  
  1945  LBB0_177: // remainder setup: rsi = index of next element, r10 = element count
  1946  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  1947  	WORD $0xf748; BYTE $0xd0 // not    rax
  1948  	WORD $0x014c; BYTE $0xd0 // add    rax, r10  (rax = r10 - rsi - 1)
  1949  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1950  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1951  	JE   LBB0_179 // r10 mod 4 == 0: skip the one-element loop
  1952  
  1953  LBB0_178: // one element per iteration, rdi times
  1954  	LONG $0x04100ff3; BYTE $0xb1   // movss    xmm0, dword [rcx + 4*rsi]
  1955  	LONG $0x04580ff3; BYTE $0xb2   // addss    xmm0, dword [rdx + 4*rsi]
  1956  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  1957  	LONG $0x01c68348               // add    rsi, 1
  1958  	LONG $0xffc78348               // add    rdi, -1
  1959  	JNE  LBB0_178 // repeat until rdi reaches 0
  1960  
  1961  LBB0_179:
  1962  	LONG $0x03f88348 // cmp    rax, 3
  1963  	JB   LBB0_1013 // rax < 3: at most 3 elements remained at LBB0_177 (rsi = 0 on the fall-through path), so the loop above covered them
  1964  
  1965  LBB0_180: // four elements per iteration
  1966  	LONG $0x04100ff3; BYTE $0xb1               // movss    xmm0, dword [rcx + 4*rsi]
  1967  	LONG $0x04580ff3; BYTE $0xb2               // addss    xmm0, dword [rdx + 4*rsi]
  1968  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  1969  	LONG $0x44100ff3; WORD $0x04b1             // movss    xmm0, dword [rcx + 4*rsi + 4]
  1970  	LONG $0x44580ff3; WORD $0x04b2             // addss    xmm0, dword [rdx + 4*rsi + 4]
  1971  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  1972  	LONG $0x44100ff3; WORD $0x08b1             // movss    xmm0, dword [rcx + 4*rsi + 8]
  1973  	LONG $0x44580ff3; WORD $0x08b2             // addss    xmm0, dword [rdx + 4*rsi + 8]
  1974  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  1975  	LONG $0x44100ff3; WORD $0x0cb1             // movss    xmm0, dword [rcx + 4*rsi + 12]
  1976  	LONG $0x44580ff3; WORD $0x0cb2             // addss    xmm0, dword [rdx + 4*rsi + 12]
  1977  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  1978  	LONG $0x04c68348                           // add    rsi, 4
  1979  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  1980  	JNE  LBB0_180 // continue until rsi == r10
  1981  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  1982  
  1983  LBB0_495: // 64-bit element-wise subtract, scalar path: qword [r8+8*i] = qword [rdx+8*i] - qword [rcx+8*i] (wraps at 64 bits). Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  1984  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  1985  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  1986  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  1987  	LONG $0x04f98341         // cmp    r9d, 4
  1988  	JAE  LBB0_498 // len >= 4: handled at LBB0_498 (not in this chunk; presumably the SIMD loop -- confirm)
  1989  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  1990  
  1991  LBB0_507: // remainder setup: rsi = index of next element, r10 = element count
  1992  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  1993  	WORD $0xf749; BYTE $0xd1 // not    r9
  1994  	WORD $0x014d; BYTE $0xd1 // add    r9, r10  (r9 = r10 - rsi - 1)
  1995  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  1996  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  1997  	JE   LBB0_509 // r10 mod 4 == 0: skip the one-element loop
  1998  
  1999  LBB0_508: // one element per iteration, rdi times
  2000  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2001  	LONG $0xf1042b48 // sub    rax, qword [rcx + 8*rsi]
  2002  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  2003  	LONG $0x01c68348 // add    rsi, 1
  2004  	LONG $0xffc78348 // add    rdi, -1
  2005  	JNE  LBB0_508 // repeat until rdi reaches 0
  2006  
  2007  LBB0_509:
  2008  	LONG $0x03f98349 // cmp    r9, 3
  2009  	JB   LBB0_1013 // r9 < 3: at most 3 elements remained at LBB0_507 (rsi = 0 on the fall-through path), so the loop above covered them
  2010  
  2011  LBB0_510: // four elements per iteration
  2012  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2013  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
  2014  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  2015  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2016  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
  2017  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  2018  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2019  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
  2020  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  2021  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2022  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
  2023  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  2024  	LONG $0x04c68348             // add    rsi, 4
  2025  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2026  	JNE  LBB0_510 // continue until rsi == r10
  2027  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  2028  
  2029  LBB0_511: // float32 element-wise subtract, scalar path: dword [r8+4*i] = dword [rdx+4*i] - dword [rcx+4*i] using subss. Per the function prologue rdx=inLeft, rcx=inRight, r8=out, r9=len, i.e. left minus right; the dispatch that reaches this label is outside this chunk.
  2030  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2031  	JLE  LBB0_1013 // len <= 0 as a signed 32-bit value: nothing to do
  2032  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d  (r10 = len, zero-extended)
  2033  	LONG $0x08f98341         // cmp    r9d, 8
  2034  	JAE  LBB0_514 // len >= 8: handled at LBB0_514 (not in this chunk; presumably the SIMD loop -- confirm)
  2035  	WORD $0xf631             // xor    esi, esi  (rsi = element index, starts at 0)
  2036  
  2037  LBB0_523: // remainder setup: rsi = index of next element, r10 = element count
  2038  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  2039  	WORD $0xf748; BYTE $0xd0 // not    rax
  2040  	WORD $0x014c; BYTE $0xd0 // add    rax, r10  (rax = r10 - rsi - 1)
  2041  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2042  	LONG $0x03e78348         // and    rdi, 3  (rdi = r10 mod 4 = iterations of the one-element loop)
  2043  	JE   LBB0_525 // r10 mod 4 == 0: skip the one-element loop
  2044  
  2045  LBB0_524: // one element per iteration, rdi times
  2046  	LONG $0x04100ff3; BYTE $0xb2   // movss    xmm0, dword [rdx + 4*rsi]
  2047  	LONG $0x045c0ff3; BYTE $0xb1   // subss    xmm0, dword [rcx + 4*rsi]
  2048  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  2049  	LONG $0x01c68348               // add    rsi, 1
  2050  	LONG $0xffc78348               // add    rdi, -1
  2051  	JNE  LBB0_524 // repeat until rdi reaches 0
  2052  
  2053  LBB0_525:
  2054  	LONG $0x03f88348 // cmp    rax, 3
  2055  	JB   LBB0_1013 // rax < 3: at most 3 elements remained at LBB0_523 (rsi = 0 on the fall-through path), so the loop above covered them
  2056  
  2057  LBB0_526: // four elements per iteration
  2058  	LONG $0x04100ff3; BYTE $0xb2               // movss    xmm0, dword [rdx + 4*rsi]
  2059  	LONG $0x045c0ff3; BYTE $0xb1               // subss    xmm0, dword [rcx + 4*rsi]
  2060  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  2061  	LONG $0x44100ff3; WORD $0x04b2             // movss    xmm0, dword [rdx + 4*rsi + 4]
  2062  	LONG $0x445c0ff3; WORD $0x04b1             // subss    xmm0, dword [rcx + 4*rsi + 4]
  2063  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  2064  	LONG $0x44100ff3; WORD $0x08b2             // movss    xmm0, dword [rdx + 4*rsi + 8]
  2065  	LONG $0x445c0ff3; WORD $0x08b1             // subss    xmm0, dword [rcx + 4*rsi + 8]
  2066  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  2067  	LONG $0x44100ff3; WORD $0x0cb2             // movss    xmm0, dword [rdx + 4*rsi + 12]
  2068  	LONG $0x445c0ff3; WORD $0x0cb1             // subss    xmm0, dword [rcx + 4*rsi + 12]
  2069  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  2070  	LONG $0x04c68348                           // add    rsi, 4
  2071  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  2072  	JNE  LBB0_526 // continue until rsi == r10
  2073  	JMP  LBB0_1013 // all elements written; leave via LBB0_1013 (outside this chunk)
  2074  
  2075  LBB0_322:
        	// Scalar 64-bit integer add: out[i] = inRight[i] + inLeft[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 4 jumps to LBB0_325.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2076  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2077  	JLE  LBB0_1013
  2078  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2079  	LONG $0x04f98341         // cmp    r9d, 4
  2080  	JAE  LBB0_325
  2081  	WORD $0xf631             // xor    esi, esi
  2082  
  2083  LBB0_334:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2084  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2085  	WORD $0xf749; BYTE $0xd1 // not    r9
  2086  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2087  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2088  	LONG $0x03e78348         // and    rdi, 3
  2089  	JE   LBB0_336
  2090  
  2091  LBB0_335:
        	// One element per iteration; runs rdi times.
  2092  	LONG $0xf1048b48 // mov    rax, qword [rcx + 8*rsi]
  2093  	LONG $0xf2040348 // add    rax, qword [rdx + 8*rsi]
  2094  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  2095  	LONG $0x01c68348 // add    rsi, 1
  2096  	LONG $0xffc78348 // add    rdi, -1
  2097  	JNE  LBB0_335
  2098  
  2099  LBB0_336:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2100  	LONG $0x03f98349 // cmp    r9, 3
  2101  	JB   LBB0_1013
  2102  
  2103  LBB0_337:
        	// Four elements per iteration; repeats until rsi == r10.
  2104  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
  2105  	LONG $0xf2040348             // add    rax, qword [rdx + 8*rsi]
  2106  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  2107  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
  2108  	LONG $0xf2440348; BYTE $0x08 // add    rax, qword [rdx + 8*rsi + 8]
  2109  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  2110  	LONG $0xf1448b48; BYTE $0x10 // mov    rax, qword [rcx + 8*rsi + 16]
  2111  	LONG $0xf2440348; BYTE $0x10 // add    rax, qword [rdx + 8*rsi + 16]
  2112  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  2113  	LONG $0xf1448b48; BYTE $0x18 // mov    rax, qword [rcx + 8*rsi + 24]
  2114  	LONG $0xf2440348; BYTE $0x18 // add    rax, qword [rdx + 8*rsi + 24]
  2115  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  2116  	LONG $0x04c68348             // add    rsi, 4
  2117  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2118  	JNE  LBB0_337
  2119  	JMP  LBB0_1013
  2120  
  2121  LBB0_338:
        	// Scalar single-precision add: out[i] = inRight[i] + inLeft[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_341.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2122  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2123  	JLE  LBB0_1013
  2124  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2125  	LONG $0x08f98341         // cmp    r9d, 8
  2126  	JAE  LBB0_341
  2127  	WORD $0xf631             // xor    esi, esi
  2128  
  2129  LBB0_350:
        	// rsi = element index. rax = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2130  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  2131  	WORD $0xf748; BYTE $0xd0 // not    rax
  2132  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
  2133  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2134  	LONG $0x03e78348         // and    rdi, 3
  2135  	JE   LBB0_352
  2136  
  2137  LBB0_351:
        	// One element per iteration; runs rdi times.
  2138  	LONG $0x04100ff3; BYTE $0xb1   // movss    xmm0, dword [rcx + 4*rsi]
  2139  	LONG $0x04580ff3; BYTE $0xb2   // addss    xmm0, dword [rdx + 4*rsi]
  2140  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  2141  	LONG $0x01c68348               // add    rsi, 1
  2142  	LONG $0xffc78348               // add    rdi, -1
  2143  	JNE  LBB0_351
  2144  
  2145  LBB0_352:
        	// rax was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2146  	LONG $0x03f88348 // cmp    rax, 3
  2147  	JB   LBB0_1013
  2148  
  2149  LBB0_353:
        	// Four elements per iteration; repeats until rsi == r10.
  2150  	LONG $0x04100ff3; BYTE $0xb1               // movss    xmm0, dword [rcx + 4*rsi]
  2151  	LONG $0x04580ff3; BYTE $0xb2               // addss    xmm0, dword [rdx + 4*rsi]
  2152  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  2153  	LONG $0x44100ff3; WORD $0x04b1             // movss    xmm0, dword [rcx + 4*rsi + 4]
  2154  	LONG $0x44580ff3; WORD $0x04b2             // addss    xmm0, dword [rdx + 4*rsi + 4]
  2155  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  2156  	LONG $0x44100ff3; WORD $0x08b1             // movss    xmm0, dword [rcx + 4*rsi + 8]
  2157  	LONG $0x44580ff3; WORD $0x08b2             // addss    xmm0, dword [rdx + 4*rsi + 8]
  2158  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  2159  	LONG $0x44100ff3; WORD $0x0cb1             // movss    xmm0, dword [rcx + 4*rsi + 12]
  2160  	LONG $0x44580ff3; WORD $0x0cb2             // addss    xmm0, dword [rdx + 4*rsi + 12]
  2161  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  2162  	LONG $0x04c68348                           // add    rsi, 4
  2163  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  2164  	JNE  LBB0_353
  2165  	JMP  LBB0_1013
  2166  
  2167  LBB0_668:
        	// Scalar 64-bit integer subtract: out[i] = inLeft[i] - inRight[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 4 jumps to LBB0_671.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2168  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2169  	JLE  LBB0_1013
  2170  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2171  	LONG $0x04f98341         // cmp    r9d, 4
  2172  	JAE  LBB0_671
  2173  	WORD $0xf631             // xor    esi, esi
  2174  
  2175  LBB0_680:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2176  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2177  	WORD $0xf749; BYTE $0xd1 // not    r9
  2178  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2179  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2180  	LONG $0x03e78348         // and    rdi, 3
  2181  	JE   LBB0_682
  2182  
  2183  LBB0_681:
        	// One element per iteration; runs rdi times.
  2184  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
  2185  	LONG $0xf1042b48 // sub    rax, qword [rcx + 8*rsi]
  2186  	LONG $0xf0048949 // mov    qword [r8 + 8*rsi], rax
  2187  	LONG $0x01c68348 // add    rsi, 1
  2188  	LONG $0xffc78348 // add    rdi, -1
  2189  	JNE  LBB0_681
  2190  
  2191  LBB0_682:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2192  	LONG $0x03f98349 // cmp    r9, 3
  2193  	JB   LBB0_1013
  2194  
  2195  LBB0_683:
        	// Four elements per iteration; repeats until rsi == r10.
  2196  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
  2197  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
  2198  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
  2199  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
  2200  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
  2201  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
  2202  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
  2203  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
  2204  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
  2205  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
  2206  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
  2207  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
  2208  	LONG $0x04c68348             // add    rsi, 4
  2209  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2210  	JNE  LBB0_683
  2211  	JMP  LBB0_1013
  2212  
  2213  LBB0_684:
        	// Scalar single-precision subtract: out[i] = inLeft[i] - inRight[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_687.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2214  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2215  	JLE  LBB0_1013
  2216  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2217  	LONG $0x08f98341         // cmp    r9d, 8
  2218  	JAE  LBB0_687
  2219  	WORD $0xf631             // xor    esi, esi
  2220  
  2221  LBB0_696:
        	// rsi = element index. rax = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2222  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
  2223  	WORD $0xf748; BYTE $0xd0 // not    rax
  2224  	WORD $0x014c; BYTE $0xd0 // add    rax, r10
  2225  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2226  	LONG $0x03e78348         // and    rdi, 3
  2227  	JE   LBB0_698
  2228  
  2229  LBB0_697:
        	// One element per iteration; runs rdi times.
  2230  	LONG $0x04100ff3; BYTE $0xb2   // movss    xmm0, dword [rdx + 4*rsi]
  2231  	LONG $0x045c0ff3; BYTE $0xb1   // subss    xmm0, dword [rcx + 4*rsi]
  2232  	LONG $0x110f41f3; WORD $0xb004 // movss    dword [r8 + 4*rsi], xmm0
  2233  	LONG $0x01c68348               // add    rsi, 1
  2234  	LONG $0xffc78348               // add    rdi, -1
  2235  	JNE  LBB0_697
  2236  
  2237  LBB0_698:
        	// rax was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2238  	LONG $0x03f88348 // cmp    rax, 3
  2239  	JB   LBB0_1013
  2240  
  2241  LBB0_699:
        	// Four elements per iteration; repeats until rsi == r10.
  2242  	LONG $0x04100ff3; BYTE $0xb2               // movss    xmm0, dword [rdx + 4*rsi]
  2243  	LONG $0x045c0ff3; BYTE $0xb1               // subss    xmm0, dword [rcx + 4*rsi]
  2244  	LONG $0x110f41f3; WORD $0xb004             // movss    dword [r8 + 4*rsi], xmm0
  2245  	LONG $0x44100ff3; WORD $0x04b2             // movss    xmm0, dword [rdx + 4*rsi + 4]
  2246  	LONG $0x445c0ff3; WORD $0x04b1             // subss    xmm0, dword [rcx + 4*rsi + 4]
  2247  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm0
  2248  	LONG $0x44100ff3; WORD $0x08b2             // movss    xmm0, dword [rdx + 4*rsi + 8]
  2249  	LONG $0x445c0ff3; WORD $0x08b1             // subss    xmm0, dword [rcx + 4*rsi + 8]
  2250  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm0
  2251  	LONG $0x44100ff3; WORD $0x0cb2             // movss    xmm0, dword [rdx + 4*rsi + 12]
  2252  	LONG $0x445c0ff3; WORD $0x0cb1             // subss    xmm0, dword [rcx + 4*rsi + 12]
  2253  	LONG $0x110f41f3; WORD $0xb044; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm0
  2254  	LONG $0x04c68348                           // add    rsi, 4
  2255  	WORD $0x3949; BYTE $0xf2                   // cmp    r10, rsi
  2256  	JNE  LBB0_699
  2257  	JMP  LBB0_1013
  2258  
  2259  LBB0_731:
        	// Scalar 8-bit multiply: al = inRight[i], `mul byte` multiplies al by
        	// inLeft[i] into ax, and only the low byte (al) is stored to out[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_734.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rdi.
  2260  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2261  	JLE  LBB0_1013
  2262  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2263  	LONG $0x20f98341         // cmp    r9d, 32
  2264  	JAE  LBB0_734
  2265  	WORD $0xff31             // xor    edi, edi
  2266  
  2267  LBB0_743:
        	// rdi = element index here (rsi is the counter). r9 = r10 - rdi - 1.
        	// rsi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2268  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  2269  	WORD $0xf749; BYTE $0xd1 // not    r9
  2270  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2271  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  2272  	LONG $0x03e68348         // and    rsi, 3
  2273  	JE   LBB0_745
  2274  
  2275  LBB0_744:
        	// One element per iteration; runs rsi times.
  2276  	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
  2277  	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
  2278  	LONG $0x38048841         // mov    byte [r8 + rdi], al
  2279  	LONG $0x01c78348         // add    rdi, 1
  2280  	LONG $0xffc68348         // add    rsi, -1
  2281  	JNE  LBB0_744
  2282  
  2283  LBB0_745:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2284  	LONG $0x03f98349 // cmp    r9, 3
  2285  	JB   LBB0_1013
  2286  
  2287  LBB0_746:
        	// Four elements per iteration; repeats until rdi == r10.
  2288  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
  2289  	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
  2290  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  2291  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
  2292  	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
  2293  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  2294  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
  2295  	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
  2296  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  2297  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
  2298  	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
  2299  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  2300  	LONG $0x04c78348             // add    rdi, 4
  2301  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  2302  	JNE  LBB0_746
  2303  	JMP  LBB0_1013
  2304  
  2305  LBB0_881:
        	// Scalar 8-bit multiply: al = inRight[i], `mul byte` multiplies al by
        	// inLeft[i] into ax, and only the low byte (al) is stored to out[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_884.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rdi.
  2306  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2307  	JLE  LBB0_1013
  2308  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2309  	LONG $0x20f98341         // cmp    r9d, 32
  2310  	JAE  LBB0_884
  2311  	WORD $0xff31             // xor    edi, edi
  2312  
  2313  LBB0_893:
        	// rdi = element index here (rsi is the counter). r9 = r10 - rdi - 1.
        	// rsi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2314  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  2315  	WORD $0xf749; BYTE $0xd1 // not    r9
  2316  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2317  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  2318  	LONG $0x03e68348         // and    rsi, 3
  2319  	JE   LBB0_895
  2320  
  2321  LBB0_894:
        	// One element per iteration; runs rsi times.
  2322  	LONG $0x3904b60f         // movzx    eax, byte [rcx + rdi]
  2323  	WORD $0x24f6; BYTE $0x3a // mul    byte [rdx + rdi]
  2324  	LONG $0x38048841         // mov    byte [r8 + rdi], al
  2325  	LONG $0x01c78348         // add    rdi, 1
  2326  	LONG $0xffc68348         // add    rsi, -1
  2327  	JNE  LBB0_894
  2328  
  2329  LBB0_895:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2330  	LONG $0x03f98349 // cmp    r9, 3
  2331  	JB   LBB0_1013
  2332  
  2333  LBB0_896:
        	// Four elements per iteration; repeats until rdi == r10.
  2334  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
  2335  	WORD $0x24f6; BYTE $0x3a     // mul    byte [rdx + rdi]
  2336  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  2337  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
  2338  	LONG $0x013a64f6             // mul    byte [rdx + rdi + 1]
  2339  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  2340  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
  2341  	LONG $0x023a64f6             // mul    byte [rdx + rdi + 2]
  2342  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  2343  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
  2344  	LONG $0x033a64f6             // mul    byte [rdx + rdi + 3]
  2345  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  2346  	LONG $0x04c78348             // add    rdi, 4
  2347  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  2348  	JNE  LBB0_896
  2349  	JMP  LBB0_1013
  2350  
  2351  LBB0_46:
        	// Scalar 8-bit add: out[i] = inRight[i] + inLeft[i], computed in al.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_49.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2352  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2353  	JLE  LBB0_1013
  2354  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2355  	LONG $0x20f98341         // cmp    r9d, 32
  2356  	JAE  LBB0_49
  2357  	WORD $0xf631             // xor    esi, esi
  2358  
  2359  LBB0_58:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2360  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2361  	WORD $0xf749; BYTE $0xd1 // not    r9
  2362  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2363  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2364  	LONG $0x03e78348         // and    rdi, 3
  2365  	JE   LBB0_60
  2366  
  2367  LBB0_59:
        	// One element per iteration; runs rdi times.
  2368  	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
  2369  	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
  2370  	LONG $0x30048841         // mov    byte [r8 + rsi], al
  2371  	LONG $0x01c68348         // add    rsi, 1
  2372  	LONG $0xffc78348         // add    rdi, -1
  2373  	JNE  LBB0_59
  2374  
  2375  LBB0_60:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2376  	LONG $0x03f98349 // cmp    r9, 3
  2377  	JB   LBB0_1013
  2378  
  2379  LBB0_61:
        	// Four elements per iteration; repeats until rsi == r10.
  2380  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
  2381  	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
  2382  	LONG $0x30048841             // mov    byte [r8 + rsi], al
  2383  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
  2384  	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
  2385  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
  2386  	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
  2387  	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
  2388  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
  2389  	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
  2390  	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
  2391  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
  2392  	LONG $0x04c68348             // add    rsi, 4
  2393  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2394  	JNE  LBB0_61
  2395  	JMP  LBB0_1013
  2396  
  2397  LBB0_392:
        	// Scalar 8-bit subtract: out[i] = inLeft[i] - inRight[i], computed in al.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_395.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2398  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2399  	JLE  LBB0_1013
  2400  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2401  	LONG $0x20f98341         // cmp    r9d, 32
  2402  	JAE  LBB0_395
  2403  	WORD $0xf631             // xor    esi, esi
  2404  
  2405  LBB0_404:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2406  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2407  	WORD $0xf749; BYTE $0xd1 // not    r9
  2408  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2409  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2410  	LONG $0x03e78348         // and    rdi, 3
  2411  	JE   LBB0_406
  2412  
  2413  LBB0_405:
        	// One element per iteration; runs rdi times.
  2414  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2415  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
  2416  	LONG $0x30048841         // mov    byte [r8 + rsi], al
  2417  	LONG $0x01c68348         // add    rsi, 1
  2418  	LONG $0xffc78348         // add    rdi, -1
  2419  	JNE  LBB0_405
  2420  
  2421  LBB0_406:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2422  	LONG $0x03f98349 // cmp    r9, 3
  2423  	JB   LBB0_1013
  2424  
  2425  LBB0_407:
        	// Four elements per iteration; repeats until rsi == r10.
  2426  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2427  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
  2428  	LONG $0x30048841             // mov    byte [r8 + rsi], al
  2429  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2430  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
  2431  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
  2432  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2433  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
  2434  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
  2435  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2436  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
  2437  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
  2438  	LONG $0x04c68348             // add    rsi, 4
  2439  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2440  	JNE  LBB0_407
  2441  	JMP  LBB0_1013
  2442  
  2443  LBB0_219:
        	// Scalar 8-bit add: out[i] = inRight[i] + inLeft[i], computed in al.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_222.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2444  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2445  	JLE  LBB0_1013
  2446  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2447  	LONG $0x20f98341         // cmp    r9d, 32
  2448  	JAE  LBB0_222
  2449  	WORD $0xf631             // xor    esi, esi
  2450  
  2451  LBB0_231:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2452  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2453  	WORD $0xf749; BYTE $0xd1 // not    r9
  2454  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2455  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2456  	LONG $0x03e78348         // and    rdi, 3
  2457  	JE   LBB0_233
  2458  
  2459  LBB0_232:
        	// One element per iteration; runs rdi times.
  2460  	LONG $0x3104b60f         // movzx    eax, byte [rcx + rsi]
  2461  	WORD $0x0402; BYTE $0x32 // add    al, byte [rdx + rsi]
  2462  	LONG $0x30048841         // mov    byte [r8 + rsi], al
  2463  	LONG $0x01c68348         // add    rsi, 1
  2464  	LONG $0xffc78348         // add    rdi, -1
  2465  	JNE  LBB0_232
  2466  
  2467  LBB0_233:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2468  	LONG $0x03f98349 // cmp    r9, 3
  2469  	JB   LBB0_1013
  2470  
  2471  LBB0_234:
        	// Four elements per iteration; repeats until rsi == r10.
  2472  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
  2473  	WORD $0x0402; BYTE $0x32     // add    al, byte [rdx + rsi]
  2474  	LONG $0x30048841             // mov    byte [r8 + rsi], al
  2475  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
  2476  	LONG $0x01324402             // add    al, byte [rdx + rsi + 1]
  2477  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
  2478  	LONG $0x3144b60f; BYTE $0x02 // movzx    eax, byte [rcx + rsi + 2]
  2479  	LONG $0x02324402             // add    al, byte [rdx + rsi + 2]
  2480  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
  2481  	LONG $0x3144b60f; BYTE $0x03 // movzx    eax, byte [rcx + rsi + 3]
  2482  	LONG $0x03324402             // add    al, byte [rdx + rsi + 3]
  2483  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
  2484  	LONG $0x04c68348             // add    rsi, 4
  2485  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2486  	JNE  LBB0_234
  2487  	JMP  LBB0_1013
  2488  
  2489  LBB0_565:
        	// Scalar 8-bit subtract: out[i] = inLeft[i] - inRight[i], computed in al.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 32 jumps to LBB0_568.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2490  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2491  	JLE  LBB0_1013
  2492  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2493  	LONG $0x20f98341         // cmp    r9d, 32
  2494  	JAE  LBB0_568
  2495  	WORD $0xf631             // xor    esi, esi
  2496  
  2497  LBB0_577:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2498  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2499  	WORD $0xf749; BYTE $0xd1 // not    r9
  2500  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2501  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2502  	LONG $0x03e78348         // and    rdi, 3
  2503  	JE   LBB0_579
  2504  
  2505  LBB0_578:
        	// One element per iteration; runs rdi times.
  2506  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
  2507  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
  2508  	LONG $0x30048841         // mov    byte [r8 + rsi], al
  2509  	LONG $0x01c68348         // add    rsi, 1
  2510  	LONG $0xffc78348         // add    rdi, -1
  2511  	JNE  LBB0_578
  2512  
  2513  LBB0_579:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2514  	LONG $0x03f98349 // cmp    r9, 3
  2515  	JB   LBB0_1013
  2516  
  2517  LBB0_580:
        	// Four elements per iteration; repeats until rsi == r10.
  2518  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
  2519  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
  2520  	LONG $0x30048841             // mov    byte [r8 + rsi], al
  2521  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
  2522  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
  2523  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
  2524  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
  2525  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
  2526  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
  2527  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
  2528  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
  2529  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
  2530  	LONG $0x04c68348             // add    rsi, 4
  2531  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2532  	JNE  LBB0_580
  2533  	JMP  LBB0_1013
  2534  
  2535  LBB0_805:
        	// Scalar 32-bit integer multiply: out[i] = inRight[i] * inLeft[i].
        	// The two-operand imul keeps the low 32 bits of the product in eax.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_808.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2536  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2537  	JLE  LBB0_1013
  2538  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2539  	LONG $0x08f98341         // cmp    r9d, 8
  2540  	JAE  LBB0_808
  2541  	WORD $0xf631             // xor    esi, esi
  2542  
  2543  LBB0_817:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2544  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2545  	WORD $0xf749; BYTE $0xd1 // not    r9
  2546  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2547  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2548  	LONG $0x03e78348         // and    rdi, 3
  2549  	JE   LBB0_819
  2550  
  2551  LBB0_818:
        	// One element per iteration; runs rdi times.
  2552  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
  2553  	LONG $0xb204af0f         // imul    eax, dword [rdx + 4*rsi]
  2554  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2555  	LONG $0x01c68348         // add    rsi, 1
  2556  	LONG $0xffc78348         // add    rdi, -1
  2557  	JNE  LBB0_818
  2558  
  2559  LBB0_819:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2560  	LONG $0x03f98349 // cmp    r9, 3
  2561  	JB   LBB0_1013
  2562  
  2563  LBB0_820:
        	// Four elements per iteration; repeats until rsi == r10.
  2564  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
  2565  	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
  2566  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2567  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
  2568  	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
  2569  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2570  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
  2571  	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
  2572  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2573  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
  2574  	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
  2575  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2576  	LONG $0x04c68348             // add    rsi, 4
  2577  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2578  	JNE  LBB0_820
  2579  	JMP  LBB0_1013
  2580  
  2581  LBB0_955:
        	// Scalar 32-bit integer multiply: out[i] = inRight[i] * inLeft[i].
        	// The two-operand imul keeps the low 32 bits of the product in eax.
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_958.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2582  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2583  	JLE  LBB0_1013
  2584  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2585  	LONG $0x08f98341         // cmp    r9d, 8
  2586  	JAE  LBB0_958
  2587  	WORD $0xf631             // xor    esi, esi
  2588  
  2589  LBB0_967:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2590  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2591  	WORD $0xf749; BYTE $0xd1 // not    r9
  2592  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2593  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2594  	LONG $0x03e78348         // and    rdi, 3
  2595  	JE   LBB0_969
  2596  
  2597  LBB0_968:
        	// One element per iteration; runs rdi times.
  2598  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
  2599  	LONG $0xb204af0f         // imul    eax, dword [rdx + 4*rsi]
  2600  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2601  	LONG $0x01c68348         // add    rsi, 1
  2602  	LONG $0xffc78348         // add    rdi, -1
  2603  	JNE  LBB0_968
  2604  
  2605  LBB0_969:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2606  	LONG $0x03f98349 // cmp    r9, 3
  2607  	JB   LBB0_1013
  2608  
  2609  LBB0_970:
        	// Four elements per iteration; repeats until rsi == r10.
  2610  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
  2611  	LONG $0xb204af0f             // imul    eax, dword [rdx + 4*rsi]
  2612  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2613  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
  2614  	LONG $0xb244af0f; BYTE $0x04 // imul    eax, dword [rdx + 4*rsi + 4]
  2615  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2616  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
  2617  	LONG $0xb244af0f; BYTE $0x08 // imul    eax, dword [rdx + 4*rsi + 8]
  2618  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2619  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
  2620  	LONG $0xb244af0f; BYTE $0x0c // imul    eax, dword [rdx + 4*rsi + 12]
  2621  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2622  	LONG $0x04c68348             // add    rsi, 4
  2623  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2624  	JNE  LBB0_970
  2625  	JMP  LBB0_1013
  2626  
  2627  LBB0_120:
        	// Scalar 32-bit integer add: out[i] = inRight[i] + inLeft[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_123.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2628  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2629  	JLE  LBB0_1013
  2630  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2631  	LONG $0x08f98341         // cmp    r9d, 8
  2632  	JAE  LBB0_123
  2633  	WORD $0xf631             // xor    esi, esi
  2634  
  2635  LBB0_132:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2636  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2637  	WORD $0xf749; BYTE $0xd1 // not    r9
  2638  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2639  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2640  	LONG $0x03e78348         // and    rdi, 3
  2641  	JE   LBB0_134
  2642  
  2643  LBB0_133:
        	// One element per iteration; runs rdi times.
  2644  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
  2645  	WORD $0x0403; BYTE $0xb2 // add    eax, dword [rdx + 4*rsi]
  2646  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2647  	LONG $0x01c68348         // add    rsi, 1
  2648  	LONG $0xffc78348         // add    rdi, -1
  2649  	JNE  LBB0_133
  2650  
  2651  LBB0_134:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2652  	LONG $0x03f98349 // cmp    r9, 3
  2653  	JB   LBB0_1013
  2654  
  2655  LBB0_135:
        	// Four elements per iteration; repeats until rsi == r10.
  2656  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
  2657  	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
  2658  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2659  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
  2660  	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
  2661  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2662  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
  2663  	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
  2664  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2665  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
  2666  	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
  2667  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2668  	LONG $0x04c68348             // add    rsi, 4
  2669  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2670  	JNE  LBB0_135
  2671  	JMP  LBB0_1013
  2672  
  2673  LBB0_466:
        	// Scalar 32-bit integer subtract: out[i] = inLeft[i] - inRight[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_469.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2674  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2675  	JLE  LBB0_1013
  2676  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2677  	LONG $0x08f98341         // cmp    r9d, 8
  2678  	JAE  LBB0_469
  2679  	WORD $0xf631             // xor    esi, esi
  2680  
  2681  LBB0_478:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2682  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2683  	WORD $0xf749; BYTE $0xd1 // not    r9
  2684  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2685  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2686  	LONG $0x03e78348         // and    rdi, 3
  2687  	JE   LBB0_480
  2688  
  2689  LBB0_479:
        	// One element per iteration; runs rdi times.
  2690  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  2691  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
  2692  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2693  	LONG $0x01c68348         // add    rsi, 1
  2694  	LONG $0xffc78348         // add    rdi, -1
  2695  	JNE  LBB0_479
  2696  
  2697  LBB0_480:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2698  	LONG $0x03f98349 // cmp    r9, 3
  2699  	JB   LBB0_1013
  2700  
  2701  LBB0_481:
        	// Four elements per iteration; repeats until rsi == r10.
  2702  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
  2703  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
  2704  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2705  	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
  2706  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
  2707  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2708  	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
  2709  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
  2710  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2711  	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
  2712  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
  2713  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2714  	LONG $0x04c68348             // add    rsi, 4
  2715  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2716  	JNE  LBB0_481
  2717  	JMP  LBB0_1013
  2718  
  2719  LBB0_293:
        	// Scalar 32-bit integer add: out[i] = inRight[i] + inLeft[i].
        	// Per the prologue: rdx = inLeft, rcx = inRight, r8 = out, r9d = len.
        	// r9d <= 0 (signed) jumps to LBB0_1013; r9d >= 8 jumps to LBB0_296.
        	// Both targets are defined outside this view.
        	// r10 = r9d zero-extended; it is the loop bound for the index in rsi.
  2720  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  2721  	JLE  LBB0_1013
  2722  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  2723  	LONG $0x08f98341         // cmp    r9d, 8
  2724  	JAE  LBB0_296
  2725  	WORD $0xf631             // xor    esi, esi
  2726  
  2727  LBB0_305:
        	// rsi = element index. r9 = r10 - rsi - 1 (not, then add).
        	// rdi = r10 & 3 is the trip count of the one-element loop below,
        	// which is skipped when it is zero.
  2728  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2729  	WORD $0xf749; BYTE $0xd1 // not    r9
  2730  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  2731  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2732  	LONG $0x03e78348         // and    rdi, 3
  2733  	JE   LBB0_307
  2734  
  2735  LBB0_306:
        	// One element per iteration; runs rdi times.
  2736  	WORD $0x048b; BYTE $0xb1 // mov    eax, dword [rcx + 4*rsi]
  2737  	WORD $0x0403; BYTE $0xb2 // add    eax, dword [rdx + 4*rsi]
  2738  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2739  	LONG $0x01c68348         // add    rsi, 1
  2740  	LONG $0xffc78348         // add    rdi, -1
  2741  	JNE  LBB0_306
  2742  
  2743  LBB0_307:
        	// r9 was computed before the loop above; when it is below 3 (unsigned)
        	// the unrolled loop is skipped.
  2744  	LONG $0x03f98349 // cmp    r9, 3
  2745  	JB   LBB0_1013
  2746  
  2747  LBB0_308:
        	// Four elements per iteration; repeats until rsi == r10.
  2748  	WORD $0x048b; BYTE $0xb1     // mov    eax, dword [rcx + 4*rsi]
  2749  	WORD $0x0403; BYTE $0xb2     // add    eax, dword [rdx + 4*rsi]
  2750  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2751  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
  2752  	LONG $0x04b24403             // add    eax, dword [rdx + 4*rsi + 4]
  2753  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2754  	LONG $0x08b1448b             // mov    eax, dword [rcx + 4*rsi + 8]
  2755  	LONG $0x08b24403             // add    eax, dword [rdx + 4*rsi + 8]
  2756  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2757  	LONG $0x0cb1448b             // mov    eax, dword [rcx + 4*rsi + 12]
  2758  	LONG $0x0cb24403             // add    eax, dword [rdx + 4*rsi + 12]
  2759  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2760  	LONG $0x04c68348             // add    rsi, 4
  2761  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2762  	JNE  LBB0_308
  2763  	JMP  LBB0_1013
  2764  
  2765  LBB0_639: // 32-bit integer subtract, out[i] = [rdx + 4*i] - [rcx + 4*i]: length check, then scalar loops (rdx/rcx/r8 were loaded from inLeft/inRight/out at entry; assumed unchanged here)
  2766  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d -- r9 was loaded from len at function entry
  2767  	JLE  LBB0_1013 // 32-bit length <= 0: nothing to do (LBB0_1013 is outside this excerpt; presumably the common exit)
  2768  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended to 64 bits
  2769  	LONG $0x08f98341         // cmp    r9d, 8
  2770  	JAE  LBB0_642 // 8 or more elements: take LBB0_642 (outside this excerpt; presumably the vectorized path -- confirm)
  2771  	WORD $0xf631             // xor    esi, esi -- rsi = element index, 0 on this fall-through path
  2772    
  2773  LBB0_651: // scalar path setup: process elements rsi .. r10-1
  2774  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  2775  	WORD $0xf749; BYTE $0xd1 // not    r9
  2776  	WORD $0x014d; BYTE $0xd1 // add    r9, r10 -- r9 = r10 - rsi - 1 (remaining elements minus one)
  2777  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  2778  	LONG $0x03e78348         // and    rdi, 3 -- rdi = count mod 4 = iterations of the peel loop below
  2779  	JE   LBB0_653 // count is a multiple of 4: no peel iterations needed
  2780    
  2781  LBB0_652: // peel loop: one element per iteration, rdi times
  2782  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
  2783  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
  2784  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
  2785  	LONG $0x01c68348         // add    rsi, 1
  2786  	LONG $0xffc78348         // add    rdi, -1 -- sets ZF when the peel count reaches zero
  2787  	JNE  LBB0_652
  2788    
  2789  LBB0_653: // decide whether any full groups of 4 remain
  2790  	LONG $0x03f98349 // cmp    r9, 3
  2791  	JB   LBB0_1013 // r9 < 3 means fewer than 4 elements were pending at LBB0_651, so the peel loop covered them
  2792    
  2793  LBB0_654: // main scalar loop, unrolled 4x; runs until rsi == r10
  2794  	WORD $0x048b; BYTE $0xb2     // mov    eax, dword [rdx + 4*rsi]
  2795  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
  2796  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
  2797  	LONG $0x04b2448b             // mov    eax, dword [rdx + 4*rsi + 4]
  2798  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
  2799  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
  2800  	LONG $0x08b2448b             // mov    eax, dword [rdx + 4*rsi + 8]
  2801  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
  2802  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
  2803  	LONG $0x0cb2448b             // mov    eax, dword [rdx + 4*rsi + 12]
  2804  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
  2805  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
  2806  	LONG $0x04c68348             // add    rsi, 4
  2807  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  2808  	JNE  LBB0_654 // loop until the index reaches the element count
  2809  	JMP  LBB0_1013 // done
  2810  
  2811  LBB0_792: // 32-bit integer multiply with r10 >= 8 elements (reached from the dispatch at the top of the function): overlap check, then the pmulld loop
  2812  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (out) buffer
  2813  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx (inLeft) buffer
  2814  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2815  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > out start)
  2816  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx (inRight) buffer
  2817  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  2818  	LONG $0xd3970f41         // seta    r11b -- r11b = (out end > rdx buffer start)
  2819  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2820  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > out start)
  2821  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  2822  	LONG $0xd7970f40         // seta    dil -- dil = (out end > rcx buffer start)
  2823  	WORD $0xf631             // xor    esi, esi -- index 0 in case the scalar loop is taken
  2824  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  2825  	JNE  LBB0_801 // out overlaps the rdx buffer: use the scalar loop at LBB0_801 instead
  2826  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  2827  	JNE  LBB0_801 // out overlaps the rcx buffer: use the scalar loop at LBB0_801 instead
  2828  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  2829  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  2830  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  2831  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  2832  	LONG $0x03e9c149         // shr    r9, 3
  2833  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  2834  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  2835  	JE   LBB0_795 // exactly one chunk: skip the two-chunk loop (LBB0_795 is outside this excerpt)
  2836  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  2837  	LONG $0xfee08348         // and    rax, -2
  2838  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  2839  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  2840    
  2841  LBB0_797: // vector loop: two 8-element chunks (16 dwords) per iteration
  2842  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  2843  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  2844  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  2845  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0 -- four 32-bit products, low 32 bits of each kept
  2846  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  2847  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  2848  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  2849  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  2850  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  2851  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  2852  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  2853  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  2854  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  2855  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  2856  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  2857  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  2858  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  2859  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  2860  	JNE  LBB0_797
  2861  	JMP  LBB0_798 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  2862  
  2863  LBB0_942: // 32-bit integer multiply over r10 elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the pmulld loop
  2864  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (destination) buffer
  2865  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx buffer
  2866  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2867  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  2868  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx buffer
  2869  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  2870  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  2871  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2872  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  2873  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  2874  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  2875  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  2876  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  2877  	JNE  LBB0_951 // destination overlaps the rdx buffer: go to LBB0_951 (outside this excerpt; presumably the scalar loop -- confirm)
  2878  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  2879  	JNE  LBB0_951 // destination overlaps the rcx buffer: same fallback
  2880  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  2881  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  2882  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  2883  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  2884  	LONG $0x03e9c149         // shr    r9, 3
  2885  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  2886  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  2887  	JE   LBB0_945 // exactly one chunk: skip the two-chunk loop (LBB0_945 is outside this excerpt)
  2888  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  2889  	LONG $0xfee08348         // and    rax, -2
  2890  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  2891  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  2892    
  2893  LBB0_947: // vector loop: two 8-element chunks (16 dwords) per iteration
  2894  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  2895  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  2896  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  2897  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0 -- four 32-bit products, low 32 bits of each kept
  2898  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  2899  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  2900  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  2901  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  2902  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  2903  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  2904  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  2905  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  2906  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  2907  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  2908  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  2909  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  2910  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  2911  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  2912  	JNE  LBB0_947
  2913  	JMP  LBB0_948 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  2914  
  2915  LBB0_107: // 32-bit integer add over r10 elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the paddd loop
  2916  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (destination) buffer
  2917  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx buffer
  2918  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2919  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  2920  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx buffer
  2921  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  2922  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  2923  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2924  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  2925  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  2926  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  2927  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  2928  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  2929  	JNE  LBB0_116 // destination overlaps the rdx buffer: go to LBB0_116 (outside this excerpt; presumably the scalar loop -- confirm)
  2930  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  2931  	JNE  LBB0_116 // destination overlaps the rcx buffer: same fallback
  2932  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  2933  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  2934  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  2935  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  2936  	LONG $0x03e9c149         // shr    r9, 3
  2937  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  2938  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  2939  	JE   LBB0_110 // exactly one chunk: skip the two-chunk loop (LBB0_110 is outside this excerpt)
  2940  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  2941  	LONG $0xfee08348         // and    rax, -2
  2942  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  2943  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  2944    
  2945  LBB0_112: // vector loop: two 8-element chunks (16 dwords) per iteration
  2946  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  2947  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  2948  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  2949  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0 -- four 32-bit sums (wrapping)
  2950  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  2951  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  2952  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  2953  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  2954  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  2955  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  2956  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  2957  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  2958  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  2959  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  2960  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  2961  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  2962  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  2963  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  2964  	JNE  LBB0_112
  2965  	JMP  LBB0_113 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  2966  
  2967  LBB0_453: // 32-bit integer subtract ([rdx] - [rcx]) over r10 elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the psubd loop
  2968  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (destination) buffer
  2969  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx buffer
  2970  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2971  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  2972  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx buffer
  2973  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  2974  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  2975  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  2976  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  2977  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  2978  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  2979  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  2980  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  2981  	JNE  LBB0_462 // destination overlaps the rdx buffer: go to LBB0_462 (outside this excerpt; presumably the scalar loop -- confirm)
  2982  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  2983  	JNE  LBB0_462 // destination overlaps the rcx buffer: same fallback
  2984  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  2985  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  2986  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  2987  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  2988  	LONG $0x03e9c149         // shr    r9, 3
  2989  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  2990  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  2991  	JE   LBB0_456 // exactly one chunk: skip the two-chunk loop (LBB0_456 is outside this excerpt)
  2992  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  2993  	LONG $0xfee08348         // and    rax, -2
  2994  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  2995  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  2996    
  2997  LBB0_458: // vector loop: two 8-element chunks (16 dwords) per iteration
  2998  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  2999  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  3000  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  3001  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2 -- four 32-bit differences: rdx lanes minus rcx lanes
  3002  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  3003  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  3004  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  3005  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  3006  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  3007  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  3008  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  3009  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  3010  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
  3011  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  3012  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm0
  3013  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
  3014  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  3015  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3016  	JNE  LBB0_458
  3017  	JMP  LBB0_459 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3018  
  3019  LBB0_280: // 32-bit integer add over r10 elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the paddd loop
  3020  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (destination) buffer
  3021  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx buffer
  3022  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3023  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  3024  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx buffer
  3025  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3026  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  3027  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3028  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  3029  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3030  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  3031  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  3032  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3033  	JNE  LBB0_289 // destination overlaps the rdx buffer: go to LBB0_289 (outside this excerpt; presumably the scalar loop -- confirm)
  3034  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3035  	JNE  LBB0_289 // destination overlaps the rcx buffer: same fallback
  3036  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3037  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  3038  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  3039  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3040  	LONG $0x03e9c149         // shr    r9, 3
  3041  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  3042  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3043  	JE   LBB0_283 // exactly one chunk: skip the two-chunk loop (LBB0_283 is outside this excerpt)
  3044  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3045  	LONG $0xfee08348         // and    rax, -2
  3046  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  3047  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  3048    
  3049  LBB0_285: // vector loop: two 8-element chunks (16 dwords) per iteration
  3050  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  3051  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  3052  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  3053  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0 -- four 32-bit sums (wrapping)
  3054  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  3055  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  3056  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  3057  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  3058  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  3059  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  3060  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  3061  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  3062  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  3063  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  3064  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  3065  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  3066  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  3067  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3068  	JNE  LBB0_285
  3069  	JMP  LBB0_286 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3070  
  3071  LBB0_626: // 32-bit integer subtract ([rdx] - [rcx]) over r10 elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the psubd loop
  3072  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] -- rsi = one past the end of the r8 (destination) buffer
  3073  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] -- rax = one past the end of the rdx buffer
  3074  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3075  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  3076  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] -- rax = one past the end of the rcx buffer
  3077  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3078  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  3079  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3080  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  3081  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3082  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  3083  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  3084  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3085  	JNE  LBB0_635 // destination overlaps the rdx buffer: go to LBB0_635 (outside this excerpt; presumably the scalar loop -- confirm)
  3086  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3087  	JNE  LBB0_635 // destination overlaps the rcx buffer: same fallback
  3088  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3089  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 -- rsi = count rounded down to a multiple of 8
  3090  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  3091  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3092  	LONG $0x03e9c149         // shr    r9, 3
  3093  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 8 = number of 8-element chunks
  3094  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3095  	JE   LBB0_629 // exactly one chunk: skip the two-chunk loop (LBB0_629 is outside this excerpt)
  3096  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3097  	LONG $0xfee08348         // and    rax, -2
  3098  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  3099  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  3100    
  3101  LBB0_631: // vector loop: two 8-element chunks (16 dwords) per iteration
  3102  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  3103  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  3104  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  3105  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2 -- four 32-bit differences: rdx lanes minus rcx lanes
  3106  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  3107  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  3108  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  3109  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  3110  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  3111  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  3112  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  3113  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  3114  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
  3115  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  3116  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm0
  3117  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
  3118  	LONG $0x10c78348                           // add    rdi, 16 -- advance by 16 elements
  3119  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3120  	JNE  LBB0_631
  3121  	JMP  LBB0_632 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3122  
  3123  LBB0_850: // packed double-precision multiply over r10 8-byte elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the mulpd loop
  3124  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 (destination) buffer
  3125  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx buffer
  3126  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3127  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  3128  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx buffer
  3129  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3130  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  3131  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3132  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  3133  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3134  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  3135  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  3136  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3137  	JNE  LBB0_859 // destination overlaps the rdx buffer: go to LBB0_859 (outside this excerpt; presumably the scalar loop -- confirm)
  3138  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3139  	JNE  LBB0_859 // destination overlaps the rcx buffer: same fallback
  3140  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3141  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = count rounded down to a multiple of 4
  3142  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3143  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3144  	LONG $0x02e9c149         // shr    r9, 2
  3145  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 4 = number of 4-element chunks
  3146  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3147  	JE   LBB0_853 // exactly one chunk: skip the two-chunk loop (LBB0_853 is outside this excerpt)
  3148  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3149  	LONG $0xfee08348         // and    rax, -2
  3150  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  3151  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  3152    
  3153  LBB0_855: // vector loop: two 4-element chunks (8 doubles) per iteration
  3154  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3155  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3156  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3157  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0 -- two double-precision products
  3158  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  3159  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  3160  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  3161  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  3162  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3163  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3164  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3165  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0
  3166  	LONG $0x44100f66; WORD $0x30f9             // movupd    xmm0, oword [rcx + 8*rdi + 48]
  3167  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  3168  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
  3169  	LONG $0x110f4166; WORD $0xf844; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm0
  3170  	LONG $0x08c78348                           // add    rdi, 8 -- advance by 8 elements
  3171  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3172  	JNE  LBB0_855
  3173  	JMP  LBB0_856 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3174  
  3175  LBB0_1000: // packed double-precision multiply over r10 8-byte elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the mulpd loop
  3176  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 (destination) buffer
  3177  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx buffer
  3178  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3179  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  3180  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx buffer
  3181  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3182  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  3183  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3184  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  3185  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3186  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  3187  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  3188  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3189  	JNE  LBB0_1009 // destination overlaps the rdx buffer: go to LBB0_1009 (outside this excerpt; presumably the scalar loop -- confirm)
  3190  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3191  	JNE  LBB0_1009 // destination overlaps the rcx buffer: same fallback
  3192  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3193  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = count rounded down to a multiple of 4
  3194  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3195  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3196  	LONG $0x02e9c149         // shr    r9, 2
  3197  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 4 = number of 4-element chunks
  3198  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3199  	JE   LBB0_1003 // exactly one chunk: skip the two-chunk loop (LBB0_1003 is outside this excerpt)
  3200  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3201  	LONG $0xfee08348         // and    rax, -2
  3202  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  3203  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  3204    
  3205  LBB0_1005: // vector loop: two 4-element chunks (8 doubles) per iteration
  3206  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3207  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3208  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3209  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0 -- two double-precision products
  3210  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  3211  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  3212  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  3213  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  3214  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3215  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3216  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3217  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0
  3218  	LONG $0x44100f66; WORD $0x30f9             // movupd    xmm0, oword [rcx + 8*rdi + 48]
  3219  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  3220  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
  3221  	LONG $0x110f4166; WORD $0xf844; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm0
  3222  	LONG $0x08c78348                           // add    rdi, 8 -- advance by 8 elements
  3223  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3224  	JNE  LBB0_1005
  3225  	JMP  LBB0_1006 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3226  
  3227  LBB0_181: // packed double-precision add over r10 8-byte elements (r10 is used as the element count; it is set outside this excerpt): overlap check, then the addpd loop
  3228  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 (destination) buffer
  3229  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx buffer
  3230  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3231  	LONG $0xd1970f41         // seta    r9b -- r9b = (rdx buffer end > destination start)
  3232  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx buffer
  3233  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3234  	LONG $0xd3970f41         // seta    r11b -- r11b = (destination end > rdx buffer start)
  3235  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3236  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = (rcx buffer end > destination start)
  3237  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3238  	LONG $0xd7970f40         // seta    dil -- dil = (destination end > rcx buffer start)
  3239  	WORD $0xf631             // xor    esi, esi -- index 0 for the fallback path
  3240  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3241  	JNE  LBB0_190 // destination overlaps the rdx buffer: go to LBB0_190 (outside this excerpt; presumably the scalar loop -- confirm)
  3242  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3243  	JNE  LBB0_190 // destination overlaps the rcx buffer: same fallback
  3244  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3245  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- rsi = count rounded down to a multiple of 4
  3246  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3247  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3248  	LONG $0x02e9c149         // shr    r9, 2
  3249  	LONG $0x01c18349         // add    r9, 1 -- r9 = rsi / 4 = number of 4-element chunks
  3250  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3251  	JE   LBB0_184 // exactly one chunk: skip the two-chunk loop (LBB0_184 is outside this excerpt)
  3252  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3253  	LONG $0xfee08348         // and    rax, -2
  3254  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(chunk count rounded down to even); counts up to zero
  3255  	WORD $0xff31             // xor    edi, edi -- rdi = element index for the vector loop
  3256    
  3257  LBB0_186: // vector loop: two 4-element chunks (8 doubles) per iteration
  3258  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3259  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3260  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3261  	LONG $0xd0580f66                           // addpd    xmm2, xmm0 -- two double-precision sums
  3262  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  3263  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  3264  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  3265  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  3266  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3267  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3268  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3269  	LONG $0xd0580f66                           // addpd    xmm2, xmm0
  3270  	LONG $0x44100f66; WORD $0x30f9             // movupd    xmm0, oword [rcx + 8*rdi + 48]
  3271  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  3272  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
  3273  	LONG $0x110f4166; WORD $0xf844; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm0
  3274  	LONG $0x08c78348                           // add    rdi, 8 -- advance by 8 elements
  3275  	LONG $0x02c08348                           // add    rax, 2 -- two chunks consumed; ZF set when the counter reaches zero
  3276  	JNE  LBB0_186
  3277  	JMP  LBB0_187 // outside this excerpt; presumably handles a leftover odd chunk and the scalar tail -- confirm
  3278  
  3279  LBB0_527:
        	// Setup for the packed-double subtract loop at LBB0_532.
        	// Register roles are established outside this view: r10 is used here as
        	// the element count (8-byte elements) and rdx / rcx / r8 as the two input
        	// pointers and the output pointer (presumably still inLeft / inRight / out
        	// as loaded at function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check. rsi = end of output; the four SETA results
        	// record (input end > output start) and (output end > input start) for
        	// each input, using unsigned compares.
  3280  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
  3281  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
  3282  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3283  	LONG $0xd1970f41         // seta    r9b
  3284  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
  3285  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3286  	LONG $0xd3970f41         // seta    r11b
  3287  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3288  	WORD $0x970f; BYTE $0xd0 // seta    al
  3289  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3290  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so that the overlap exits below reach LBB0_536 with a zero
        	// start index (LBB0_536 is outside this view).
  3291  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3292  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3293  	JNE  LBB0_536
        	// Output overlaps the rcx input -> leave the vector path.
  3294  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3295  	JNE  LBB0_536
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 4 (elements covered by vectors)
        	//   r9  = ((esi - 4) >> 2) + 1 = number of 4-element groups
  3296  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3297  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3298  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3299  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3300  	LONG $0x02e9c149         // shr    r9, 2
  3301  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group (esi == 4): skip the two-groups-per-iteration loop.
  3302  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3303  	JE   LBB0_530
        	// rax = -(r9 & ~1): negated even group count; the loop adds 2 per
        	// iteration and stops when it reaches zero. edi = element index 0.
  3304  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3305  	LONG $0xfee08348         // and    rax, -2
  3306  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3307  	WORD $0xff31             // xor    edi, edi
  3308  
  3309  LBB0_532:
        	// Packed-double subtract loop, two 4-element groups (8 doubles, 64 bytes)
        	// per iteration: [r8 + 8*rdi + k] = [rdx + 8*rdi + k] - [rcx + 8*rdi + k].
        	// rdi is the element index; rax is a negative group counter that is
        	// stepped by 2 and ends the loop when it reaches zero.
        	// All loads/stores are unaligned (movupd).
        	//
        	// First group: elements rdi .. rdi+3.
  3310  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3311  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3312  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3313  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  3314  	LONG $0x54100f66; WORD $0x10f9             // movupd    xmm2, oword [rcx + 8*rdi + 16]
  3315  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  3316  	LONG $0x110f4166; WORD $0xf804             // movupd    oword [r8 + 8*rdi], xmm0
  3317  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
        	// Second group: elements rdi+4 .. rdi+7.
  3318  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3319  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3320  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3321  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  3322  	LONG $0x54100f66; WORD $0x30f9             // movupd    xmm2, oword [rcx + 8*rdi + 48]
  3323  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  3324  	LONG $0x110f4166; WORD $0xf844; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm0
  3325  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm1
        	// Advance index by 8 elements; the ADD on rax sets ZF on the last pass.
  3326  	LONG $0x08c78348                           // add    rdi, 8
  3327  	LONG $0x02c08348                           // add    rax, 2
  3328  	JNE  LBB0_532
        	// Loop finished; continue at LBB0_533 (outside this view).
  3329  	JMP  LBB0_533
  3330  
  3331  LBB0_354:
        	// Setup for the packed-double add loop at LBB0_359.
        	// Register roles are established outside this view: r10 is used here as
        	// the element count (8-byte elements) and rdx / rcx / r8 as the two input
        	// pointers and the output pointer (presumably still inLeft / inRight / out
        	// as loaded at function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3332  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
  3333  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
  3334  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3335  	LONG $0xd1970f41         // seta    r9b
  3336  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
  3337  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3338  	LONG $0xd3970f41         // seta    r11b
  3339  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3340  	WORD $0x970f; BYTE $0xd0 // seta    al
  3341  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3342  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_363 with a zero start index
        	// (LBB0_363 is outside this view).
  3343  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3344  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3345  	JNE  LBB0_363
        	// Output overlaps the rcx input -> leave the vector path.
  3346  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3347  	JNE  LBB0_363
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 4
        	//   r9  = ((esi - 4) >> 2) + 1 = number of 4-element groups
  3348  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3349  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3350  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3351  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3352  	LONG $0x02e9c149         // shr    r9, 2
  3353  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3354  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3355  	JE   LBB0_357
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = index 0.
  3356  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3357  	LONG $0xfee08348         // and    rax, -2
  3358  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3359  	WORD $0xff31             // xor    edi, edi
  3360  
  3361  LBB0_359:
        	// Packed-double add loop, two 4-element groups (8 doubles, 64 bytes) per
        	// iteration: [r8 + 8*rdi + k] = [rcx + 8*rdi + k] + [rdx + 8*rdi + k].
        	// rdi is the element index; rax is a negative group counter stepped by 2
        	// that ends the loop when it reaches zero. Unaligned accesses (movupd).
        	//
        	// First group: elements rdi .. rdi+3.
  3362  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3363  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3364  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3365  	LONG $0xd0580f66                           // addpd    xmm2, xmm0
  3366  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  3367  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  3368  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  3369  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
        	// Second group: elements rdi+4 .. rdi+7.
  3370  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3371  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3372  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3373  	LONG $0xd0580f66                           // addpd    xmm2, xmm0
  3374  	LONG $0x44100f66; WORD $0x30f9             // movupd    xmm0, oword [rcx + 8*rdi + 48]
  3375  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  3376  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
  3377  	LONG $0x110f4166; WORD $0xf844; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm0
        	// Advance index by 8 elements; the ADD on rax sets ZF on the last pass.
  3378  	LONG $0x08c78348                           // add    rdi, 8
  3379  	LONG $0x02c08348                           // add    rax, 2
  3380  	JNE  LBB0_359
        	// Loop finished; continue at LBB0_360 (outside this view).
  3381  	JMP  LBB0_360
  3382  
  3383  LBB0_700:
        	// Setup for the packed-double subtract loop at LBB0_705.
        	// Register roles are established outside this view: r10 is used here as
        	// the element count (8-byte elements) and rdx / rcx / r8 as the two input
        	// pointers and the output pointer (presumably still inLeft / inRight / out
        	// as loaded at function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3384  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]
  3385  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]
  3386  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3387  	LONG $0xd1970f41         // seta    r9b
  3388  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]
  3389  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3390  	LONG $0xd3970f41         // seta    r11b
  3391  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3392  	WORD $0x970f; BYTE $0xd0 // seta    al
  3393  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3394  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_709 with a zero start index
        	// (LBB0_709 is outside this view).
  3395  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3396  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3397  	JNE  LBB0_709
        	// Output overlaps the rcx input -> leave the vector path.
  3398  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3399  	JNE  LBB0_709
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 4
        	//   r9  = ((esi - 4) >> 2) + 1 = number of 4-element groups
  3400  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3401  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3402  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3403  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3404  	LONG $0x02e9c149         // shr    r9, 2
  3405  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3406  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3407  	JE   LBB0_703
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = index 0.
  3408  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3409  	LONG $0xfee08348         // and    rax, -2
  3410  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3411  	WORD $0xff31             // xor    edi, edi
  3412  
  3413  LBB0_705:
        	// Packed-double subtract loop, two 4-element groups (8 doubles, 64 bytes)
        	// per iteration: [r8 + 8*rdi + k] = [rdx + 8*rdi + k] - [rcx + 8*rdi + k].
        	// rdi is the element index; rax is a negative group counter stepped by 2
        	// that ends the loop when it reaches zero. Unaligned accesses (movupd).
        	//
        	// First group: elements rdi .. rdi+3.
  3414  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  3415  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  3416  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  3417  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  3418  	LONG $0x54100f66; WORD $0x10f9             // movupd    xmm2, oword [rcx + 8*rdi + 16]
  3419  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  3420  	LONG $0x110f4166; WORD $0xf804             // movupd    oword [r8 + 8*rdi], xmm0
  3421  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
        	// Second group: elements rdi+4 .. rdi+7.
  3422  	LONG $0x44100f66; WORD $0x20fa             // movupd    xmm0, oword [rdx + 8*rdi + 32]
  3423  	LONG $0x4c100f66; WORD $0x30fa             // movupd    xmm1, oword [rdx + 8*rdi + 48]
  3424  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
  3425  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  3426  	LONG $0x54100f66; WORD $0x30f9             // movupd    xmm2, oword [rcx + 8*rdi + 48]
  3427  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  3428  	LONG $0x110f4166; WORD $0xf844; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm0
  3429  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm1
        	// Advance index by 8 elements; the ADD on rax sets ZF on the last pass.
  3430  	LONG $0x08c78348                           // add    rdi, 8
  3431  	LONG $0x02c08348                           // add    rax, 2
  3432  	JNE  LBB0_705
        	// Loop finished; continue at LBB0_706 (outside this view).
  3433  	JMP  LBB0_706
  3434  
  3435  LBB0_747:
        	// Setup for the byte-wise multiply loop at LBB0_752.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	// Note the register assignment differs from the add/sub setups: here
        	// edi carries the rounded count, rsi the loop counter, rax the byte index.
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3436  	LONG $0x10348d4b             // lea    rsi, [r8 + r10]
  3437  	LONG $0x12048d4a             // lea    rax, [rdx + r10]
  3438  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  3439  	LONG $0xd1970f41             // seta    r9b
  3440  	LONG $0x11048d4a             // lea    rax, [rcx + r10]
  3441  	WORD $0x3948; BYTE $0xd6     // cmp    rsi, rdx
  3442  	LONG $0xd3970f41             // seta    r11b
  3443  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  3444  	WORD $0x970f; BYTE $0xd0     // seta    al
  3445  	WORD $0x3948; BYTE $0xce     // cmp    rsi, rcx
  3446  	LONG $0xd6970f40             // seta    sil
        	// edi = 0 so the overlap exits reach LBB0_756 with a zero start index
        	// (LBB0_756 is outside this view).
  3447  	WORD $0xff31                 // xor    edi, edi
        	// Output overlaps the rdx input -> leave the vector path.
  3448  	WORD $0x8445; BYTE $0xd9     // test    r9b, r11b
  3449  	JNE  LBB0_756
        	// Output overlaps the rcx input -> leave the vector path.
  3450  	WORD $0x2040; BYTE $0xf0     // and    al, sil
  3451  	JNE  LBB0_756
        	// Step 2: loop bookkeeping.
        	//   edi = r10d rounded down to a multiple of 32 (bytes covered by vectors)
        	//   r9  = ((edi - 32) >> 5) + 1 = number of 32-byte groups
  3452  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
  3453  	WORD $0xe783; BYTE $0xe0     // and    edi, -32
  3454  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
  3455  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
  3456  	LONG $0x05e9c149             // shr    r9, 5
  3457  	LONG $0x01c18349             // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3458  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  3459  	JE   LBB0_750
        	// rsi = -(r9 & ~1): negative counter stepped by 2 up to zero; eax = byte index 0.
  3460  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
  3461  	LONG $0xfee68348             // and    rsi, -2
  3462  	WORD $0xf748; BYTE $0xde     // neg    rsi
  3463  	WORD $0xc031                 // xor    eax, eax
        	// xmm0 = 16-byte constant at [rbp]. rbp was pointed at LCDATA1 (0x00ff in
        	// every 16-bit lane) at function entry; assumes rbp is unchanged since.
        	// movdqa requires that address to be 16-byte aligned.
  3464  	LONG $0x456f0f66; BYTE $0x00 // movdqa    xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */
  3465  
  3466  LBB0_752:
        	// Byte-wise multiply loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rax + k] = low 8 bits of ([rdx + rax + k] * [rcx + rax + k]).
        	// SSE has no packed byte multiply, so each 16-byte vector is widened to
        	// 16-bit lanes, multiplied with pmullw, masked, and packed back:
        	//   pmovzxbw  - low 8 bytes zero-extended to words
        	//   punpckhbw - high 8 bytes spread into words (byte duplicated into both
        	//               halves; only the low byte of the product is kept, so the
        	//               duplicate does not affect the result)
        	//   pand xmm0 - keep the low byte of each word product (xmm0 is the mask
        	//               loaded before the loop, expected to be 0x00ff per lane)
        	//   packuswb  - narrow the two word vectors back into 16 bytes
        	// rax is the byte index; rsi is a negative group counter stepped by 2.
        	//
        	// First group: bytes rax .. rax+31.
  3467  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  3468  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  3469  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  3470  	LONG $0x646f0ff3; WORD $0x1001             // movdqu    xmm4, oword [rcx + rax + 16]
  3471  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  3472  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  3473  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  3474  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  3475  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3476  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3477  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  3478  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  3479  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  3480  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  3481  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  3482  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  3483  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  3484  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  3485  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  3486  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3487  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3488  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  3489  	LONG $0x7f0f41f3; WORD $0x0034             // movdqu    oword [r8 + rax], xmm6
  3490  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm3
        	// Second group: bytes rax+32 .. rax+63 (same sequence).
  3491  	LONG $0x4c6f0ff3; WORD $0x2002             // movdqu    xmm1, oword [rdx + rax + 32]
  3492  	LONG $0x546f0ff3; WORD $0x3002             // movdqu    xmm2, oword [rdx + rax + 48]
  3493  	LONG $0x5c6f0ff3; WORD $0x2001             // movdqu    xmm3, oword [rcx + rax + 32]
  3494  	LONG $0x646f0ff3; WORD $0x3001             // movdqu    xmm4, oword [rcx + rax + 48]
  3495  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  3496  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  3497  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  3498  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  3499  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3500  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3501  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  3502  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  3503  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  3504  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  3505  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  3506  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  3507  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  3508  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  3509  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  3510  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3511  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3512  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  3513  	LONG $0x7f0f41f3; WORD $0x0074; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm6
  3514  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm3
        	// Advance index by 64 bytes; the ADD on rsi sets ZF on the last pass.
  3515  	LONG $0x40c08348                           // add    rax, 64
  3516  	LONG $0x02c68348                           // add    rsi, 2
  3517  	JNE  LBB0_752
        	// Loop finished; continue at LBB0_753 (outside this view).
  3518  	JMP  LBB0_753
  3519  
  3520  LBB0_897:
        	// Setup for the byte-wise multiply loop at LBB0_902.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	// Here edi carries the rounded count, rsi the loop counter, rax the
        	// byte index.
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3521  	LONG $0x10348d4b             // lea    rsi, [r8 + r10]
  3522  	LONG $0x12048d4a             // lea    rax, [rdx + r10]
  3523  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  3524  	LONG $0xd1970f41             // seta    r9b
  3525  	LONG $0x11048d4a             // lea    rax, [rcx + r10]
  3526  	WORD $0x3948; BYTE $0xd6     // cmp    rsi, rdx
  3527  	LONG $0xd3970f41             // seta    r11b
  3528  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  3529  	WORD $0x970f; BYTE $0xd0     // seta    al
  3530  	WORD $0x3948; BYTE $0xce     // cmp    rsi, rcx
  3531  	LONG $0xd6970f40             // seta    sil
        	// edi = 0 so the overlap exits reach LBB0_906 with a zero start index
        	// (LBB0_906 is outside this view).
  3532  	WORD $0xff31                 // xor    edi, edi
        	// Output overlaps the rdx input -> leave the vector path.
  3533  	WORD $0x8445; BYTE $0xd9     // test    r9b, r11b
  3534  	JNE  LBB0_906
        	// Output overlaps the rcx input -> leave the vector path.
  3535  	WORD $0x2040; BYTE $0xf0     // and    al, sil
  3536  	JNE  LBB0_906
        	// Step 2: loop bookkeeping.
        	//   edi = r10d rounded down to a multiple of 32
        	//   r9  = ((edi - 32) >> 5) + 1 = number of 32-byte groups
  3537  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
  3538  	WORD $0xe783; BYTE $0xe0     // and    edi, -32
  3539  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
  3540  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
  3541  	LONG $0x05e9c149             // shr    r9, 5
  3542  	LONG $0x01c18349             // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3543  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  3544  	JE   LBB0_900
        	// rsi = -(r9 & ~1): negative counter stepped by 2 up to zero; eax = byte index 0.
  3545  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
  3546  	LONG $0xfee68348             // and    rsi, -2
  3547  	WORD $0xf748; BYTE $0xde     // neg    rsi
  3548  	WORD $0xc031                 // xor    eax, eax
        	// xmm0 = 16-byte constant at [rbp]. rbp was pointed at LCDATA1 (0x00ff in
        	// every 16-bit lane) at function entry; assumes rbp is unchanged since.
        	// movdqa requires that address to be 16-byte aligned.
  3549  	LONG $0x456f0f66; BYTE $0x00 // movdqa    xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */
  3550  
  3551  LBB0_902:
        	// Byte-wise multiply loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rax + k] = low 8 bits of ([rdx + rax + k] * [rcx + rax + k]).
        	// Each 16-byte vector is widened to 16-bit lanes (pmovzxbw for the low
        	// half, punpckhbw for the high half), multiplied with pmullw, masked with
        	// xmm0 (loaded before the loop, expected to be 0x00ff per lane) to keep
        	// the low byte of every product, then narrowed back with packuswb.
        	// rax is the byte index; rsi is a negative group counter stepped by 2.
        	//
        	// First group: bytes rax .. rax+31.
  3552  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  3553  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  3554  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  3555  	LONG $0x646f0ff3; WORD $0x1001             // movdqu    xmm4, oword [rcx + rax + 16]
  3556  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  3557  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  3558  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  3559  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  3560  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3561  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3562  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  3563  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  3564  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  3565  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  3566  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  3567  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  3568  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  3569  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  3570  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  3571  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3572  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3573  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  3574  	LONG $0x7f0f41f3; WORD $0x0034             // movdqu    oword [r8 + rax], xmm6
  3575  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm3
        	// Second group: bytes rax+32 .. rax+63 (same sequence).
  3576  	LONG $0x4c6f0ff3; WORD $0x2002             // movdqu    xmm1, oword [rdx + rax + 32]
  3577  	LONG $0x546f0ff3; WORD $0x3002             // movdqu    xmm2, oword [rdx + rax + 48]
  3578  	LONG $0x5c6f0ff3; WORD $0x2001             // movdqu    xmm3, oword [rcx + rax + 32]
  3579  	LONG $0x646f0ff3; WORD $0x3001             // movdqu    xmm4, oword [rcx + rax + 48]
  3580  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  3581  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  3582  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  3583  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  3584  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3585  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3586  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  3587  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  3588  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  3589  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  3590  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  3591  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  3592  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  3593  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  3594  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  3595  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  3596  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  3597  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  3598  	LONG $0x7f0f41f3; WORD $0x0074; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm6
  3599  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm3
        	// Advance index by 64 bytes; the ADD on rsi sets ZF on the last pass.
  3600  	LONG $0x40c08348                           // add    rax, 64
  3601  	LONG $0x02c68348                           // add    rsi, 2
  3602  	JNE  LBB0_902
        	// Loop finished; continue at LBB0_903 (outside this view).
  3603  	JMP  LBB0_903
  3604  
  3605  LBB0_62:
        	// Setup for the packed-byte add loop at LBB0_67.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3606  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  3607  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  3608  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3609  	LONG $0xd1970f41         // seta    r9b
  3610  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  3611  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3612  	LONG $0xd3970f41         // seta    r11b
  3613  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3614  	WORD $0x970f; BYTE $0xd0 // seta    al
  3615  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3616  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_71 with a zero start index
        	// (LBB0_71 is outside this view).
  3617  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3618  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3619  	JNE  LBB0_71
        	// Output overlaps the rcx input -> leave the vector path.
  3620  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3621  	JNE  LBB0_71
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 32 (bytes covered by vectors)
        	//   r9  = ((esi - 32) >> 5) + 1 = number of 32-byte groups
  3622  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3623  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3624  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3625  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3626  	LONG $0x05e9c149         // shr    r9, 5
  3627  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3628  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3629  	JE   LBB0_65
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = byte index 0.
  3630  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3631  	LONG $0xfee08348         // and    rax, -2
  3632  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3633  	WORD $0xff31             // xor    edi, edi
  3634  
  3635  LBB0_67:
        	// Packed-byte add loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rdi + k] = [rcx + rdi + k] + [rdx + rdi + k], wrapping per byte
        	// (paddb). rdi is the byte index; rax is a negative group counter stepped
        	// by 2 that ends the loop when it reaches zero. Unaligned accesses (movdqu).
        	//
        	// First group: bytes rdi .. rdi+31.
  3636  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  3637  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  3638  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  3639  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  3640  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  3641  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  3642  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  3643  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
        	// Second group: bytes rdi+32 .. rdi+63.
  3644  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  3645  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  3646  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  3647  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  3648  	LONG $0x446f0ff3; WORD $0x3039             // movdqu    xmm0, oword [rcx + rdi + 48]
  3649  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  3650  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm2
  3651  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm0
        	// Advance index by 64 bytes; the ADD on rax sets ZF on the last pass.
  3652  	LONG $0x40c78348                           // add    rdi, 64
  3653  	LONG $0x02c08348                           // add    rax, 2
  3654  	JNE  LBB0_67
        	// Loop finished; continue at LBB0_68 (outside this view).
  3655  	JMP  LBB0_68
  3656  
  3657  LBB0_408:
        	// Setup for the packed-byte subtract loop at LBB0_413.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3658  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  3659  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  3660  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3661  	LONG $0xd1970f41         // seta    r9b
  3662  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  3663  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3664  	LONG $0xd3970f41         // seta    r11b
  3665  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3666  	WORD $0x970f; BYTE $0xd0 // seta    al
  3667  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3668  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_417 with a zero start index
        	// (LBB0_417 is outside this view).
  3669  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3670  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3671  	JNE  LBB0_417
        	// Output overlaps the rcx input -> leave the vector path.
  3672  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3673  	JNE  LBB0_417
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 32
        	//   r9  = ((esi - 32) >> 5) + 1 = number of 32-byte groups
  3674  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3675  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3676  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3677  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3678  	LONG $0x05e9c149         // shr    r9, 5
  3679  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3680  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3681  	JE   LBB0_411
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = byte index 0.
  3682  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3683  	LONG $0xfee08348         // and    rax, -2
  3684  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3685  	WORD $0xff31             // xor    edi, edi
  3686  
  3687  LBB0_413:
        	// Packed-byte subtract loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rdi + k] = [rdx + rdi + k] - [rcx + rdi + k], wrapping per byte
        	// (psubb). rdi is the byte index; rax is a negative group counter stepped
        	// by 2 that ends the loop when it reaches zero. Unaligned accesses (movdqu).
        	//
        	// First group: bytes rdi .. rdi+31.
  3688  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  3689  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  3690  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  3691  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  3692  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  3693  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  3694  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  3695  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
        	// Second group: bytes rdi+32 .. rdi+63.
  3696  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  3697  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  3698  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  3699  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  3700  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
  3701  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  3702  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm0
  3703  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
        	// Advance index by 64 bytes; the ADD on rax sets ZF on the last pass.
  3704  	LONG $0x40c78348                           // add    rdi, 64
  3705  	LONG $0x02c08348                           // add    rax, 2
  3706  	JNE  LBB0_413
        	// Loop finished; continue at LBB0_414 (outside this view).
  3707  	JMP  LBB0_414
  3708  
  3709  LBB0_235:
        	// Setup for the packed-byte add loop at LBB0_240.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3710  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  3711  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  3712  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3713  	LONG $0xd1970f41         // seta    r9b
  3714  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  3715  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3716  	LONG $0xd3970f41         // seta    r11b
  3717  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3718  	WORD $0x970f; BYTE $0xd0 // seta    al
  3719  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3720  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_244 with a zero start index
        	// (LBB0_244 is outside this view).
  3721  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3722  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3723  	JNE  LBB0_244
        	// Output overlaps the rcx input -> leave the vector path.
  3724  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3725  	JNE  LBB0_244
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 32
        	//   r9  = ((esi - 32) >> 5) + 1 = number of 32-byte groups
  3726  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3727  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3728  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3729  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3730  	LONG $0x05e9c149         // shr    r9, 5
  3731  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3732  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3733  	JE   LBB0_238
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = byte index 0.
  3734  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3735  	LONG $0xfee08348         // and    rax, -2
  3736  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3737  	WORD $0xff31             // xor    edi, edi
  3738  
  3739  LBB0_240:
        	// Packed-byte add loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rdi + k] = [rcx + rdi + k] + [rdx + rdi + k], wrapping per byte
        	// (paddb). rdi is the byte index; rax is a negative group counter stepped
        	// by 2 that ends the loop when it reaches zero. Unaligned accesses (movdqu).
        	//
        	// First group: bytes rdi .. rdi+31.
  3740  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  3741  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  3742  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  3743  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  3744  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  3745  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  3746  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  3747  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
        	// Second group: bytes rdi+32 .. rdi+63.
  3748  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  3749  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  3750  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  3751  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  3752  	LONG $0x446f0ff3; WORD $0x3039             // movdqu    xmm0, oword [rcx + rdi + 48]
  3753  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  3754  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm2
  3755  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm0
        	// Advance index by 64 bytes; the ADD on rax sets ZF on the last pass.
  3756  	LONG $0x40c78348                           // add    rdi, 64
  3757  	LONG $0x02c08348                           // add    rax, 2
  3758  	JNE  LBB0_240
        	// Loop finished; continue at LBB0_241 (outside this view).
  3759  	JMP  LBB0_241
  3760  
  3761  LBB0_581:
        	// Setup for the packed-byte subtract loop at LBB0_586.
        	// Register roles are established outside this view: r10 is used here as
        	// the byte count and rdx / rcx / r8 as the two input pointers and the
        	// output pointer (presumably still inLeft / inRight / out as loaded at
        	// function entry -- confirm against the caller path).
        	//
        	// Step 1: range-overlap check between the output range and each input
        	// range (unsigned compares; rsi = end of output).
  3762  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  3763  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  3764  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3765  	LONG $0xd1970f41         // seta    r9b
  3766  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  3767  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3768  	LONG $0xd3970f41         // seta    r11b
  3769  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3770  	WORD $0x970f; BYTE $0xd0 // seta    al
  3771  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3772  	LONG $0xd7970f40         // seta    dil
        	// esi = 0 so the overlap exits reach LBB0_590 with a zero start index
        	// (LBB0_590 is outside this view).
  3773  	WORD $0xf631             // xor    esi, esi
        	// Output overlaps the rdx input -> leave the vector path.
  3774  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3775  	JNE  LBB0_590
        	// Output overlaps the rcx input -> leave the vector path.
  3776  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3777  	JNE  LBB0_590
        	// Step 2: loop bookkeeping.
        	//   esi = r10d rounded down to a multiple of 32
        	//   r9  = ((esi - 32) >> 5) + 1 = number of 32-byte groups
  3778  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3779  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  3780  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  3781  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3782  	LONG $0x05e9c149         // shr    r9, 5
  3783  	LONG $0x01c18349         // add    r9, 1
        	// Exactly one group: skip the two-groups-per-iteration loop.
  3784  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3785  	JE   LBB0_584
        	// rax = -(r9 & ~1): negative counter stepped by 2 up to zero; edi = byte index 0.
  3786  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3787  	LONG $0xfee08348         // and    rax, -2
  3788  	WORD $0xf748; BYTE $0xd8 // neg    rax
  3789  	WORD $0xff31             // xor    edi, edi
  3790  
  3791  LBB0_586:
        	// Packed-byte subtract loop, two 32-byte groups (64 bytes) per iteration:
        	// [r8 + rdi + k] = [rdx + rdi + k] - [rcx + rdi + k], wrapping per byte
        	// (psubb). rdi is the byte index; rax is a negative group counter stepped
        	// by 2 that ends the loop when it reaches zero. Unaligned accesses (movdqu).
        	//
        	// First group: bytes rdi .. rdi+31.
  3792  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  3793  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  3794  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  3795  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  3796  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  3797  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  3798  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  3799  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
        	// Second group: bytes rdi+32 .. rdi+63.
  3800  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  3801  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  3802  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  3803  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  3804  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
  3805  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  3806  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm0
  3807  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
        	// Advance index by 64 bytes; the ADD on rax sets ZF on the last pass.
  3808  	LONG $0x40c78348                           // add    rdi, 64
  3809  	LONG $0x02c08348                           // add    rax, 2
  3810  	JNE  LBB0_586
        	// Loop finished; continue at LBB0_587 (outside this view).
  3811  	JMP  LBB0_587
  3812  
  3813  LBB0_821:
        	// Entry to the 4x-unrolled 64-bit multiply loop (falls through to
        	// LBB0_822). Rounds esi down to a multiple of 4 to form the loop bound
        	// and zeroes the element index in edi. The value esi holds on entry is
        	// set outside this view (presumably the element count -- confirm).
  3814  	WORD $0xe683; BYTE $0xfc // and    esi, -4
  3815  	WORD $0xff31             // xor    edi, edi
  3816  
  3817  LBB0_822:
        	// 4x-unrolled scalar 64-bit multiply: for k = 0..3,
        	// [r8 + 8*(rdi+k)] = [rcx + 8*(rdi+k)] * [rdx + 8*(rdi+k)]
        	// (two-operand imul keeps the low 64 bits of the product).
        	// rdi is the element index, rsi the loop bound; the loop ends when
        	// rdi == rsi, then falls through to LBB0_823.
  3818  	LONG $0xf9048b48               // mov    rax, qword [rcx + 8*rdi]
  3819  	LONG $0x04af0f48; BYTE $0xfa   // imul    rax, qword [rdx + 8*rdi]
  3820  	LONG $0xf8048949               // mov    qword [r8 + 8*rdi], rax
  3821  	LONG $0xf9448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rdi + 8]
  3822  	LONG $0x44af0f48; WORD $0x08fa // imul    rax, qword [rdx + 8*rdi + 8]
  3823  	LONG $0xf8448949; BYTE $0x08   // mov    qword [r8 + 8*rdi + 8], rax
  3824  	LONG $0xf9448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rdi + 16]
  3825  	LONG $0x44af0f48; WORD $0x10fa // imul    rax, qword [rdx + 8*rdi + 16]
  3826  	LONG $0xf8448949; BYTE $0x10   // mov    qword [r8 + 8*rdi + 16], rax
  3827  	LONG $0xf9448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rdi + 24]
  3828  	LONG $0x44af0f48; WORD $0x18fa // imul    rax, qword [rdx + 8*rdi + 24]
  3829  	LONG $0xf8448949; BYTE $0x18   // mov    qword [r8 + 8*rdi + 24], rax
        	// Advance by 4 elements and compare against the bound.
  3830  	LONG $0x04c78348               // add    rdi, 4
  3831  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3832  	JNE  LBB0_822
  3833  
  3834  LBB0_823: // Tail set-up: r9 is the trip count of the one-element-at-a-time loop at LBB0_825.
  3835  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
  3836  	JE   LBB0_1013 // no tail elements; LBB0_1013 is outside this excerpt (presumably the common exit -- confirm)
  3837  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi] -- rebase all three pointers at element rdi ...
  3838  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
  3839  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
  3840  	WORD $0xff31             // xor    edi, edi -- ... so the tail loop can index from 0
  3841  
  3842  LBB0_825: // Scalar tail loop on 64-bit elements: [rsi + 8*rdi] = [rcx + 8*rdi] * [rdx + 8*rdi], r9 iterations.
  3843  	LONG $0xf9048b48             // mov    rax, qword [rcx + 8*rdi]
  3844  	LONG $0x04af0f48; BYTE $0xfa // imul    rax, qword [rdx + 8*rdi] -- low 64 bits of the product
  3845  	LONG $0xfe048948             // mov    qword [rsi + 8*rdi], rax -- rsi is the rebased output pointer from LBB0_823
  3846  	LONG $0x01c78348             // add    rdi, 1
  3847  	WORD $0x3949; BYTE $0xf9     // cmp    r9, rdi
  3848  	JNE  LBB0_825 // loop until rdi == r9
  3849  	JMP  LBB0_1013 // done with this path; LBB0_1013 is outside this excerpt
  3850  
  3851  LBB0_971: // Set-up for the unrolled 64-bit multiply loop at LBB0_972.
  3852  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- round esi down to a multiple of 4; LBB0_972 uses rsi as its loop bound
  3853  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  3854  
  3855  LBB0_972: // 4x-unrolled scalar loop on 64-bit elements: [r8 + 8*rdi + k] = [rcx + 8*rdi + k] * [rdx + 8*rdi + k], k = 0, 8, 16, 24.
  3856  	LONG $0xf9048b48               // mov    rax, qword [rcx + 8*rdi]
  3857  	LONG $0x04af0f48; BYTE $0xfa   // imul    rax, qword [rdx + 8*rdi] -- two-operand imul keeps the low 64 bits of the product
  3858  	LONG $0xf8048949               // mov    qword [r8 + 8*rdi], rax
  3859  	LONG $0xf9448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rdi + 8]
  3860  	LONG $0x44af0f48; WORD $0x08fa // imul    rax, qword [rdx + 8*rdi + 8]
  3861  	LONG $0xf8448949; BYTE $0x08   // mov    qword [r8 + 8*rdi + 8], rax
  3862  	LONG $0xf9448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rdi + 16]
  3863  	LONG $0x44af0f48; WORD $0x10fa // imul    rax, qword [rdx + 8*rdi + 16]
  3864  	LONG $0xf8448949; BYTE $0x10   // mov    qword [r8 + 8*rdi + 16], rax
  3865  	LONG $0xf9448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rdi + 24]
  3866  	LONG $0x44af0f48; WORD $0x18fa // imul    rax, qword [rdx + 8*rdi + 24]
  3867  	LONG $0xf8448949; BYTE $0x18   // mov    qword [r8 + 8*rdi + 24], rax
  3868  	LONG $0x04c78348               // add    rdi, 4 -- four elements handled per pass
  3869  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  3870  	JNE  LBB0_972 // loop until the index reaches the bound in rsi
  3871  
  3872  LBB0_973: // Tail set-up: r9 is the trip count of the one-element-at-a-time loop at LBB0_975.
  3873  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
  3874  	JE   LBB0_1013 // no tail elements; LBB0_1013 is outside this excerpt (presumably the common exit -- confirm)
  3875  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi] -- rebase all three pointers at element rdi ...
  3876  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
  3877  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
  3878  	WORD $0xff31             // xor    edi, edi -- ... so the tail loop can index from 0
  3879  
  3880  LBB0_975: // Scalar tail loop on 64-bit elements: [rsi + 8*rdi] = [rcx + 8*rdi] * [rdx + 8*rdi], r9 iterations.
  3881  	LONG $0xf9048b48             // mov    rax, qword [rcx + 8*rdi]
  3882  	LONG $0x04af0f48; BYTE $0xfa // imul    rax, qword [rdx + 8*rdi] -- low 64 bits of the product
  3883  	LONG $0xfe048948             // mov    qword [rsi + 8*rdi], rax -- rsi is the rebased output pointer from LBB0_973
  3884  	LONG $0x01c78348             // add    rdi, 1
  3885  	WORD $0x3949; BYTE $0xf9     // cmp    r9, rdi
  3886  	JNE  LBB0_975 // loop until rdi == r9
  3887  	JMP  LBB0_1013 // done with this path; LBB0_1013 is outside this excerpt
  3888  
  3889  LBB0_136: // Overlap check and trip-count set-up for the paddq loop at LBB0_141; r10 is used as the count of 64-bit elements.
  3890  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 range
  3891  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx range
  3892  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3893  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  3894  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx range
  3895  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3896  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  3897  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3898  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  3899  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3900  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  3901  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  3902  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3903  	JNE  LBB0_145 // r8 and rdx ranges overlap: leave the vector path (LBB0_145 is outside this excerpt)
  3904  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3905  	JNE  LBB0_145 // r8 and rcx ranges overlap: same
  3906  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3907  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- esi = count rounded down to a multiple of 4
  3908  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3909  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3910  	LONG $0x02e9c149         // shr    r9, 2
  3911  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 4)/4 + 1 = number of 4-element chunks
  3912  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3913  	JE   LBB0_139 // rsi == 4, i.e. a single chunk: bypass the two-chunk loop (LBB0_139 is outside this excerpt)
  3914  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3915  	LONG $0xfee08348         // and    rax, -2
  3916  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_141 adds 2 per pass until it reaches 0
  3917  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  3918  
  3919  LBB0_141: // Vector loop: 64-bit lane add, [r8 + 8*rdi] = [rcx + 8*rdi] + [rdx + 8*rdi], 8 elements (64 bytes) per pass.
  3920  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  3921  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  3922  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  3923  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  3924  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  3925  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  3926  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  3927  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  3928  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32] -- second half of the pass, same pattern at +32/+48
  3929  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  3930  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  3931  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  3932  	LONG $0x446f0ff3; WORD $0x30f9             // movdqu    xmm0, oword [rcx + 8*rdi + 48]
  3933  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  3934  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm2
  3935  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm0
  3936  	LONG $0x08c78348                           // add    rdi, 8 -- eight 64-bit elements consumed per pass
  3937  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  3938  	JNE  LBB0_141 // repeat until the counter reaches zero
  3939  	JMP  LBB0_142 // continue at LBB0_142 (outside this excerpt)
  3940  
  3941  LBB0_482: // Overlap check and trip-count set-up for the psubq loop at LBB0_487; r10 is used as the count of 64-bit elements.
  3942  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 range
  3943  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx range
  3944  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3945  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  3946  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx range
  3947  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  3948  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  3949  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3950  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  3951  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  3952  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  3953  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  3954  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  3955  	JNE  LBB0_491 // r8 and rdx ranges overlap: leave the vector path (LBB0_491 is outside this excerpt)
  3956  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  3957  	JNE  LBB0_491 // r8 and rcx ranges overlap: same
  3958  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  3959  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- esi = count rounded down to a multiple of 4
  3960  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  3961  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  3962  	LONG $0x02e9c149         // shr    r9, 2
  3963  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 4)/4 + 1 = number of 4-element chunks
  3964  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  3965  	JE   LBB0_485 // rsi == 4, i.e. a single chunk: bypass the two-chunk loop (LBB0_485 is outside this excerpt)
  3966  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  3967  	LONG $0xfee08348         // and    rax, -2
  3968  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_487 adds 2 per pass until it reaches 0
  3969  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  3970  
  3971  LBB0_487: // Vector loop: 64-bit lane subtract, [r8 + 8*rdi] = [rdx + 8*rdi] - [rcx + 8*rdi], 8 elements (64 bytes) per pass.
  3972  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  3973  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  3974  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  3975  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2 -- xmm0 = rdx lanes minus rcx lanes
  3976  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  3977  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  3978  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  3979  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  3980  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32] -- second half of the pass, same pattern at +32/+48
  3981  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  3982  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  3983  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  3984  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
  3985  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  3986  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm0
  3987  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
  3988  	LONG $0x08c78348                           // add    rdi, 8 -- eight 64-bit elements consumed per pass
  3989  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  3990  	JNE  LBB0_487 // repeat until the counter reaches zero
  3991  	JMP  LBB0_488 // continue at LBB0_488 (outside this excerpt)
  3992  
  3993  LBB0_309: // Overlap check and trip-count set-up for the paddq loop at LBB0_314; r10 is used as the count of 64-bit elements.
  3994  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 range
  3995  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx range
  3996  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  3997  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  3998  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx range
  3999  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4000  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4001  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4002  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4003  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4004  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4005  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4006  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4007  	JNE  LBB0_318 // r8 and rdx ranges overlap: leave the vector path (LBB0_318 is outside this excerpt)
  4008  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4009  	JNE  LBB0_318 // r8 and rcx ranges overlap: same
  4010  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4011  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- esi = count rounded down to a multiple of 4
  4012  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4013  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4014  	LONG $0x02e9c149         // shr    r9, 2
  4015  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 4)/4 + 1 = number of 4-element chunks
  4016  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4017  	JE   LBB0_312 // rsi == 4, i.e. a single chunk: bypass the two-chunk loop (LBB0_312 is outside this excerpt)
  4018  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4019  	LONG $0xfee08348         // and    rax, -2
  4020  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_314 adds 2 per pass until it reaches 0
  4021  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4022  
  4023  LBB0_314: // Vector loop: 64-bit lane add, [r8 + 8*rdi] = [rcx + 8*rdi] + [rdx + 8*rdi], 8 elements (64 bytes) per pass.
  4024  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  4025  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4026  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  4027  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  4028  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  4029  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  4030  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  4031  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  4032  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4033  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4034  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  4035  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  4036  	LONG $0x446f0ff3; WORD $0x30f9             // movdqu    xmm0, oword [rcx + 8*rdi + 48]
  4037  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  4038  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm2
  4039  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm0
  4040  	LONG $0x08c78348                           // add    rdi, 8 -- eight 64-bit elements consumed per pass
  4041  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4042  	JNE  LBB0_314 // repeat until the counter reaches zero
  4043  	JMP  LBB0_315 // continue at LBB0_315 (outside this excerpt)
  4044  
  4045  LBB0_655: // Overlap check and trip-count set-up for the psubq loop at LBB0_660; r10 is used as the count of 64-bit elements.
  4046  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] -- rsi = one past the end of the r8 range
  4047  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] -- rax = one past the end of the rdx range
  4048  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4049  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4050  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] -- rax = one past the end of the rcx range
  4051  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4052  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4053  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4054  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4055  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4056  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4057  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4058  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4059  	JNE  LBB0_664 // r8 and rdx ranges overlap: leave the vector path (LBB0_664 is outside this excerpt)
  4060  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4061  	JNE  LBB0_664 // r8 and rcx ranges overlap: same
  4062  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4063  	WORD $0xe683; BYTE $0xfc // and    esi, -4 -- esi = count rounded down to a multiple of 4
  4064  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4065  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4066  	LONG $0x02e9c149         // shr    r9, 2
  4067  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 4)/4 + 1 = number of 4-element chunks
  4068  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4069  	JE   LBB0_658 // rsi == 4, i.e. a single chunk: bypass the two-chunk loop (LBB0_658 is outside this excerpt)
  4070  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4071  	LONG $0xfee08348         // and    rax, -2
  4072  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_660 adds 2 per pass until it reaches 0
  4073  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4074  
  4075  LBB0_660: // Vector loop: 64-bit lane subtract, [r8 + 8*rdi] = [rdx + 8*rdi] - [rcx + 8*rdi], 8 elements (64 bytes) per pass.
  4076  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  4077  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4078  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  4079  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2 -- xmm0 = rdx lanes minus rcx lanes
  4080  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  4081  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  4082  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  4083  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  4084  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4085  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4086  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  4087  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  4088  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
  4089  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  4090  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm0
  4091  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
  4092  	LONG $0x08c78348                           // add    rdi, 8 -- eight 64-bit elements consumed per pass
  4093  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4094  	JNE  LBB0_660 // repeat until the counter reaches zero
  4095  	JMP  LBB0_661 // continue at LBB0_661 (outside this excerpt)
  4096  
  4097  LBB0_763: // Overlap check and trip-count set-up for the pmullw loop at LBB0_768; r10 is used as the count of 16-bit elements.
  4098  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4099  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4100  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4101  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4102  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4103  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4104  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4105  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4106  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4107  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4108  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4109  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4110  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4111  	JNE  LBB0_772 // r8 and rdx ranges overlap: leave the vector path (LBB0_772 is outside this excerpt)
  4112  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4113  	JNE  LBB0_772 // r8 and rcx ranges overlap: same
  4114  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4115  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4116  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4117  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4118  	LONG $0x04e9c149         // shr    r9, 4
  4119  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4120  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4121  	JE   LBB0_766 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_766 is outside this excerpt)
  4122  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4123  	LONG $0xfee08348         // and    rax, -2
  4124  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_768 adds 2 per pass until it reaches 0
  4125  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4126  
  4127  LBB0_768: // Vector loop: 16-bit lane multiply, [r8 + 2*rdi] = [rcx + 2*rdi] * [rdx + 2*rdi], 32 elements (64 bytes) per pass.
  4128  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4129  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4130  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4131  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0 -- keeps the low 16 bits of each lane's product
  4132  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4133  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4134  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4135  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4136  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4137  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4138  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4139  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  4140  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4141  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4142  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4143  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4144  	LONG $0x20c78348                           // add    rdi, 32 -- thirty-two 16-bit elements consumed per pass
  4145  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4146  	JNE  LBB0_768 // repeat until the counter reaches zero
  4147  	JMP  LBB0_769 // continue at LBB0_769 (outside this excerpt)
  4148  
  4149  LBB0_779: // Overlap check and trip-count set-up for the pmullw loop at LBB0_784; r10 is used as the count of 16-bit elements.
  4150  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4151  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4152  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4153  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4154  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4155  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4156  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4157  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4158  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4159  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4160  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4161  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4162  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4163  	JNE  LBB0_788 // r8 and rdx ranges overlap: leave the vector path (LBB0_788 is outside this excerpt)
  4164  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4165  	JNE  LBB0_788 // r8 and rcx ranges overlap: same
  4166  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4167  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4168  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4169  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4170  	LONG $0x04e9c149         // shr    r9, 4
  4171  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4172  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4173  	JE   LBB0_782 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_782 is outside this excerpt)
  4174  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4175  	LONG $0xfee08348         // and    rax, -2
  4176  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_784 adds 2 per pass until it reaches 0
  4177  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4178  
  4179  LBB0_784: // Vector loop: 16-bit lane multiply, [r8 + 2*rdi] = [rcx + 2*rdi] * [rdx + 2*rdi], 32 elements (64 bytes) per pass.
  4180  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4181  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4182  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4183  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0 -- keeps the low 16 bits of each lane's product
  4184  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4185  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4186  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4187  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4188  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4189  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4190  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4191  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  4192  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4193  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4194  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4195  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4196  	LONG $0x20c78348                           // add    rdi, 32 -- thirty-two 16-bit elements consumed per pass
  4197  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4198  	JNE  LBB0_784 // repeat until the counter reaches zero
  4199  	JMP  LBB0_785 // continue at LBB0_785 (outside this excerpt)
  4200  
  4201  LBB0_913: // Overlap check and trip-count set-up for the pmullw loop at LBB0_918; r10 is used as the count of 16-bit elements.
  4202  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4203  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4204  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4205  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4206  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4207  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4208  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4209  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4210  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4211  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4212  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4213  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4214  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4215  	JNE  LBB0_922 // r8 and rdx ranges overlap: leave the vector path (LBB0_922 is outside this excerpt)
  4216  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4217  	JNE  LBB0_922 // r8 and rcx ranges overlap: same
  4218  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4219  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4220  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4221  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4222  	LONG $0x04e9c149         // shr    r9, 4
  4223  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4224  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4225  	JE   LBB0_916 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_916 is outside this excerpt)
  4226  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4227  	LONG $0xfee08348         // and    rax, -2
  4228  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_918 adds 2 per pass until it reaches 0
  4229  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4230  
  4231  LBB0_918: // Vector loop: 16-bit lane multiply, [r8 + 2*rdi] = [rcx + 2*rdi] * [rdx + 2*rdi], 32 elements (64 bytes) per pass.
  4232  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4233  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4234  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4235  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0 -- keeps the low 16 bits of each lane's product
  4236  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4237  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4238  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4239  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4240  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4241  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4242  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4243  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  4244  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4245  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4246  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4247  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4248  	LONG $0x20c78348                           // add    rdi, 32 -- thirty-two 16-bit elements consumed per pass
  4249  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4250  	JNE  LBB0_918 // repeat until the counter reaches zero
  4251  	JMP  LBB0_919 // continue at LBB0_919 (outside this excerpt)
  4252  
  4253  LBB0_929: // Overlap check and trip-count set-up for the pmullw loop at LBB0_934; r10 is used as the count of 16-bit elements.
  4254  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4255  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4256  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4257  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4258  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4259  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4260  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4261  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4262  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4263  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4264  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4265  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4266  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4267  	JNE  LBB0_938 // r8 and rdx ranges overlap: leave the vector path (LBB0_938 is outside this excerpt)
  4268  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4269  	JNE  LBB0_938 // r8 and rcx ranges overlap: same
  4270  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4271  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4272  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4273  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4274  	LONG $0x04e9c149         // shr    r9, 4
  4275  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4276  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4277  	JE   LBB0_932 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_932 is outside this excerpt)
  4278  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4279  	LONG $0xfee08348         // and    rax, -2
  4280  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_934 adds 2 per pass until it reaches 0
  4281  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4282  
  4283  LBB0_934: // Vector loop: 16-bit lane multiply, [r8 + 2*rdi] = [rcx + 2*rdi] * [rdx + 2*rdi], 32 elements (64 bytes) per pass.
  4284  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4285  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4286  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4287  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0 -- keeps the low 16 bits of each lane's product
  4288  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4289  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4290  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4291  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4292  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4293  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4294  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4295  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  4296  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4297  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  4298  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4299  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4300  	LONG $0x20c78348                           // add    rdi, 32 -- thirty-two 16-bit elements consumed per pass
  4301  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4302  	JNE  LBB0_934 // repeat until the counter reaches zero
  4303  	JMP  LBB0_935 // continue at LBB0_935 (outside this excerpt)
  4304  
  4305  LBB0_78: // Overlap check and trip-count set-up for the paddw loop at LBB0_83; r10 is used as the count of 16-bit elements.
  4306  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4307  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4308  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4309  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4310  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4311  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4312  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4313  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4314  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4315  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4316  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4317  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4318  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4319  	JNE  LBB0_87 // r8 and rdx ranges overlap: leave the vector path (LBB0_87 is outside this excerpt)
  4320  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4321  	JNE  LBB0_87 // r8 and rcx ranges overlap: same
  4322  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4323  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4324  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4325  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4326  	LONG $0x04e9c149         // shr    r9, 4
  4327  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4328  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4329  	JE   LBB0_81 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_81 is outside this excerpt)
  4330  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4331  	LONG $0xfee08348         // and    rax, -2
  4332  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even); LBB0_83 adds 2 per pass until it reaches 0
  4333  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4334  
  4335  LBB0_83: // Vector loop: 16-bit lane add, [r8 + 2*rdi] = [rcx + 2*rdi] + [rdx + 2*rdi], 32 elements (64 bytes) per pass.
  4336  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4337  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4338  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4339  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4340  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4341  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4342  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4343  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4344  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32] -- second half of the pass, same pattern at +32/+48
  4345  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4346  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4347  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4348  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4349  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4350  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4351  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4352  	LONG $0x20c78348                           // add    rdi, 32 -- thirty-two 16-bit elements consumed per pass
  4353  	LONG $0x02c08348                           // add    rax, 2 -- negated chunk counter steps toward zero
  4354  	JNE  LBB0_83 // repeat until the counter reaches zero
  4355  	JMP  LBB0_84 // continue at LBB0_84 (outside this excerpt)
  4356  
  4357  LBB0_94: // Overlap check and trip-count set-up for the paddw loop at LBB0_99; r10 is used as the count of 16-bit elements.
  4358  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] -- rsi = one past the end of the r8 range
  4359  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] -- rax = one past the end of the rdx range
  4360  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4361  	LONG $0xd1970f41         // seta    r9b -- r9b = rdx range ends above the start of r8 (unsigned)
  4362  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] -- rax = one past the end of the rcx range
  4363  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4364  	LONG $0xd3970f41         // seta    r11b -- r11b = r8 range ends above the start of rdx
  4365  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4366  	WORD $0x970f; BYTE $0xd0 // seta    al -- al = rcx range ends above the start of r8
  4367  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4368  	LONG $0xd7970f40         // seta    dil -- dil = r8 range ends above the start of rcx
  4369  	WORD $0xf631             // xor    esi, esi -- esi = 0 if either overlap branch below is taken
  4370  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4371  	JNE  LBB0_103 // r8 and rdx ranges overlap: leave the vector path (LBB0_103 is outside this excerpt)
  4372  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4373  	JNE  LBB0_103 // r8 and rcx ranges overlap: same
  4374  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4375  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 -- esi = count rounded down to a multiple of 16
  4376  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4377  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4378  	LONG $0x04e9c149         // shr    r9, 4
  4379  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rsi - 16)/16 + 1 = number of 16-element chunks
  4380  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4381  	JE   LBB0_97 // rsi == 16, i.e. a single chunk: bypass the two-chunk loop (LBB0_97 is outside this excerpt)
  4382  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4383  	LONG $0xfee08348         // and    rax, -2
  4384  	WORD $0xf748; BYTE $0xd8 // neg    rax -- rax = -(r9 rounded down to even), used as a negated counter by the loop that follows
  4385  	WORD $0xff31             // xor    edi, edi -- element index starts at 0
  4386  
  4387  LBB0_99: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rcx + 2*i] + [rdx + 2*i] lane-wise via paddw (wraps on overflow).
  4388  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4389  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4390  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4391  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4392  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4393  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4394  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4395  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4396  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4397  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4398  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4399  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4400  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4401  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4402  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4403  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4404  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4405  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4406  	JNE  LBB0_99
  4407  	JMP  LBB0_100 // NOTE(review): LBB0_100 presumably handles the odd group / leftover elements -- confirm
  4408  
  4409  LBB0_424: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_429: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4410  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4411  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4412  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4413  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4414  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4415  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4416  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4417  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4418  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4419  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4420  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4421  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4422  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4423  	JNE  LBB0_433 // out overlaps inLeft: skip the vector loop
  4424  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4425  	JNE  LBB0_433 // out overlaps inRight: skip the vector loop
  4426  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4427  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4428  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4429  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4430  	LONG $0x04e9c149         // shr    r9, 4
  4431  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4432  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4433  	JE   LBB0_427 // exactly one group (rsi == 16): bypass the paired loop
  4434  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4435  	LONG $0xfee08348         // and    rax, -2
  4436  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4437  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4438  
  4439  LBB0_429: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rdx + 2*i] - [rcx + 2*i] lane-wise via psubw (wraps on overflow).
  4440  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4441  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4442  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4443  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4444  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  4445  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4446  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  4447  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  4448  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4449  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4450  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4451  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4452  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
  4453  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4454  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm0
  4455  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
  4456  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4457  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4458  	JNE  LBB0_429
  4459  	JMP  LBB0_430 // NOTE(review): LBB0_430 presumably handles the odd group / leftover elements -- confirm
  4460  
  4461  LBB0_440: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_445: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4462  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4463  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4464  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4465  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4466  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4467  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4468  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4469  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4470  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4471  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4472  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4473  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4474  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4475  	JNE  LBB0_449 // out overlaps inLeft: skip the vector loop
  4476  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4477  	JNE  LBB0_449 // out overlaps inRight: skip the vector loop
  4478  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4479  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4480  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4481  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4482  	LONG $0x04e9c149         // shr    r9, 4
  4483  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4484  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4485  	JE   LBB0_443 // exactly one group (rsi == 16): bypass the paired loop
  4486  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4487  	LONG $0xfee08348         // and    rax, -2
  4488  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4489  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4490  
  4491  LBB0_445: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rdx + 2*i] - [rcx + 2*i] lane-wise via psubw (wraps on overflow).
  4492  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4493  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4494  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4495  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4496  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  4497  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4498  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  4499  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  4500  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4501  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4502  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4503  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4504  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
  4505  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4506  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm0
  4507  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
  4508  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4509  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4510  	JNE  LBB0_445
  4511  	JMP  LBB0_446 // NOTE(review): LBB0_446 presumably handles the odd group / leftover elements -- confirm
  4512  
  4513  LBB0_251: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_256: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4514  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4515  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4516  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4517  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4518  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4519  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4520  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4521  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4522  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4523  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4524  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4525  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4526  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4527  	JNE  LBB0_260 // out overlaps inLeft: skip the vector loop
  4528  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4529  	JNE  LBB0_260 // out overlaps inRight: skip the vector loop
  4530  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4531  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4532  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4533  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4534  	LONG $0x04e9c149         // shr    r9, 4
  4535  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4536  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4537  	JE   LBB0_254 // exactly one group (rsi == 16): bypass the paired loop
  4538  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4539  	LONG $0xfee08348         // and    rax, -2
  4540  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4541  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4542  
  4543  LBB0_256: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rcx + 2*i] + [rdx + 2*i] lane-wise via paddw (wraps on overflow).
  4544  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4545  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4546  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4547  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4548  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4549  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4550  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4551  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4552  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4553  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4554  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4555  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4556  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4557  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4558  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4559  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4560  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4561  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4562  	JNE  LBB0_256
  4563  	JMP  LBB0_257 // NOTE(review): LBB0_257 presumably handles the odd group / leftover elements -- confirm
  4564  
  4565  LBB0_267: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_272: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4566  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4567  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4568  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4569  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4570  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4571  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4572  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4573  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4574  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4575  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4576  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4577  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4578  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4579  	JNE  LBB0_276 // out overlaps inLeft: skip the vector loop
  4580  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4581  	JNE  LBB0_276 // out overlaps inRight: skip the vector loop
  4582  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4583  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4584  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4585  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4586  	LONG $0x04e9c149         // shr    r9, 4
  4587  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4588  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4589  	JE   LBB0_270 // exactly one group (rsi == 16): bypass the paired loop
  4590  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4591  	LONG $0xfee08348         // and    rax, -2
  4592  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4593  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4594  
  4595  LBB0_272: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rcx + 2*i] + [rdx + 2*i] lane-wise via paddw (wraps on overflow).
  4596  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4597  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4598  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4599  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4600  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  4601  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4602  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  4603  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  4604  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4605  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4606  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4607  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  4608  	LONG $0x446f0ff3; WORD $0x3079             // movdqu    xmm0, oword [rcx + 2*rdi + 48]
  4609  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  4610  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm2
  4611  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm0
  4612  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4613  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4614  	JNE  LBB0_272
  4615  	JMP  LBB0_273 // NOTE(review): LBB0_273 presumably handles the odd group / leftover elements -- confirm
  4616  
  4617  LBB0_597: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_602: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4618  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4619  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4620  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4621  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4622  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4623  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4624  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4625  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4626  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4627  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4628  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4629  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4630  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4631  	JNE  LBB0_606 // out overlaps inLeft: skip the vector loop
  4632  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4633  	JNE  LBB0_606 // out overlaps inRight: skip the vector loop
  4634  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4635  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4636  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4637  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4638  	LONG $0x04e9c149         // shr    r9, 4
  4639  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4640  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4641  	JE   LBB0_600 // exactly one group (rsi == 16): bypass the paired loop
  4642  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4643  	LONG $0xfee08348         // and    rax, -2
  4644  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4645  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4646  
  4647  LBB0_602: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rdx + 2*i] - [rcx + 2*i] lane-wise via psubw (wraps on overflow).
  4648  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4649  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4650  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4651  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4652  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  4653  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4654  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  4655  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  4656  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4657  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4658  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4659  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4660  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
  4661  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4662  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm0
  4663  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
  4664  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4665  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4666  	JNE  LBB0_602
  4667  	JMP  LBB0_603 // NOTE(review): LBB0_603 presumably handles the odd group / leftover elements -- confirm
  4668  
  4669  LBB0_613: // Pre-loop setup for the 2-byte-element SSE loop at LBB0_618: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4670  	LONG $0x50348d4b         // lea    rsi, [r8 + 2*r10] ; rsi = end of out span
  4671  	LONG $0x52048d4a         // lea    rax, [rdx + 2*r10] ; rax = end of inLeft span
  4672  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4673  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4674  	LONG $0x51048d4a         // lea    rax, [rcx + 2*r10] ; rax = end of inRight span
  4675  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4676  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4677  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4678  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4679  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4680  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4681  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4682  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4683  	JNE  LBB0_622 // out overlaps inLeft: skip the vector loop
  4684  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4685  	JNE  LBB0_622 // out overlaps inRight: skip the vector loop
  4686  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4687  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; rsi = count rounded down to a multiple of 16 elements (one group = 2 xmm = 32 bytes)
  4688  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
  4689  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4690  	LONG $0x04e9c149         // shr    r9, 4
  4691  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 16)/16 + 1 = number of 16-element groups
  4692  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4693  	JE   LBB0_616 // exactly one group (rsi == 16): bypass the paired loop
  4694  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4695  	LONG $0xfee08348         // and    rax, -2
  4696  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4697  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4698  
  4699  LBB0_618: // Loop unrolled 2x: each pass handles 32 two-byte elements, [r8 + 2*i] = [rdx + 2*i] - [rcx + 2*i] lane-wise via psubw (wraps on overflow).
  4700  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  4701  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  4702  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  4703  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4704  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  4705  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4706  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  4707  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  4708  	LONG $0x446f0ff3; WORD $0x207a             // movdqu    xmm0, oword [rdx + 2*rdi + 32]
  4709  	LONG $0x4c6f0ff3; WORD $0x307a             // movdqu    xmm1, oword [rdx + 2*rdi + 48]
  4710  	LONG $0x546f0ff3; WORD $0x2079             // movdqu    xmm2, oword [rcx + 2*rdi + 32]
  4711  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  4712  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
  4713  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  4714  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm0
  4715  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
  4716  	LONG $0x20c78348                           // add    rdi, 32 ; advance element index past both groups
  4717  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4718  	JNE  LBB0_618
  4719  	JMP  LBB0_619 // NOTE(review): LBB0_619 presumably handles the odd group / leftover elements -- confirm
  4720  
  4721  LBB0_829: // Entry to the 4x-unrolled 64-bit multiply loop at LBB0_830: round the loop bound down to a multiple of 4 and zero the index.
  4722  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; clear the low two bits of esi (NOTE(review): esi is set before this label, presumably to the element count -- confirm)
  4723  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index
  4724  
  4725  LBB0_830: // Scalar loop unrolled 4x over 8-byte elements: [r8 + 8*i] = [rcx + 8*i] * [rdx + 8*i] (two-operand imul keeps the low 64 bits) for i = rdi .. rdi+3.
  4726  	LONG $0xf9048b48               // mov    rax, qword [rcx + 8*rdi]
  4727  	LONG $0x04af0f48; BYTE $0xfa   // imul    rax, qword [rdx + 8*rdi]
  4728  	LONG $0xf8048949               // mov    qword [r8 + 8*rdi], rax
  4729  	LONG $0xf9448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rdi + 8]
  4730  	LONG $0x44af0f48; WORD $0x08fa // imul    rax, qword [rdx + 8*rdi + 8]
  4731  	LONG $0xf8448949; BYTE $0x08   // mov    qword [r8 + 8*rdi + 8], rax
  4732  	LONG $0xf9448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rdi + 16]
  4733  	LONG $0x44af0f48; WORD $0x10fa // imul    rax, qword [rdx + 8*rdi + 16]
  4734  	LONG $0xf8448949; BYTE $0x10   // mov    qword [r8 + 8*rdi + 16], rax
  4735  	LONG $0xf9448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rdi + 24]
  4736  	LONG $0x44af0f48; WORD $0x18fa // imul    rax, qword [rdx + 8*rdi + 24]
  4737  	LONG $0xf8448949; BYTE $0x18   // mov    qword [r8 + 8*rdi + 24], rax
  4738  	LONG $0x04c78348               // add    rdi, 4 ; four elements done
  4739  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  4740  	JNE  LBB0_830 // repeat until the index reaches the bound in rsi
  4741  
  4742  LBB0_831: // Tail dispatch: r9 is the iteration count for the scalar loop at LBB0_833 (NOTE(review): r9 is computed before this point, presumably count mod 4 -- confirm).
  4743  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
  4744  	JE   LBB0_1013 // nothing left: go to the shared RET
  4745  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi] ; rsi = output pointer rebased at element rdi
  4746  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; rcx rebased at element rdi
  4747  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; rdx rebased at element rdi
  4748  	WORD $0xff31             // xor    edi, edi ; restart the index at 0 relative to the rebased pointers
  4749  
  4750  LBB0_833: // Scalar tail loop over 8-byte elements: [rsi + 8*i] = [rcx + 8*i] * [rdx + 8*i] for i = 0 .. r9-1.
  4751  	LONG $0xf9048b48             // mov    rax, qword [rcx + 8*rdi]
  4752  	LONG $0x04af0f48; BYTE $0xfa // imul    rax, qword [rdx + 8*rdi]
  4753  	LONG $0xfe048948             // mov    qword [rsi + 8*rdi], rax
  4754  	LONG $0x01c78348             // add    rdi, 1
  4755  	WORD $0x3949; BYTE $0xf9     // cmp    r9, rdi
  4756  	JNE  LBB0_833 // repeat until rdi == r9
  4757  	JMP  LBB0_1013 // done: shared RET
  4758  
  4759  LBB0_837: // Pre-loop setup for the 4-byte-element SSE loop at LBB0_842: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4760  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] ; rsi = end of out span
  4761  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] ; rax = end of inLeft span
  4762  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4763  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4764  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] ; rax = end of inRight span
  4765  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4766  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4767  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4768  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4769  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4770  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4771  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4772  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4773  	JNE  LBB0_846 // out overlaps inLeft: skip the vector loop
  4774  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4775  	JNE  LBB0_846 // out overlaps inRight: skip the vector loop
  4776  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4777  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = count rounded down to a multiple of 8 elements (one group = 2 xmm = 32 bytes)
  4778  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4779  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4780  	LONG $0x03e9c149         // shr    r9, 3
  4781  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  4782  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4783  	JE   LBB0_840 // exactly one group (rsi == 8): bypass the paired loop
  4784  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4785  	LONG $0xfee08348         // and    rax, -2
  4786  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4787  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4788  
  4789  LBB0_842: // Loop unrolled 2x: each pass handles 16 four-byte elements, [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i] lane-wise via mulps (packed single-precision multiply).
  4790  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  4791  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  4792  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  4793  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  4794  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  4795  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  4796  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  4797  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  4798  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]
  4799  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  4800  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  4801  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  4802  	LONG $0xb944100f; BYTE $0x30   // movups    xmm0, oword [rcx + 4*rdi + 48]
  4803  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  4804  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
  4805  	LONG $0x44110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm0
  4806  	LONG $0x10c78348               // add    rdi, 16 ; advance element index past both groups
  4807  	LONG $0x02c08348               // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4808  	JNE  LBB0_842
  4809  	JMP  LBB0_843 // NOTE(review): LBB0_843 presumably handles the odd group / leftover elements -- confirm
  4810  
  4811  LBB0_979: // Entry to the 4x-unrolled 64-bit multiply loop at LBB0_980: round the loop bound down to a multiple of 4 and zero the index.
  4812  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; clear the low two bits of esi (NOTE(review): esi is set before this label, presumably to the element count -- confirm)
  4813  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index
  4814  
  4815  LBB0_980: // Scalar loop unrolled 4x over 8-byte elements: [r8 + 8*i] = [rcx + 8*i] * [rdx + 8*i] (two-operand imul keeps the low 64 bits) for i = rdi .. rdi+3.
  4816  	LONG $0xf9048b48               // mov    rax, qword [rcx + 8*rdi]
  4817  	LONG $0x04af0f48; BYTE $0xfa   // imul    rax, qword [rdx + 8*rdi]
  4818  	LONG $0xf8048949               // mov    qword [r8 + 8*rdi], rax
  4819  	LONG $0xf9448b48; BYTE $0x08   // mov    rax, qword [rcx + 8*rdi + 8]
  4820  	LONG $0x44af0f48; WORD $0x08fa // imul    rax, qword [rdx + 8*rdi + 8]
  4821  	LONG $0xf8448949; BYTE $0x08   // mov    qword [r8 + 8*rdi + 8], rax
  4822  	LONG $0xf9448b48; BYTE $0x10   // mov    rax, qword [rcx + 8*rdi + 16]
  4823  	LONG $0x44af0f48; WORD $0x10fa // imul    rax, qword [rdx + 8*rdi + 16]
  4824  	LONG $0xf8448949; BYTE $0x10   // mov    qword [r8 + 8*rdi + 16], rax
  4825  	LONG $0xf9448b48; BYTE $0x18   // mov    rax, qword [rcx + 8*rdi + 24]
  4826  	LONG $0x44af0f48; WORD $0x18fa // imul    rax, qword [rdx + 8*rdi + 24]
  4827  	LONG $0xf8448949; BYTE $0x18   // mov    qword [r8 + 8*rdi + 24], rax
  4828  	LONG $0x04c78348               // add    rdi, 4 ; four elements done
  4829  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
  4830  	JNE  LBB0_980 // repeat until the index reaches the bound in rsi
  4831  
  4832  LBB0_981: // Tail dispatch: r9 is the iteration count for the scalar loop at LBB0_983 (NOTE(review): r9 is computed before this point, presumably count mod 4 -- confirm).
  4833  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
  4834  	JE   LBB0_1013 // nothing left: go to the shared RET
  4835  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi] ; rsi = output pointer rebased at element rdi
  4836  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi] ; rcx rebased at element rdi
  4837  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi] ; rdx rebased at element rdi
  4838  	WORD $0xff31             // xor    edi, edi ; restart the index at 0 relative to the rebased pointers
  4839  
  4840  LBB0_983: // Scalar tail loop over 8-byte elements: [rsi + 8*i] = [rcx + 8*i] * [rdx + 8*i] for i = 0 .. r9-1; falls through to the RET at LBB0_1013 when done.
  4841  	LONG $0xf9048b48             // mov    rax, qword [rcx + 8*rdi]
  4842  	LONG $0x04af0f48; BYTE $0xfa // imul    rax, qword [rdx + 8*rdi]
  4843  	LONG $0xfe048948             // mov    qword [rsi + 8*rdi], rax
  4844  	LONG $0x01c78348             // add    rdi, 1
  4845  	WORD $0x3949; BYTE $0xf9     // cmp    r9, rdi
  4846  	JNE  LBB0_983 // repeat until rdi == r9
  4847  
  4848  LBB0_1013: // Shared exit label: the early-out and loop-completion branches in this routine jump here.
  4849  	RET // return to the caller
  4850  
  4851  LBB0_987: // Pre-loop setup for the 4-byte-element SSE loop at LBB0_992: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4852  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10] ; rsi = end of out span
  4853  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10] ; rax = end of inLeft span
  4854  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4855  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4856  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10] ; rax = end of inRight span
  4857  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4858  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4859  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4860  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4861  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4862  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4863  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4864  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4865  	JNE  LBB0_996 // out overlaps inLeft: skip the vector loop
  4866  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4867  	JNE  LBB0_996 // out overlaps inRight: skip the vector loop
  4868  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4869  	WORD $0xe683; BYTE $0xf8 // and    esi, -8 ; rsi = count rounded down to a multiple of 8 elements (one group = 2 xmm = 32 bytes)
  4870  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4871  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4872  	LONG $0x03e9c149         // shr    r9, 3
  4873  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  4874  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4875  	JE   LBB0_990 // exactly one group (rsi == 8): bypass the paired loop
  4876  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4877  	LONG $0xfee08348         // and    rax, -2
  4878  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4879  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4880  
  4881  LBB0_992: // Loop unrolled 2x: each pass handles 16 four-byte elements, [r8 + 4*i] = [rcx + 4*i] * [rdx + 4*i] lane-wise via mulps (packed single-precision multiply).
  4882  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  4883  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  4884  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  4885  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  4886  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  4887  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  4888  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  4889  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  4890  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]
  4891  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  4892  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  4893  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  4894  	LONG $0xb944100f; BYTE $0x30   // movups    xmm0, oword [rcx + 4*rdi + 48]
  4895  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  4896  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
  4897  	LONG $0x44110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm0
  4898  	LONG $0x10c78348               // add    rdi, 16 ; advance element index past both groups
  4899  	LONG $0x02c08348               // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4900  	JNE  LBB0_992
  4901  	JMP  LBB0_993 // NOTE(review): LBB0_993 presumably handles the odd group / leftover elements -- confirm
  4902  
  4903  LBB0_152: // Pre-loop setup for the 8-byte-element SSE loop at LBB0_157: overlap check on the three buffers, then trip-count computation. r10 is used as the element count; r8/rdx/rcx are the out/inLeft/inRight pointers loaded in the prologue (NOTE(review): assumed unchanged on the way here -- confirm).
  4904  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10] ; rsi = end of out span
  4905  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10] ; rax = end of inLeft span
  4906  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4907  	LONG $0xd1970f41         // seta    r9b ; r9b = inLeft end > out start
  4908  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10] ; rax = end of inRight span
  4909  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4910  	LONG $0xd3970f41         // seta    r11b ; r11b = out end > inLeft start
  4911  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4912  	WORD $0x970f; BYTE $0xd0 // seta    al ; al = inRight end > out start
  4913  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4914  	LONG $0xd7970f40         // seta    dil ; dil = out end > inRight start
  4915  	WORD $0xf631             // xor    esi, esi ; rsi = 0 when either overlap branch below is taken
  4916  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4917  	JNE  LBB0_161 // out overlaps inLeft: skip the vector loop
  4918  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4919  	JNE  LBB0_161 // out overlaps inRight: skip the vector loop
  4920  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4921  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; rsi = count rounded down to a multiple of 4 elements (one group = 2 xmm = 32 bytes)
  4922  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  4923  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4924  	LONG $0x02e9c149         // shr    r9, 2
  4925  	LONG $0x01c18349         // add    r9, 1 ; r9 = (rsi - 4)/4 + 1 = number of 4-element groups
  4926  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4927  	JE   LBB0_155 // exactly one group (rsi == 4): bypass the paired loop
  4928  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4929  	LONG $0xfee08348         // and    rax, -2
  4930  	WORD $0xf748; BYTE $0xd8 // neg    rax ; rax = -(r9 & ~1): negated count of groups consumed in pairs
  4931  	WORD $0xff31             // xor    edi, edi ; rdi = 0, element index for the loop
  4932  
  4933  LBB0_157: // Loop unrolled 2x: each pass handles 8 eight-byte elements, [r8 + 8*i] = [rcx + 8*i] + [rdx + 8*i] lane-wise via paddq (wraps on overflow).
  4934  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  4935  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  4936  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  4937  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  4938  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  4939  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  4940  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  4941  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  4942  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32]
  4943  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  4944  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  4945  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  4946  	LONG $0x446f0ff3; WORD $0x30f9             // movdqu    xmm0, oword [rcx + 8*rdi + 48]
  4947  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  4948  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm2
  4949  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm0
  4950  	LONG $0x08c78348                           // add    rdi, 8 ; advance element index past both groups
  4951  	LONG $0x02c08348                           // add    rax, 2 ; two groups done; rax is a negative counter that reaches 0 when the pairs are exhausted
  4952  	JNE  LBB0_157
  4953  	JMP  LBB0_158 // NOTE(review): LBB0_158 presumably handles the odd group / leftover elements -- confirm
  4954  
  4955  LBB0_168: // Preamble for the SSE loop over 4-byte elements: buffer-overlap check, then loop-counter setup.
  4956  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]   ; rsi = r8 + 4*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  4957  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]   ; rax = end of the rdx (inLeft) buffer
  4958  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4959  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  4960  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]   ; rax = end of the rcx (inRight) buffer
  4961  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  4962  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  4963  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  4964  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  4965  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  4966  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  4967  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  4968  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  4969  	JNE  LBB0_177 // both flags set: out overlaps inLeft, so the vector loop is skipped
  4970  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  4971  	JNE  LBB0_177 // both flags set: out overlaps inRight, so the vector loop is skipped
  4972  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  4973  	WORD $0xe683; BYTE $0xf8 // and    esi, -8   ; rsi = count rounded down to a multiple of 8 elements
  4974  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  4975  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  4976  	LONG $0x03e9c149         // shr    r9, 3
  4977  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  4978  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  4979  	JE   LBB0_171 // rsi == 8: a single group, so bypass the 2x-unrolled loop
  4980  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  4981  	LONG $0xfee08348         // and    rax, -2
  4982  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  4983  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  4984  
  4985  LBB0_173: // 2x-unrolled loop: each pass stores [rcx] + [rdx] to [r8] for 16 dword elements (four 16-byte vectors) using addps.
  4986  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]   ; first group: elements rdi .. rdi+7
  4987  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  4988  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  4989  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0   ; packed single-precision add
  4990  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  4991  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  4992  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  4993  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  4994  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]   ; second group: elements rdi+8 .. rdi+15
  4995  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  4996  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  4997  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0
  4998  	LONG $0xb944100f; BYTE $0x30   // movups    xmm0, oword [rcx + 4*rdi + 48]
  4999  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  5000  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
  5001  	LONG $0x44110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm0
  5002  	LONG $0x10c78348               // add    rdi, 16   ; advance the element index by 16
  5003  	LONG $0x02c08348               // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5004  	JNE  LBB0_173 // repeat while the counter is non-zero
  5005  	JMP  LBB0_174 // NOTE(review): LBB0_174 presumably handles the leftover odd group and tail - confirm
  5006  
  5007  LBB0_498: // Preamble for the SSE loop over 8-byte elements: buffer-overlap check, then loop-counter setup.
  5008  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]   ; rsi = r8 + 8*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5009  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]   ; rax = end of the rdx (inLeft) buffer
  5010  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5011  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5012  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]   ; rax = end of the rcx (inRight) buffer
  5013  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5014  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5015  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5016  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5017  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5018  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5019  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5020  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5021  	JNE  LBB0_507 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5022  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5023  	JNE  LBB0_507 // both flags set: out overlaps inRight, so the vector loop is skipped
  5024  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5025  	WORD $0xe683; BYTE $0xfc // and    esi, -4   ; rsi = count rounded down to a multiple of 4 elements
  5026  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5027  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5028  	LONG $0x02e9c149         // shr    r9, 2
  5029  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 4)/4 + 1 = number of 4-element groups
  5030  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5031  	JE   LBB0_501 // rsi == 4: a single group, so bypass the 2x-unrolled loop
  5032  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5033  	LONG $0xfee08348         // and    rax, -2
  5034  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5035  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5036  
  5037  LBB0_503: // 2x-unrolled loop: each pass stores [rdx] - [rcx] to [r8] for 8 qword elements (four 16-byte vectors) using psubq.
  5038  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]   ; first group: elements rdi .. rdi+3
  5039  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  5040  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  5041  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2   ; 64-bit lane-wise subtract: rdx operand minus rcx operand
  5042  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  5043  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  5044  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  5045  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  5046  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32]   ; second group: elements rdi+4 .. rdi+7
  5047  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  5048  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  5049  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  5050  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
  5051  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  5052  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm0
  5053  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
  5054  	LONG $0x08c78348                           // add    rdi, 8   ; advance the element index by 8
  5055  	LONG $0x02c08348                           // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5056  	JNE  LBB0_503 // repeat while the counter is non-zero
  5057  	JMP  LBB0_504 // NOTE(review): LBB0_504 presumably handles the leftover odd group and tail - confirm
  5058  
  5059  LBB0_514: // Preamble for the SSE loop over 4-byte elements: buffer-overlap check, then loop-counter setup.
  5060  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]   ; rsi = r8 + 4*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5061  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]   ; rax = end of the rdx (inLeft) buffer
  5062  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5063  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5064  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]   ; rax = end of the rcx (inRight) buffer
  5065  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5066  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5067  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5068  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5069  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5070  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5071  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5072  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5073  	JNE  LBB0_523 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5074  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5075  	JNE  LBB0_523 // both flags set: out overlaps inRight, so the vector loop is skipped
  5076  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5077  	WORD $0xe683; BYTE $0xf8 // and    esi, -8   ; rsi = count rounded down to a multiple of 8 elements
  5078  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5079  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5080  	LONG $0x03e9c149         // shr    r9, 3
  5081  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  5082  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5083  	JE   LBB0_517 // rsi == 8: a single group, so bypass the 2x-unrolled loop
  5084  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5085  	LONG $0xfee08348         // and    rax, -2
  5086  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5087  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5088  
  5089  LBB0_519: // 2x-unrolled loop: each pass stores [rdx] - [rcx] to [r8] for 16 dword elements (four 16-byte vectors) using subps.
  5090  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]   ; first group: elements rdi .. rdi+7
  5091  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  5092  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  5093  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2   ; packed single-precision subtract: rdx operand minus rcx operand
  5094  	LONG $0xb954100f; BYTE $0x10   // movups    xmm2, oword [rcx + 4*rdi + 16]
  5095  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  5096  	LONG $0x04110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm0
  5097  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
  5098  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]   ; second group: elements rdi+8 .. rdi+15
  5099  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  5100  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  5101  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2
  5102  	LONG $0xb954100f; BYTE $0x30   // movups    xmm2, oword [rcx + 4*rdi + 48]
  5103  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  5104  	LONG $0x44110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm0
  5105  	LONG $0x4c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm1
  5106  	LONG $0x10c78348               // add    rdi, 16   ; advance the element index by 16
  5107  	LONG $0x02c08348               // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5108  	JNE  LBB0_519 // repeat while the counter is non-zero
  5109  	JMP  LBB0_520 // NOTE(review): LBB0_520 presumably handles the leftover odd group and tail - confirm
  5110  
  5111  LBB0_325: // Preamble for the SSE loop over 8-byte elements: buffer-overlap check, then loop-counter setup.
  5112  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]   ; rsi = r8 + 8*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5113  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]   ; rax = end of the rdx (inLeft) buffer
  5114  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5115  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5116  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]   ; rax = end of the rcx (inRight) buffer
  5117  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5118  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5119  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5120  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5121  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5122  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5123  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5124  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5125  	JNE  LBB0_334 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5126  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5127  	JNE  LBB0_334 // both flags set: out overlaps inRight, so the vector loop is skipped
  5128  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5129  	WORD $0xe683; BYTE $0xfc // and    esi, -4   ; rsi = count rounded down to a multiple of 4 elements
  5130  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5131  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5132  	LONG $0x02e9c149         // shr    r9, 2
  5133  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 4)/4 + 1 = number of 4-element groups
  5134  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5135  	JE   LBB0_328 // rsi == 4: a single group, so bypass the 2x-unrolled loop
  5136  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5137  	LONG $0xfee08348         // and    rax, -2
  5138  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5139  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5140  
  5141  LBB0_330: // 2x-unrolled loop: each pass stores [rcx] + [rdx] to [r8] for 8 qword elements (four 16-byte vectors) using paddq.
  5142  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]   ; first group: elements rdi .. rdi+3
  5143  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  5144  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  5145  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0   ; 64-bit lane-wise add
  5146  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  5147  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  5148  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  5149  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  5150  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32]   ; second group: elements rdi+4 .. rdi+7
  5151  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  5152  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  5153  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  5154  	LONG $0x446f0ff3; WORD $0x30f9             // movdqu    xmm0, oword [rcx + 8*rdi + 48]
  5155  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  5156  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm2
  5157  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm0
  5158  	LONG $0x08c78348                           // add    rdi, 8   ; advance the element index by 8
  5159  	LONG $0x02c08348                           // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5160  	JNE  LBB0_330 // repeat while the counter is non-zero
  5161  	JMP  LBB0_331 // NOTE(review): LBB0_331 presumably handles the leftover odd group and tail - confirm
  5162  
  5163  LBB0_341: // Preamble for the SSE loop over 4-byte elements: buffer-overlap check, then loop-counter setup.
  5164  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]   ; rsi = r8 + 4*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5165  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]   ; rax = end of the rdx (inLeft) buffer
  5166  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5167  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5168  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]   ; rax = end of the rcx (inRight) buffer
  5169  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5170  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5171  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5172  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5173  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5174  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5175  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5176  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5177  	JNE  LBB0_350 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5178  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5179  	JNE  LBB0_350 // both flags set: out overlaps inRight, so the vector loop is skipped
  5180  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5181  	WORD $0xe683; BYTE $0xf8 // and    esi, -8   ; rsi = count rounded down to a multiple of 8 elements
  5182  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5183  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5184  	LONG $0x03e9c149         // shr    r9, 3
  5185  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  5186  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5187  	JE   LBB0_344 // rsi == 8: a single group, so bypass the 2x-unrolled loop
  5188  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5189  	LONG $0xfee08348         // and    rax, -2
  5190  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5191  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5192  
  5193  LBB0_346: // 2x-unrolled loop: each pass stores [rcx] + [rdx] to [r8] for 16 dword elements (four 16-byte vectors) using addps.
  5194  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]   ; first group: elements rdi .. rdi+7
  5195  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  5196  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  5197  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0   ; packed single-precision add
  5198  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  5199  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  5200  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  5201  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  5202  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]   ; second group: elements rdi+8 .. rdi+15
  5203  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  5204  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  5205  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0
  5206  	LONG $0xb944100f; BYTE $0x30   // movups    xmm0, oword [rcx + 4*rdi + 48]
  5207  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  5208  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
  5209  	LONG $0x44110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm0
  5210  	LONG $0x10c78348               // add    rdi, 16   ; advance the element index by 16
  5211  	LONG $0x02c08348               // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5212  	JNE  LBB0_346 // repeat while the counter is non-zero
  5213  	JMP  LBB0_347 // NOTE(review): LBB0_347 presumably handles the leftover odd group and tail - confirm
  5214  
  5215  LBB0_671: // Preamble for the SSE loop over 8-byte elements: buffer-overlap check, then loop-counter setup.
  5216  	LONG $0xd0348d4b         // lea    rsi, [r8 + 8*r10]   ; rsi = r8 + 8*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5217  	LONG $0xd2048d4a         // lea    rax, [rdx + 8*r10]   ; rax = end of the rdx (inLeft) buffer
  5218  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5219  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5220  	LONG $0xd1048d4a         // lea    rax, [rcx + 8*r10]   ; rax = end of the rcx (inRight) buffer
  5221  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5222  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5223  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5224  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5225  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5226  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5227  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5228  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5229  	JNE  LBB0_680 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5230  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5231  	JNE  LBB0_680 // both flags set: out overlaps inRight, so the vector loop is skipped
  5232  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5233  	WORD $0xe683; BYTE $0xfc // and    esi, -4   ; rsi = count rounded down to a multiple of 4 elements
  5234  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
  5235  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5236  	LONG $0x02e9c149         // shr    r9, 2
  5237  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 4)/4 + 1 = number of 4-element groups
  5238  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5239  	JE   LBB0_674 // rsi == 4: a single group, so bypass the 2x-unrolled loop
  5240  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5241  	LONG $0xfee08348         // and    rax, -2
  5242  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5243  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5244  
  5245  LBB0_676: // 2x-unrolled loop: each pass stores [rdx] - [rcx] to [r8] for 8 qword elements (four 16-byte vectors) using psubq.
  5246  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]   ; first group: elements rdi .. rdi+3
  5247  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  5248  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  5249  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2   ; 64-bit lane-wise subtract: rdx operand minus rcx operand
  5250  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  5251  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  5252  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  5253  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  5254  	LONG $0x446f0ff3; WORD $0x20fa             // movdqu    xmm0, oword [rdx + 8*rdi + 32]   ; second group: elements rdi+4 .. rdi+7
  5255  	LONG $0x4c6f0ff3; WORD $0x30fa             // movdqu    xmm1, oword [rdx + 8*rdi + 48]
  5256  	LONG $0x546f0ff3; WORD $0x20f9             // movdqu    xmm2, oword [rcx + 8*rdi + 32]
  5257  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  5258  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
  5259  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  5260  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm0
  5261  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
  5262  	LONG $0x08c78348                           // add    rdi, 8   ; advance the element index by 8
  5263  	LONG $0x02c08348                           // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5264  	JNE  LBB0_676 // repeat while the counter is non-zero
  5265  	JMP  LBB0_677 // NOTE(review): LBB0_677 presumably handles the leftover odd group and tail - confirm
  5266  
  5267  LBB0_687: // Preamble for the SSE loop over 4-byte elements: buffer-overlap check, then loop-counter setup.
  5268  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]   ; rsi = r8 + 4*r10, end of out (r8 = out per the prologue; r10 presumably the element count - confirm)
  5269  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]   ; rax = end of the rdx (inLeft) buffer
  5270  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5271  	LONG $0xd1970f41         // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5272  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]   ; rax = end of the rcx (inRight) buffer
  5273  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5274  	LONG $0xd3970f41         // seta    r11b   ; r11b = (out end > inLeft start)
  5275  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5276  	WORD $0x970f; BYTE $0xd0 // seta    al   ; al = (inRight end > out start)
  5277  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5278  	LONG $0xd7970f40         // seta    dil   ; dil = (out end > inRight start)
  5279  	WORD $0xf631             // xor    esi, esi   ; esi = 0 on both overlap exits below
  5280  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5281  	JNE  LBB0_696 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5282  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5283  	JNE  LBB0_696 // both flags set: out overlaps inRight, so the vector loop is skipped
  5284  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5285  	WORD $0xe683; BYTE $0xf8 // and    esi, -8   ; rsi = count rounded down to a multiple of 8 elements
  5286  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5287  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5288  	LONG $0x03e9c149         // shr    r9, 3
  5289  	LONG $0x01c18349         // add    r9, 1   ; r9 = (rsi - 8)/8 + 1 = number of 8-element groups
  5290  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5291  	JE   LBB0_690 // rsi == 8: a single group, so bypass the 2x-unrolled loop
  5292  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5293  	LONG $0xfee08348         // and    rax, -2
  5294  	WORD $0xf748; BYTE $0xd8 // neg    rax   ; rax = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5295  	WORD $0xff31             // xor    edi, edi   ; rdi = element index, starts at 0
  5296  
  5297  LBB0_692: // 2x-unrolled loop: each pass stores [rdx] - [rcx] to [r8] for 16 dword elements (four 16-byte vectors) using subps.
  5298  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]   ; first group: elements rdi .. rdi+7
  5299  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  5300  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  5301  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2   ; packed single-precision subtract: rdx operand minus rcx operand
  5302  	LONG $0xb954100f; BYTE $0x10   // movups    xmm2, oword [rcx + 4*rdi + 16]
  5303  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  5304  	LONG $0x04110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm0
  5305  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
  5306  	LONG $0xba44100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rdi + 32]   ; second group: elements rdi+8 .. rdi+15
  5307  	LONG $0xba4c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rdi + 48]
  5308  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
  5309  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2
  5310  	LONG $0xb954100f; BYTE $0x30   // movups    xmm2, oword [rcx + 4*rdi + 48]
  5311  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  5312  	LONG $0x44110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm0
  5313  	LONG $0x4c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm1
  5314  	LONG $0x10c78348               // add    rdi, 16   ; advance the element index by 16
  5315  	LONG $0x02c08348               // add    rax, 2   ; two groups done; rax is a negative counter that ends at 0
  5316  	JNE  LBB0_692 // repeat while the counter is non-zero
  5317  	JMP  LBB0_693 // NOTE(review): LBB0_693 presumably handles the leftover odd group and tail - confirm
  5318  
  5319  LBB0_734: // Preamble for the SSE byte-multiply loop: buffer-overlap check, loop-counter setup, and load of the 0x00ff word mask.
  5320  	LONG $0x10348d4b             // lea    rsi, [r8 + r10]   ; rsi = r8 + r10, end of out (r8 = out per the prologue; r10 presumably the byte/element count - confirm)
  5321  	LONG $0x12048d4a             // lea    rax, [rdx + r10]   ; rax = end of the rdx (inLeft) buffer
  5322  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  5323  	LONG $0xd1970f41             // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5324  	LONG $0x11048d4a             // lea    rax, [rcx + r10]   ; rax = end of the rcx (inRight) buffer
  5325  	WORD $0x3948; BYTE $0xd6     // cmp    rsi, rdx
  5326  	LONG $0xd3970f41             // seta    r11b   ; r11b = (out end > inLeft start)
  5327  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  5328  	WORD $0x970f; BYTE $0xd0     // seta    al   ; al = (inRight end > out start)
  5329  	WORD $0x3948; BYTE $0xce     // cmp    rsi, rcx
  5330  	LONG $0xd6970f40             // seta    sil   ; sil = (out end > inRight start); overwrites the low byte of rsi, which is no longer needed
  5331  	WORD $0xff31                 // xor    edi, edi   ; edi = 0 on both overlap exits below
  5332  	WORD $0x8445; BYTE $0xd9     // test    r9b, r11b
  5333  	JNE  LBB0_743 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5334  	WORD $0x2040; BYTE $0xf0     // and    al, sil
  5335  	JNE  LBB0_743 // both flags set: out overlaps inRight, so the vector loop is skipped
  5336  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
  5337  	WORD $0xe783; BYTE $0xe0     // and    edi, -32   ; rdi = count rounded down to a multiple of 32 bytes
  5338  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
  5339  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
  5340  	LONG $0x05e9c149             // shr    r9, 5
  5341  	LONG $0x01c18349             // add    r9, 1   ; r9 = (rdi - 32)/32 + 1 = number of 32-byte groups
  5342  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5343  	JE   LBB0_737 // rdi == 32: a single group, so bypass the 2x-unrolled loop
  5344  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
  5345  	LONG $0xfee68348             // and    rsi, -2
  5346  	WORD $0xf748; BYTE $0xde     // neg    rsi   ; rsi = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5347  	WORD $0xc031                 // xor    eax, eax   ; rax = byte offset, starts at 0
  5348  	LONG $0x456f0f66; BYTE $0x00 // movdqa    xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */ ; xmm0 = 0x00ff-per-word mask (LCDATA1; BP is pointed at it in the prologue)
  5349  
  5350  LBB0_739: // 2x-unrolled loop: each pass stores the low 8 bits of [rdx]*[rcx] to [r8] for 64 byte elements, multiplying in 16-bit lanes (pmullw).
  5351  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]   ; first group: bytes rax .. rax+31 of each input
  5352  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  5353  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  5354  	LONG $0x646f0ff3; WORD $0x1001             // movdqu    xmm4, oword [rcx + rax + 16]
  5355  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1   ; low 8 bytes zero-extended to 8 words
  5356  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1   ; high 8 bytes, each byte b duplicated into a word (b*0x0101)
  5357  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  5358  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  5359  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1   ; high-half products; the low byte of each word equals the low byte of a*b since 0x0101*0x0101 = 1 (mod 256)
  5360  	LONG $0xd8db0f66                           // pand    xmm3, xmm0   ; keep only the low byte of each word
  5361  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5   ; low-half products
  5362  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  5363  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3   ; repack 16 words into 16 bytes; values are <= 0xff after the mask, so saturation has no effect
  5364  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2   ; same sequence for the second 16 bytes
  5365  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  5366  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  5367  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  5368  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  5369  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  5370  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5371  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5372  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  5373  	LONG $0x7f0f41f3; WORD $0x0034             // movdqu    oword [r8 + rax], xmm6
  5374  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm3
  5375  	LONG $0x4c6f0ff3; WORD $0x2002             // movdqu    xmm1, oword [rdx + rax + 32]   ; second group: bytes rax+32 .. rax+63, same sequence
  5376  	LONG $0x546f0ff3; WORD $0x3002             // movdqu    xmm2, oword [rdx + rax + 48]
  5377  	LONG $0x5c6f0ff3; WORD $0x2001             // movdqu    xmm3, oword [rcx + rax + 32]
  5378  	LONG $0x646f0ff3; WORD $0x3001             // movdqu    xmm4, oword [rcx + rax + 48]
  5379  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  5380  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  5381  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  5382  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  5383  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5384  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5385  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  5386  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  5387  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  5388  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  5389  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  5390  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  5391  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  5392  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  5393  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  5394  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5395  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5396  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  5397  	LONG $0x7f0f41f3; WORD $0x0074; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm6
  5398  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm3
  5399  	LONG $0x40c08348                           // add    rax, 64   ; advance the byte offset by 64
  5400  	LONG $0x02c68348                           // add    rsi, 2   ; two groups done; rsi is a negative counter that ends at 0
  5401  	JNE  LBB0_739 // repeat while the counter is non-zero
  5402  	JMP  LBB0_740 // NOTE(review): LBB0_740 presumably handles the leftover odd group and tail - confirm
  5403  
  5404  LBB0_884: // Preamble for the SSE byte-multiply loop: buffer-overlap check, loop-counter setup, and load of the 0x00ff word mask.
  5405  	LONG $0x10348d4b             // lea    rsi, [r8 + r10]   ; rsi = r8 + r10, end of out (r8 = out per the prologue; r10 presumably the byte/element count - confirm)
  5406  	LONG $0x12048d4a             // lea    rax, [rdx + r10]   ; rax = end of the rdx (inLeft) buffer
  5407  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  5408  	LONG $0xd1970f41             // seta    r9b   ; r9b = (inLeft end > out start), unsigned
  5409  	LONG $0x11048d4a             // lea    rax, [rcx + r10]   ; rax = end of the rcx (inRight) buffer
  5410  	WORD $0x3948; BYTE $0xd6     // cmp    rsi, rdx
  5411  	LONG $0xd3970f41             // seta    r11b   ; r11b = (out end > inLeft start)
  5412  	WORD $0x394c; BYTE $0xc0     // cmp    rax, r8
  5413  	WORD $0x970f; BYTE $0xd0     // seta    al   ; al = (inRight end > out start)
  5414  	WORD $0x3948; BYTE $0xce     // cmp    rsi, rcx
  5415  	LONG $0xd6970f40             // seta    sil   ; sil = (out end > inRight start); overwrites the low byte of rsi, which is no longer needed
  5416  	WORD $0xff31                 // xor    edi, edi   ; edi = 0 on both overlap exits below
  5417  	WORD $0x8445; BYTE $0xd9     // test    r9b, r11b
  5418  	JNE  LBB0_893 // both flags set: out overlaps inLeft, so the vector loop is skipped
  5419  	WORD $0x2040; BYTE $0xf0     // and    al, sil
  5420  	JNE  LBB0_893 // both flags set: out overlaps inRight, so the vector loop is skipped
  5421  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
  5422  	WORD $0xe783; BYTE $0xe0     // and    edi, -32   ; rdi = count rounded down to a multiple of 32 bytes
  5423  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
  5424  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
  5425  	LONG $0x05e9c149             // shr    r9, 5
  5426  	LONG $0x01c18349             // add    r9, 1   ; r9 = (rdi - 32)/32 + 1 = number of 32-byte groups
  5427  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
  5428  	JE   LBB0_887 // rdi == 32: a single group, so bypass the 2x-unrolled loop
  5429  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
  5430  	LONG $0xfee68348             // and    rsi, -2
  5431  	WORD $0xf748; BYTE $0xde     // neg    rsi   ; rsi = -(group count rounded down to even); the loop adds 2 until it reaches 0
  5432  	WORD $0xc031                 // xor    eax, eax   ; rax = byte offset, starts at 0
  5433  	LONG $0x456f0f66; BYTE $0x00 // movdqa    xmm0, oword 0[rbp] /* [rip + .LCPI0_0] */ ; xmm0 = 0x00ff-per-word mask (LCDATA1; BP is pointed at it in the prologue)
  5434  
  5435  LBB0_889: // 2x-unrolled loop: each pass stores the low 8 bits of [rdx]*[rcx] to [r8] for 64 byte elements, multiplying in 16-bit lanes (pmullw).
  5436  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]   ; first group: bytes rax .. rax+31 of each input
  5437  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  5438  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  5439  	LONG $0x646f0ff3; WORD $0x1001             // movdqu    xmm4, oword [rcx + rax + 16]
  5440  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1   ; low 8 bytes zero-extended to 8 words
  5441  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1   ; high 8 bytes, each byte b duplicated into a word (b*0x0101)
  5442  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  5443  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  5444  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1   ; high-half products; the low byte of each word equals the low byte of a*b since 0x0101*0x0101 = 1 (mod 256)
  5445  	LONG $0xd8db0f66                           // pand    xmm3, xmm0   ; keep only the low byte of each word
  5446  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5   ; low-half products
  5447  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  5448  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3   ; repack 16 words into 16 bytes; values are <= 0xff after the mask, so saturation has no effect
  5449  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2   ; same sequence for the second 16 bytes
  5450  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  5451  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  5452  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  5453  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  5454  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  5455  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5456  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5457  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  5458  	LONG $0x7f0f41f3; WORD $0x0034             // movdqu    oword [r8 + rax], xmm6
  5459  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm3
  5460  	LONG $0x4c6f0ff3; WORD $0x2002             // movdqu    xmm1, oword [rdx + rax + 32]   ; second group: bytes rax+32 .. rax+63, same sequence
  5461  	LONG $0x546f0ff3; WORD $0x3002             // movdqu    xmm2, oword [rdx + rax + 48]
  5462  	LONG $0x5c6f0ff3; WORD $0x2001             // movdqu    xmm3, oword [rcx + rax + 32]
  5463  	LONG $0x646f0ff3; WORD $0x3001             // movdqu    xmm4, oword [rcx + rax + 48]
  5464  	LONG $0x30380f66; BYTE $0xe9               // pmovzxbw    xmm5, xmm1
  5465  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  5466  	LONG $0x30380f66; BYTE $0xf3               // pmovzxbw    xmm6, xmm3
  5467  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  5468  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5469  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5470  	LONG $0xf5d50f66                           // pmullw    xmm6, xmm5
  5471  	LONG $0xf0db0f66                           // pand    xmm6, xmm0
  5472  	LONG $0xf3670f66                           // packuswb    xmm6, xmm3
  5473  	LONG $0x30380f66; BYTE $0xca               // pmovzxbw    xmm1, xmm2
  5474  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  5475  	LONG $0x30380f66; BYTE $0xdc               // pmovzxbw    xmm3, xmm4
  5476  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
  5477  	LONG $0xe2d50f66                           // pmullw    xmm4, xmm2
  5478  	LONG $0xe0db0f66                           // pand    xmm4, xmm0
  5479  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  5480  	LONG $0xd8db0f66                           // pand    xmm3, xmm0
  5481  	LONG $0xdc670f66                           // packuswb    xmm3, xmm4
  5482  	LONG $0x7f0f41f3; WORD $0x0074; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm6
  5483  	LONG $0x7f0f41f3; WORD $0x005c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm3
  5484  	LONG $0x40c08348                           // add    rax, 64   ; advance the byte offset by 64
  5485  	LONG $0x02c68348                           // add    rsi, 2   ; two groups done; rsi is a negative counter that ends at 0
  5486  	JNE  LBB0_889 // repeat while the counter is non-zero
  5487  	JMP  LBB0_890 // NOTE(review): LBB0_890 presumably handles the leftover odd group and tail - confirm
  5488  
  5489  LBB0_49:
        // Setup for the packed-byte add loop at LBB0_54: tests whether the output
        // range [r8, r8 + r10) overlaps either input range ([rdx, rdx + r10) or
        // [rcx, rcx + r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a byte count - confirm.
  5490  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  5491  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  5492  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5493  	LONG $0xd1970f41         // seta    r9b
  5494  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  5495  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5496  	LONG $0xd3970f41         // seta    r11b
  5497  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5498  	WORD $0x970f; BYTE $0xd0 // seta    al
  5499  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5500  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_58 is entered with rsi = 0 on overlap.
  5501  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5502  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5503  	JNE  LBB0_58
        // al & dil: the output range overlaps the rcx range.
  5504  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5505  	JNE  LBB0_58
        // rsi = r10d rounded down to a multiple of 32; r9 = (rsi - 32) / 32 + 1.
  5506  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5507  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  5508  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5509  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5510  	LONG $0x05e9c149         // shr    r9, 5
  5511  	LONG $0x01c18349         // add    r9, 1
        // rsi == 32: branch to LBB0_52 instead of entering the loop.
  5512  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5513  	JE   LBB0_52
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting offset.
  5514  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5515  	LONG $0xfee08348         // and    rax, -2
  5516  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5517  	WORD $0xff31             // xor    edi, edi
  5518  
  5519  LBB0_54:
        // Loop body, two 32-byte steps per iteration: the 64 bytes at r8 + rdi receive
        // the bytewise sum (paddb) of the 64 bytes at rdx + rdi and at rcx + rdi.
        // rdi advances by 64 and rax by 2 per iteration; the loop repeats until rax is zero.
  5520  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  5521  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  5522  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  5523  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  5524  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  5525  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  5526  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  5527  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  5528  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  5529  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  5530  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  5531  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  5532  	LONG $0x446f0ff3; WORD $0x3039             // movdqu    xmm0, oword [rcx + rdi + 48]
  5533  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  5534  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm2
  5535  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm0
  5536  	LONG $0x40c78348                           // add    rdi, 64
  5537  	LONG $0x02c08348                           // add    rax, 2
  5538  	JNE  LBB0_54
        // Loop finished: continue at LBB0_55 (defined outside this view).
  5539  	JMP  LBB0_55
  5540  
  5541  LBB0_395:
        // Setup for the packed-byte subtract loop at LBB0_400: tests whether the output
        // range [r8, r8 + r10) overlaps either input range ([rdx, rdx + r10) or
        // [rcx, rcx + r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a byte count - confirm.
  5542  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  5543  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  5544  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5545  	LONG $0xd1970f41         // seta    r9b
  5546  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  5547  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5548  	LONG $0xd3970f41         // seta    r11b
  5549  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5550  	WORD $0x970f; BYTE $0xd0 // seta    al
  5551  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5552  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_404 is entered with rsi = 0 on overlap.
  5553  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5554  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5555  	JNE  LBB0_404
        // al & dil: the output range overlaps the rcx range.
  5556  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5557  	JNE  LBB0_404
        // rsi = r10d rounded down to a multiple of 32; r9 = (rsi - 32) / 32 + 1.
  5558  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5559  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  5560  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5561  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5562  	LONG $0x05e9c149         // shr    r9, 5
  5563  	LONG $0x01c18349         // add    r9, 1
        // rsi == 32: branch to LBB0_398 instead of entering the loop.
  5564  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5565  	JE   LBB0_398
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting offset.
  5566  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5567  	LONG $0xfee08348         // and    rax, -2
  5568  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5569  	WORD $0xff31             // xor    edi, edi
  5570  
  5571  LBB0_400:
        // Loop body, two 32-byte steps per iteration: the 64 bytes at r8 + rdi receive
        // the bytewise difference (psubb) [rdx + rdi] - [rcx + rdi].
        // rdi advances by 64 and rax by 2 per iteration; the loop repeats until rax is zero.
  5572  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  5573  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  5574  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  5575  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  5576  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  5577  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  5578  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  5579  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  5580  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  5581  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  5582  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  5583  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  5584  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
  5585  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  5586  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm0
  5587  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
  5588  	LONG $0x40c78348                           // add    rdi, 64
  5589  	LONG $0x02c08348                           // add    rax, 2
  5590  	JNE  LBB0_400
        // Loop finished: continue at LBB0_401 (defined outside this view).
  5591  	JMP  LBB0_401
  5592  
  5593  LBB0_222:
        // Setup for the packed-byte add loop at LBB0_227: tests whether the output
        // range [r8, r8 + r10) overlaps either input range ([rdx, rdx + r10) or
        // [rcx, rcx + r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a byte count - confirm.
  5594  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  5595  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  5596  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5597  	LONG $0xd1970f41         // seta    r9b
  5598  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  5599  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5600  	LONG $0xd3970f41         // seta    r11b
  5601  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5602  	WORD $0x970f; BYTE $0xd0 // seta    al
  5603  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5604  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_231 is entered with rsi = 0 on overlap.
  5605  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5606  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5607  	JNE  LBB0_231
        // al & dil: the output range overlaps the rcx range.
  5608  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5609  	JNE  LBB0_231
        // rsi = r10d rounded down to a multiple of 32; r9 = (rsi - 32) / 32 + 1.
  5610  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5611  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  5612  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5613  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5614  	LONG $0x05e9c149         // shr    r9, 5
  5615  	LONG $0x01c18349         // add    r9, 1
        // rsi == 32: branch to LBB0_225 instead of entering the loop.
  5616  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5617  	JE   LBB0_225
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting offset.
  5618  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5619  	LONG $0xfee08348         // and    rax, -2
  5620  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5621  	WORD $0xff31             // xor    edi, edi
  5622  
  5623  LBB0_227:
        // Loop body, two 32-byte steps per iteration: the 64 bytes at r8 + rdi receive
        // the bytewise sum (paddb) of the 64 bytes at rdx + rdi and at rcx + rdi.
        // rdi advances by 64 and rax by 2 per iteration; the loop repeats until rax is zero.
  5624  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  5625  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  5626  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  5627  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  5628  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  5629  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  5630  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  5631  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  5632  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  5633  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  5634  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  5635  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  5636  	LONG $0x446f0ff3; WORD $0x3039             // movdqu    xmm0, oword [rcx + rdi + 48]
  5637  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  5638  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm2
  5639  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm0
  5640  	LONG $0x40c78348                           // add    rdi, 64
  5641  	LONG $0x02c08348                           // add    rax, 2
  5642  	JNE  LBB0_227
        // Loop finished: continue at LBB0_228 (defined outside this view).
  5643  	JMP  LBB0_228
  5644  
  5645  LBB0_568:
        // Setup for the packed-byte subtract loop at LBB0_573: tests whether the output
        // range [r8, r8 + r10) overlaps either input range ([rdx, rdx + r10) or
        // [rcx, rcx + r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a byte count - confirm.
  5646  	LONG $0x10348d4b         // lea    rsi, [r8 + r10]
  5647  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  5648  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5649  	LONG $0xd1970f41         // seta    r9b
  5650  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
  5651  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5652  	LONG $0xd3970f41         // seta    r11b
  5653  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5654  	WORD $0x970f; BYTE $0xd0 // seta    al
  5655  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5656  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_577 is entered with rsi = 0 on overlap.
  5657  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5658  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5659  	JNE  LBB0_577
        // al & dil: the output range overlaps the rcx range.
  5660  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5661  	JNE  LBB0_577
        // rsi = r10d rounded down to a multiple of 32; r9 = (rsi - 32) / 32 + 1.
  5662  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5663  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
  5664  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
  5665  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5666  	LONG $0x05e9c149         // shr    r9, 5
  5667  	LONG $0x01c18349         // add    r9, 1
        // rsi == 32: branch to LBB0_571 instead of entering the loop.
  5668  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5669  	JE   LBB0_571
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting offset.
  5670  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5671  	LONG $0xfee08348         // and    rax, -2
  5672  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5673  	WORD $0xff31             // xor    edi, edi
  5674  
  5675  LBB0_573:
        // Loop body, two 32-byte steps per iteration: the 64 bytes at r8 + rdi receive
        // the bytewise difference (psubb) [rdx + rdi] - [rcx + rdi].
        // rdi advances by 64 and rax by 2 per iteration; the loop repeats until rax is zero.
  5676  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  5677  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  5678  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  5679  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  5680  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  5681  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  5682  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  5683  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  5684  	LONG $0x446f0ff3; WORD $0x203a             // movdqu    xmm0, oword [rdx + rdi + 32]
  5685  	LONG $0x4c6f0ff3; WORD $0x303a             // movdqu    xmm1, oword [rdx + rdi + 48]
  5686  	LONG $0x546f0ff3; WORD $0x2039             // movdqu    xmm2, oword [rcx + rdi + 32]
  5687  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  5688  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
  5689  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  5690  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm0
  5691  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
  5692  	LONG $0x40c78348                           // add    rdi, 64
  5693  	LONG $0x02c08348                           // add    rax, 2
  5694  	JNE  LBB0_573
        // Loop finished: continue at LBB0_574 (defined outside this view).
  5695  	JMP  LBB0_574
  5696  
  5697  LBB0_808:
        // Setup for the packed-dword multiply loop at LBB0_813: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5698  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5699  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5700  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5701  	LONG $0xd1970f41         // seta    r9b
  5702  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5703  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5704  	LONG $0xd3970f41         // seta    r11b
  5705  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5706  	WORD $0x970f; BYTE $0xd0 // seta    al
  5707  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5708  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_817 is entered with rsi = 0 on overlap.
  5709  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5710  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5711  	JNE  LBB0_817
        // al & dil: the output range overlaps the rcx range.
  5712  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5713  	JNE  LBB0_817
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5714  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5715  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5716  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5717  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5718  	LONG $0x03e9c149         // shr    r9, 3
  5719  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_811 instead of entering the loop.
  5720  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5721  	JE   LBB0_811
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5722  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5723  	LONG $0xfee08348         // and    rax, -2
  5724  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5725  	WORD $0xff31             // xor    edi, edi
  5726  
  5727  LBB0_813:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit products (pmulld, low 32 bits kept) of the dwords at
        // rdx + 4*rdi and rcx + 4*rdi.
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5728  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5729  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5730  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5731  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  5732  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  5733  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  5734  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  5735  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  5736  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5737  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5738  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5739  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  5740  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  5741  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  5742  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  5743  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  5744  	LONG $0x10c78348                           // add    rdi, 16
  5745  	LONG $0x02c08348                           // add    rax, 2
  5746  	JNE  LBB0_813
        // Loop finished: continue at LBB0_814 (defined outside this view).
  5747  	JMP  LBB0_814
  5748  
  5749  LBB0_958:
        // Setup for the packed-dword multiply loop at LBB0_963: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5750  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5751  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5752  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5753  	LONG $0xd1970f41         // seta    r9b
  5754  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5755  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5756  	LONG $0xd3970f41         // seta    r11b
  5757  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5758  	WORD $0x970f; BYTE $0xd0 // seta    al
  5759  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5760  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_967 is entered with rsi = 0 on overlap.
  5761  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5762  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5763  	JNE  LBB0_967
        // al & dil: the output range overlaps the rcx range.
  5764  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5765  	JNE  LBB0_967
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5766  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5767  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5768  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5769  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5770  	LONG $0x03e9c149         // shr    r9, 3
  5771  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_961 instead of entering the loop.
  5772  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5773  	JE   LBB0_961
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5774  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5775  	LONG $0xfee08348         // and    rax, -2
  5776  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5777  	WORD $0xff31             // xor    edi, edi
  5778  
  5779  LBB0_963:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit products (pmulld, low 32 bits kept) of the dwords at
        // rdx + 4*rdi and rcx + 4*rdi.
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5780  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5781  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5782  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5783  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  5784  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  5785  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  5786  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  5787  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  5788  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5789  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5790  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5791  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  5792  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  5793  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  5794  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  5795  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  5796  	LONG $0x10c78348                           // add    rdi, 16
  5797  	LONG $0x02c08348                           // add    rax, 2
  5798  	JNE  LBB0_963
        // Loop finished: continue at LBB0_964 (defined outside this view).
  5799  	JMP  LBB0_964
  5800  
  5801  LBB0_123:
        // Setup for the packed-dword add loop at LBB0_128: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5802  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5803  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5804  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5805  	LONG $0xd1970f41         // seta    r9b
  5806  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5807  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5808  	LONG $0xd3970f41         // seta    r11b
  5809  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5810  	WORD $0x970f; BYTE $0xd0 // seta    al
  5811  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5812  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_132 is entered with rsi = 0 on overlap.
  5813  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5814  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5815  	JNE  LBB0_132
        // al & dil: the output range overlaps the rcx range.
  5816  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5817  	JNE  LBB0_132
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5818  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5819  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5820  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5821  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5822  	LONG $0x03e9c149         // shr    r9, 3
  5823  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_126 instead of entering the loop.
  5824  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5825  	JE   LBB0_126
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5826  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5827  	LONG $0xfee08348         // and    rax, -2
  5828  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5829  	WORD $0xff31             // xor    edi, edi
  5830  
  5831  LBB0_128:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit sums (paddd) of the dwords at rdx + 4*rdi and rcx + 4*rdi.
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5832  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5833  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5834  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5835  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  5836  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  5837  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  5838  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  5839  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  5840  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5841  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5842  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5843  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  5844  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  5845  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  5846  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  5847  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  5848  	LONG $0x10c78348                           // add    rdi, 16
  5849  	LONG $0x02c08348                           // add    rax, 2
  5850  	JNE  LBB0_128
        // Loop finished: continue at LBB0_129 (defined outside this view).
  5851  	JMP  LBB0_129
  5852  
  5853  LBB0_469:
        // Setup for the packed-dword subtract loop at LBB0_474: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5854  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5855  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5856  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5857  	LONG $0xd1970f41         // seta    r9b
  5858  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5859  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5860  	LONG $0xd3970f41         // seta    r11b
  5861  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5862  	WORD $0x970f; BYTE $0xd0 // seta    al
  5863  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5864  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_478 is entered with rsi = 0 on overlap.
  5865  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5866  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5867  	JNE  LBB0_478
        // al & dil: the output range overlaps the rcx range.
  5868  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5869  	JNE  LBB0_478
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5870  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5871  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5872  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5873  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5874  	LONG $0x03e9c149         // shr    r9, 3
  5875  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_472 instead of entering the loop.
  5876  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5877  	JE   LBB0_472
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5878  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5879  	LONG $0xfee08348         // and    rax, -2
  5880  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5881  	WORD $0xff31             // xor    edi, edi
  5882  
  5883  LBB0_474:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit differences (psubd) [rdx + 4*rdi] - [rcx + 4*rdi].
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5884  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5885  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5886  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5887  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  5888  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  5889  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  5890  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  5891  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  5892  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5893  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5894  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5895  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  5896  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
  5897  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  5898  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm0
  5899  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
  5900  	LONG $0x10c78348                           // add    rdi, 16
  5901  	LONG $0x02c08348                           // add    rax, 2
  5902  	JNE  LBB0_474
        // Loop finished: continue at LBB0_475 (defined outside this view).
  5903  	JMP  LBB0_475
  5904  
  5905  LBB0_296:
        // Setup for the packed-dword add loop at LBB0_301: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5906  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5907  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5908  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5909  	LONG $0xd1970f41         // seta    r9b
  5910  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5911  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5912  	LONG $0xd3970f41         // seta    r11b
  5913  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5914  	WORD $0x970f; BYTE $0xd0 // seta    al
  5915  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5916  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_305 is entered with rsi = 0 on overlap.
  5917  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5918  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5919  	JNE  LBB0_305
        // al & dil: the output range overlaps the rcx range.
  5920  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5921  	JNE  LBB0_305
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5922  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5923  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5924  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5925  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5926  	LONG $0x03e9c149         // shr    r9, 3
  5927  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_299 instead of entering the loop.
  5928  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5929  	JE   LBB0_299
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5930  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5931  	LONG $0xfee08348         // and    rax, -2
  5932  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5933  	WORD $0xff31             // xor    edi, edi
  5934  
  5935  LBB0_301:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit sums (paddd) of the dwords at rdx + 4*rdi and rcx + 4*rdi.
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5936  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5937  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5938  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5939  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  5940  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  5941  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  5942  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  5943  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  5944  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5945  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5946  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5947  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  5948  	LONG $0x446f0ff3; WORD $0x30b9             // movdqu    xmm0, oword [rcx + 4*rdi + 48]
  5949  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  5950  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm2
  5951  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm0
  5952  	LONG $0x10c78348                           // add    rdi, 16
  5953  	LONG $0x02c08348                           // add    rax, 2
  5954  	JNE  LBB0_301
        // Loop finished: continue at LBB0_302 (defined outside this view).
  5955  	JMP  LBB0_302
  5956  
  5957  LBB0_642:
        // Setup for the packed-dword subtract loop at LBB0_647: tests whether the output
        // range [r8, r8 + 4*r10) overlaps either input range ([rdx, rdx + 4*r10) or
        // [rcx, rcx + 4*r10)) and, if not, derives the loop counters.
        // NOTE(review): r10 is assigned outside this view; it is used here as a count of 4-byte elements - confirm.
  5958  	LONG $0x90348d4b         // lea    rsi, [r8 + 4*r10]
  5959  	LONG $0x92048d4a         // lea    rax, [rdx + 4*r10]
  5960  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5961  	LONG $0xd1970f41         // seta    r9b
  5962  	LONG $0x91048d4a         // lea    rax, [rcx + 4*r10]
  5963  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
  5964  	LONG $0xd3970f41         // seta    r11b
  5965  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  5966  	WORD $0x970f; BYTE $0xd0 // seta    al
  5967  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
  5968  	LONG $0xd7970f40         // seta    dil
        // rsi is cleared before branching, so LBB0_651 is entered with rsi = 0 on overlap.
  5969  	WORD $0xf631             // xor    esi, esi
        // r9b & r11b: the output range overlaps the rdx range.
  5970  	WORD $0x8445; BYTE $0xd9 // test    r9b, r11b
  5971  	JNE  LBB0_651
        // al & dil: the output range overlaps the rcx range.
  5972  	WORD $0x2040; BYTE $0xf8 // and    al, dil
  5973  	JNE  LBB0_651
        // rsi = r10d rounded down to a multiple of 8; r9 = (rsi - 8) / 8 + 1.
  5974  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
  5975  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
  5976  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
  5977  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
  5978  	LONG $0x03e9c149         // shr    r9, 3
  5979  	LONG $0x01c18349         // add    r9, 1
        // rsi == 8: branch to LBB0_645 instead of entering the loop.
  5980  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
  5981  	JE   LBB0_645
        // rax = -(r9 & -2), the loop counter (stepped by +2 until zero); rdi = 0 is the starting element index.
  5982  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
  5983  	LONG $0xfee08348         // and    rax, -2
  5984  	WORD $0xf748; BYTE $0xd8 // neg    rax
  5985  	WORD $0xff31             // xor    edi, edi
  5986  
  5987  LBB0_647:
        // Loop body, two 8-dword steps per iteration: the 16 dwords at r8 + 4*rdi receive
        // the lane-wise 32-bit differences (psubd) [rdx + 4*rdi] - [rcx + 4*rdi].
        // rdi advances by 16 and rax by 2 per iteration; the loop repeats until rax is zero.
  5988  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  5989  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  5990  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  5991  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  5992  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  5993  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  5994  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  5995  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  5996  	LONG $0x446f0ff3; WORD $0x20ba             // movdqu    xmm0, oword [rdx + 4*rdi + 32]
  5997  	LONG $0x4c6f0ff3; WORD $0x30ba             // movdqu    xmm1, oword [rdx + 4*rdi + 48]
  5998  	LONG $0x546f0ff3; WORD $0x20b9             // movdqu    xmm2, oword [rcx + 4*rdi + 32]
  5999  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  6000  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
  6001  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  6002  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm0
  6003  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
  6004  	LONG $0x10c78348                           // add    rdi, 16
  6005  	LONG $0x02c08348                           // add    rax, 2
  6006  	JNE  LBB0_647
        // Loop finished: continue at LBB0_648 (defined outside this view).
  6007  	JMP  LBB0_648
  6008  
  6009  LBB0_795:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6010  	WORD $0xff31 // xor    edi, edi
  6011    
  6012  LBB0_798:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes, as two 16-byte vectors) at element index rdi is computed as
        	// [rcx] * [rdx] (pmulld keeps the low 32 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6013  	LONG $0x01c1f641                           // test    r9b, 1
  6014  	JE   LBB0_800
  6015  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6016  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6017  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6018  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  6019  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  6020  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  6021  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  6022  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  6023    
  6024  LBB0_800:
        	// If rsi != r10, continue at LBB0_801, the scalar imul loop near the top of
        	// the function that resumes at index rsi; otherwise jump to LBB0_1013.
        	// NOTE(review): rsi looks like the element count covered by vector code and
        	// r10 the total element count -- confirm.
  6025  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6026  	JNE  LBB0_801
  6027  	JMP  LBB0_1013
  6028  
  6029  LBB0_945:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6030  	WORD $0xff31 // xor    edi, edi
  6031    
  6032  LBB0_948:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes) at element index rdi is computed as [rcx] * [rdx] with pmulld
        	// (low 32 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6033  	LONG $0x01c1f641                           // test    r9b, 1
  6034  	JE   LBB0_950
  6035  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6036  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6037  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6038  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  6039  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  6040  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  6041  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  6042  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  6043    
  6044  LBB0_950:
        	// If rsi != r10, continue at LBB0_951; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_951 is presumably the scalar remainder loop -- confirm.
  6045  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6046  	JNE  LBB0_951
  6047  	JMP  LBB0_1013
  6048  
  6049  LBB0_110:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6050  	WORD $0xff31 // xor    edi, edi
  6051    
  6052  LBB0_113:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes) at element index rdi is computed as [rcx] + [rdx] with paddd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6053  	LONG $0x01c1f641                           // test    r9b, 1
  6054  	JE   LBB0_115
  6055  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6056  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6057  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6058  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  6059  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  6060  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  6061  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  6062  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  6063    
  6064  LBB0_115:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_116.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_116 is presumably the scalar remainder loop -- confirm.
  6065  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6066  	JE   LBB0_1013
  6067  	JMP  LBB0_116
  6068  
  6069  LBB0_456:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6070  	WORD $0xff31 // xor    edi, edi
  6071    
  6072  LBB0_459:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes) at element index rdi is computed as [rdx] - [rcx] with psubd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6073  	LONG $0x01c1f641                           // test    r9b, 1
  6074  	JE   LBB0_461
  6075  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6076  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6077  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6078  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  6079  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  6080  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  6081  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  6082  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  6083    
  6084  LBB0_461:
        	// If rsi != r10, continue at LBB0_462; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_462 is presumably the scalar remainder loop -- confirm.
  6085  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6086  	JNE  LBB0_462
  6087  	JMP  LBB0_1013
  6088  
  6089  LBB0_283:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6090  	WORD $0xff31 // xor    edi, edi
  6091    
  6092  LBB0_286:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes) at element index rdi is computed as [rcx] + [rdx] with paddd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6093  	LONG $0x01c1f641                           // test    r9b, 1
  6094  	JE   LBB0_288
  6095  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6096  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6097  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6098  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  6099  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  6100  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  6101  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  6102  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  6103    
  6104  LBB0_288:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_289.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_289 is presumably the scalar remainder loop -- confirm.
  6105  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6106  	JE   LBB0_1013
  6107  	JMP  LBB0_289
  6108  
  6109  LBB0_629:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6110  	WORD $0xff31 // xor    edi, edi
  6111    
  6112  LBB0_632:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (8 x 32-bit
        	// lanes) at element index rdi is computed as [rdx] - [rcx] with psubd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6113  	LONG $0x01c1f641                           // test    r9b, 1
  6114  	JE   LBB0_634
  6115  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  6116  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  6117  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  6118  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  6119  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  6120  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  6121  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  6122  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  6123    
  6124  LBB0_634:
        	// If rsi != r10, continue at LBB0_635; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_635 is presumably the scalar remainder loop -- confirm.
  6125  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6126  	JNE  LBB0_635
  6127  	JMP  LBB0_1013
  6128  
  6129  LBB0_853:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6130  	WORD $0xff31 // xor    edi, edi
  6131    
  6132  LBB0_856:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles, as two 16-byte vectors) at element index rdi is computed as
        	// [rcx] * [rdx] with mulpd and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6133  	LONG $0x01c1f641                           // test    r9b, 1
  6134  	JE   LBB0_858
  6135  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6136  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6137  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6138  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0
  6139  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  6140  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  6141  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  6142  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  6143    
  6144  LBB0_858:
        	// If rsi != r10, continue at LBB0_859; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_859 is presumably the scalar remainder loop -- confirm.
  6145  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6146  	JNE  LBB0_859
  6147  	JMP  LBB0_1013
  6148  
  6149  LBB0_1003:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6150  	WORD $0xff31 // xor    edi, edi
  6151    
  6152  LBB0_1006:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles) at element index rdi is computed as [rcx] * [rdx] with mulpd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6153  	LONG $0x01c1f641                           // test    r9b, 1
  6154  	JE   LBB0_1008
  6155  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6156  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6157  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6158  	LONG $0xd0590f66                           // mulpd    xmm2, xmm0
  6159  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  6160  	LONG $0xc1590f66                           // mulpd    xmm0, xmm1
  6161  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  6162  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  6163    
  6164  LBB0_1008:
        	// If rsi != r10, continue at LBB0_1009; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_1009 is presumably the scalar remainder loop -- confirm.
  6165  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6166  	JNE  LBB0_1009
  6167  	JMP  LBB0_1013
  6168  
  6169  LBB0_184:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6170  	WORD $0xff31 // xor    edi, edi
  6171    
  6172  LBB0_187:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles) at element index rdi is computed as [rcx] + [rdx] with addpd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6173  	LONG $0x01c1f641                           // test    r9b, 1
  6174  	JE   LBB0_189
  6175  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6176  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6177  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6178  	LONG $0xd0580f66                           // addpd    xmm2, xmm0
  6179  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  6180  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  6181  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  6182  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  6183    
  6184  LBB0_189:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_190.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_190 is presumably the scalar remainder loop -- confirm.
  6185  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6186  	JE   LBB0_1013
  6187  	JMP  LBB0_190
  6188  
  6189  LBB0_530:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6190  	WORD $0xff31 // xor    edi, edi
  6191    
  6192  LBB0_533:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles) at element index rdi is computed as [rdx] - [rcx] with subpd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6193  	LONG $0x01c1f641                           // test    r9b, 1
  6194  	JE   LBB0_535
  6195  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6196  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6197  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6198  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  6199  	LONG $0x54100f66; WORD $0x10f9             // movupd    xmm2, oword [rcx + 8*rdi + 16]
  6200  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  6201  	LONG $0x110f4166; WORD $0xf804             // movupd    oword [r8 + 8*rdi], xmm0
  6202  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
  6203    
  6204  LBB0_535:
        	// If rsi != r10, continue at LBB0_536; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_536 is presumably the scalar remainder loop -- confirm.
  6205  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6206  	JNE  LBB0_536
  6207  	JMP  LBB0_1013
  6208  
  6209  LBB0_357:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6210  	WORD $0xff31 // xor    edi, edi
  6211    
  6212  LBB0_360:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles) at element index rdi is computed as [rcx] + [rdx] with addpd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6213  	LONG $0x01c1f641                           // test    r9b, 1
  6214  	JE   LBB0_362
  6215  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6216  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6217  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6218  	LONG $0xd0580f66                           // addpd    xmm2, xmm0
  6219  	LONG $0x44100f66; WORD $0x10f9             // movupd    xmm0, oword [rcx + 8*rdi + 16]
  6220  	LONG $0xc1580f66                           // addpd    xmm0, xmm1
  6221  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
  6222  	LONG $0x110f4166; WORD $0xf844; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm0
  6223    
  6224  LBB0_362:
        	// If rsi != r10, continue at LBB0_363; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_363 is presumably the scalar remainder loop -- confirm.
  6225  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6226  	JNE  LBB0_363
  6227  	JMP  LBB0_1013
  6228  
  6229  LBB0_703:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6230  	WORD $0xff31 // xor    edi, edi
  6231    
  6232  LBB0_706:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 packed
        	// doubles) at element index rdi is computed as [rdx] - [rcx] with subpd
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6233  	LONG $0x01c1f641                           // test    r9b, 1
  6234  	JE   LBB0_708
  6235  	LONG $0x04100f66; BYTE $0xfa               // movupd    xmm0, oword [rdx + 8*rdi]
  6236  	LONG $0x4c100f66; WORD $0x10fa             // movupd    xmm1, oword [rdx + 8*rdi + 16]
  6237  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
  6238  	LONG $0xc25c0f66                           // subpd    xmm0, xmm2
  6239  	LONG $0x54100f66; WORD $0x10f9             // movupd    xmm2, oword [rcx + 8*rdi + 16]
  6240  	LONG $0xca5c0f66                           // subpd    xmm1, xmm2
  6241  	LONG $0x110f4166; WORD $0xf804             // movupd    oword [r8 + 8*rdi], xmm0
  6242  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
  6243    
  6244  LBB0_708:
        	// If rsi != r10, continue at LBB0_709; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_709 is presumably the scalar remainder loop -- confirm.
  6245  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6246  	JNE  LBB0_709
  6247  	JMP  LBB0_1013
  6248  
  6249  LBB0_750:
        	// Start the byte index (rax) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6250  	WORD $0xc031 // xor    eax, eax
  6251    
  6252  LBB0_753:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes) at byte index rax is multiplied lane-wise and stored to [r8].
        	// SSE has no 8-bit multiply, so each 16-byte vector is split in two halves:
        	//   low 8 bytes:  pmovzxbw zero-extends them to words;
        	//   high 8 bytes: punpckhbw x,x duplicates each byte into both halves of a word.
        	// pmullw multiplies the words, pand with the 0x00ff word mask keeps only the
        	// low byte of each product, and packuswb narrows the two halves back to bytes
        	// (no saturation can occur because every word is already <= 0xff).
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6253  	LONG $0x01c1f641                           // test    r9b, 1
  6254  	JE   LBB0_755
  6255  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  6256  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  6257  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  6258  	LONG $0x446f0ff3; WORD $0x1001             // movdqu    xmm0, oword [rcx + rax + 16]
  6259  	LONG $0x30380f66; BYTE $0xe1               // pmovzxbw    xmm4, xmm1
  6260  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  6261  	LONG $0x30380f66; BYTE $0xeb               // pmovzxbw    xmm5, xmm3
  6262  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  6263  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
        	// rbp was pointed at LCDATA1 (eight 0x00ff words) at function entry.
  6264  	LONG $0x4d6f0f66; BYTE $0x00               // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
  6265  	LONG $0xd9db0f66                           // pand    xmm3, xmm1
  6266  	LONG $0xecd50f66                           // pmullw    xmm5, xmm4
  6267  	LONG $0xe9db0f66                           // pand    xmm5, xmm1
  6268  	LONG $0xeb670f66                           // packuswb    xmm5, xmm3
        	// Same sequence for the second 16 bytes (offset +16).
  6269  	LONG $0x30380f66; BYTE $0xda               // pmovzxbw    xmm3, xmm2
  6270  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  6271  	LONG $0x30380f66; BYTE $0xe0               // pmovzxbw    xmm4, xmm0
  6272  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
  6273  	LONG $0xc2d50f66                           // pmullw    xmm0, xmm2
  6274  	LONG $0xc1db0f66                           // pand    xmm0, xmm1
  6275  	LONG $0xe3d50f66                           // pmullw    xmm4, xmm3
  6276  	LONG $0xe1db0f66                           // pand    xmm4, xmm1
  6277  	LONG $0xe0670f66                           // packuswb    xmm4, xmm0
  6278  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
  6279  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
  6280    
  6281  LBB0_755:
        	// If rdi != r10, continue at LBB0_756; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): here rdi (not rsi) looks like the element count covered by
        	// vector code, and LBB0_756 is presumably the scalar remainder loop -- confirm.
  6282  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
  6283  	JNE  LBB0_756
  6284  	JMP  LBB0_1013
  6285  
  6286  LBB0_900:
        	// Start the byte index (rax) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6287  	WORD $0xc031 // xor    eax, eax
  6288    
  6289  LBB0_903:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes) at byte index rax is multiplied lane-wise and stored to [r8].
        	// Bytes are widened to words first: pmovzxbw for the low 8 bytes, and
        	// punpckhbw x,x (each byte duplicated within its word) for the high 8 bytes.
        	// After pmullw, pand with the 0x00ff word mask keeps the low byte of each
        	// product and packuswb narrows back to bytes; the mask guarantees that
        	// packuswb never saturates.
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6290  	LONG $0x01c1f641                           // test    r9b, 1
  6291  	JE   LBB0_905
  6292  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  6293  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  6294  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  6295  	LONG $0x446f0ff3; WORD $0x1001             // movdqu    xmm0, oword [rcx + rax + 16]
  6296  	LONG $0x30380f66; BYTE $0xe1               // pmovzxbw    xmm4, xmm1
  6297  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  6298  	LONG $0x30380f66; BYTE $0xeb               // pmovzxbw    xmm5, xmm3
  6299  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  6300  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
        	// rbp was pointed at LCDATA1 (eight 0x00ff words) at function entry.
  6301  	LONG $0x4d6f0f66; BYTE $0x00               // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
  6302  	LONG $0xd9db0f66                           // pand    xmm3, xmm1
  6303  	LONG $0xecd50f66                           // pmullw    xmm5, xmm4
  6304  	LONG $0xe9db0f66                           // pand    xmm5, xmm1
  6305  	LONG $0xeb670f66                           // packuswb    xmm5, xmm3
        	// Same sequence for the second 16 bytes (offset +16).
  6306  	LONG $0x30380f66; BYTE $0xda               // pmovzxbw    xmm3, xmm2
  6307  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  6308  	LONG $0x30380f66; BYTE $0xe0               // pmovzxbw    xmm4, xmm0
  6309  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
  6310  	LONG $0xc2d50f66                           // pmullw    xmm0, xmm2
  6311  	LONG $0xc1db0f66                           // pand    xmm0, xmm1
  6312  	LONG $0xe3d50f66                           // pmullw    xmm4, xmm3
  6313  	LONG $0xe1db0f66                           // pand    xmm4, xmm1
  6314  	LONG $0xe0670f66                           // packuswb    xmm4, xmm0
  6315  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
  6316  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
  6317    
  6318  LBB0_905:
        	// If rdi != r10, continue at LBB0_906; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): here rdi (not rsi) looks like the element count covered by
        	// vector code, and LBB0_906 is presumably the scalar remainder loop -- confirm.
  6319  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
  6320  	JNE  LBB0_906
  6321  	JMP  LBB0_1013
  6322  
  6323  LBB0_65:
        	// Start the byte index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6324  	WORD $0xff31 // xor    edi, edi
  6325    
  6326  LBB0_68:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes, as two 16-byte vectors) at byte index rdi is computed as
        	// [rcx] + [rdx] with paddb and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6327  	LONG $0x01c1f641                           // test    r9b, 1
  6328  	JE   LBB0_70
  6329  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  6330  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  6331  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  6332  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  6333  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  6334  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  6335  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  6336  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  6337    
  6338  LBB0_70:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_71.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_71 is presumably the scalar remainder loop -- confirm.
  6339  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6340  	JE   LBB0_1013
  6341  	JMP  LBB0_71
  6342  
  6343  LBB0_411:
        	// Start the byte index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6344  	WORD $0xff31 // xor    edi, edi
  6345    
  6346  LBB0_414:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes) at byte index rdi is computed as [rdx] - [rcx] with psubb
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6347  	LONG $0x01c1f641                           // test    r9b, 1
  6348  	JE   LBB0_416
  6349  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  6350  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  6351  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  6352  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  6353  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  6354  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  6355  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  6356  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  6357    
  6358  LBB0_416:
        	// If rsi != r10, continue at LBB0_417; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_417 is presumably the scalar remainder loop -- confirm.
  6359  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6360  	JNE  LBB0_417
  6361  	JMP  LBB0_1013
  6362  
  6363  LBB0_238:
        	// Start the byte index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6364  	WORD $0xff31 // xor    edi, edi
  6365    
  6366  LBB0_241:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes) at byte index rdi is computed as [rcx] + [rdx] with paddb
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6367  	LONG $0x01c1f641                           // test    r9b, 1
  6368  	JE   LBB0_243
  6369  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  6370  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  6371  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  6372  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  6373  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  6374  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  6375  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  6376  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  6377    
  6378  LBB0_243:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_244.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_244 is presumably the scalar remainder loop -- confirm.
  6379  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6380  	JE   LBB0_1013
  6381  	JMP  LBB0_244
  6382  
  6383  LBB0_584:
        	// Start the byte index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6384  	WORD $0xff31 // xor    edi, edi
  6385    
  6386  LBB0_587:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (32 x 8-bit
        	// lanes) at byte index rdi is computed as [rdx] - [rcx] with psubb
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6387  	LONG $0x01c1f641                           // test    r9b, 1
  6388  	JE   LBB0_589
  6389  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  6390  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  6391  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  6392  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  6393  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  6394  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  6395  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  6396  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  6397    
  6398  LBB0_589:
        	// If rsi != r10, continue at LBB0_590; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_590 is presumably the scalar remainder loop -- confirm.
  6399  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6400  	JNE  LBB0_590
  6401  	JMP  LBB0_1013
  6402  
  6403  LBB0_139:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6404  	WORD $0xff31 // xor    edi, edi
  6405    
  6406  LBB0_142:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 x 64-bit
        	// lanes, as two 16-byte vectors) at element index rdi is computed as
        	// [rcx] + [rdx] with paddq and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6407  	LONG $0x01c1f641                           // test    r9b, 1
  6408  	JE   LBB0_144
  6409  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6410  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6411  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6412  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  6413  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  6414  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  6415  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  6416  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  6417    
  6418  LBB0_144:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_145.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_145 is presumably the scalar remainder loop -- confirm.
  6419  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6420  	JE   LBB0_1013
  6421  	JMP  LBB0_145
  6422  
  6423  LBB0_485:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6424  	WORD $0xff31 // xor    edi, edi
  6425    
  6426  LBB0_488:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 x 64-bit
        	// lanes) at element index rdi is computed as [rdx] - [rcx] with psubq
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6427  	LONG $0x01c1f641                           // test    r9b, 1
  6428  	JE   LBB0_490
  6429  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6430  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6431  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6432  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  6433  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  6434  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  6435  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  6436  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  6437    
  6438  LBB0_490:
        	// If rsi != r10, continue at LBB0_491; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_491 is presumably the scalar remainder loop -- confirm.
  6439  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6440  	JNE  LBB0_491
  6441  	JMP  LBB0_1013
  6442  
  6443  LBB0_312:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6444  	WORD $0xff31 // xor    edi, edi
  6445    
  6446  LBB0_315:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 x 64-bit
        	// lanes) at element index rdi is computed as [rcx] + [rdx] with paddq
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6447  	LONG $0x01c1f641                           // test    r9b, 1
  6448  	JE   LBB0_317
  6449  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6450  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6451  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6452  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  6453  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  6454  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  6455  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  6456  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  6457    
  6458  LBB0_317:
        	// If rsi != r10, continue at LBB0_318; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_318 is presumably the scalar remainder loop -- confirm.
  6459  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6460  	JNE  LBB0_318
  6461  	JMP  LBB0_1013
  6462  
  6463  LBB0_658:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6464  	WORD $0xff31 // xor    edi, edi
  6465    
  6466  LBB0_661:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (4 x 64-bit
        	// lanes) at element index rdi is computed as [rdx] - [rcx] with psubq
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6467  	LONG $0x01c1f641                           // test    r9b, 1
  6468  	JE   LBB0_663
  6469  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6470  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6471  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6472  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  6473  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  6474  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  6475  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  6476  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  6477    
  6478  LBB0_663:
        	// If rsi != r10, continue at LBB0_664; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_664 is presumably the scalar remainder loop -- confirm.
  6479  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6480  	JNE  LBB0_664
  6481  	JMP  LBB0_1013
  6482  
  6483  LBB0_766:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6484  	WORD $0xff31 // xor    edi, edi
  6485    
  6486  LBB0_769:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes, as two 16-byte vectors) at element index rdi is computed as
        	// [rcx] * [rdx] (pmullw keeps the low 16 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6487  	LONG $0x01c1f641                           // test    r9b, 1
  6488  	JE   LBB0_771
  6489  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6490  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6491  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6492  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  6493  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6494  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  6495  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6496  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6497    
  6498  LBB0_771:
        	// If rsi != r10, continue at LBB0_772; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_772 is presumably the scalar remainder loop -- confirm.
  6499  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6500  	JNE  LBB0_772
  6501  	JMP  LBB0_1013
  6502  
  6503  LBB0_782:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6504  	WORD $0xff31 // xor    edi, edi
  6505    
  6506  LBB0_785:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rcx] * [rdx] with pmullw
        	// (low 16 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6507  	LONG $0x01c1f641                           // test    r9b, 1
  6508  	JE   LBB0_787
  6509  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6510  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6511  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6512  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  6513  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6514  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  6515  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6516  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6517    
  6518  LBB0_787:
        	// If rsi != r10, continue at LBB0_788; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_788 is presumably the scalar remainder loop -- confirm.
  6519  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6520  	JNE  LBB0_788
  6521  	JMP  LBB0_1013
  6522  
  6523  LBB0_916:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6524  	WORD $0xff31 // xor    edi, edi
  6525    
  6526  LBB0_919:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rcx] * [rdx] with pmullw
        	// (low 16 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6527  	LONG $0x01c1f641                           // test    r9b, 1
  6528  	JE   LBB0_921
  6529  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6530  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6531  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6532  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  6533  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6534  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  6535  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6536  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6537    
  6538  LBB0_921:
        	// If rsi != r10, continue at LBB0_922; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_922 is presumably the scalar remainder loop -- confirm.
  6539  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6540  	JNE  LBB0_922
  6541  	JMP  LBB0_1013
  6542  
  6543  LBB0_932:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6544  	WORD $0xff31 // xor    edi, edi
  6545    
  6546  LBB0_935:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rcx] * [rdx] with pmullw
        	// (low 16 bits of each product) and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6547  	LONG $0x01c1f641                           // test    r9b, 1
  6548  	JE   LBB0_937
  6549  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6550  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6551  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6552  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
  6553  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6554  	LONG $0xc1d50f66                           // pmullw    xmm0, xmm1
  6555  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6556  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6557    
  6558  LBB0_937:
        	// If rsi != r10, continue at LBB0_938; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_938 is presumably the scalar remainder loop -- confirm.
  6559  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6560  	JNE  LBB0_938
  6561  	JMP  LBB0_1013
  6562  
  6563  LBB0_81:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6564  	WORD $0xff31 // xor    edi, edi
  6565    
  6566  LBB0_84:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rcx] + [rdx] with paddw
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6567  	LONG $0x01c1f641                           // test    r9b, 1
  6568  	JE   LBB0_86
  6569  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6570  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6571  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6572  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  6573  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6574  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  6575  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6576  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6577    
  6578  LBB0_86:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_87.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_87 is presumably the scalar remainder loop -- confirm.
  6579  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6580  	JE   LBB0_1013
  6581  	JMP  LBB0_87
  6582  
  6583  LBB0_97:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6584  	WORD $0xff31 // xor    edi, edi
  6585    
  6586  LBB0_100:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rcx] + [rdx] with paddw
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6587  	LONG $0x01c1f641                           // test    r9b, 1
  6588  	JE   LBB0_102
  6589  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6590  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6591  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6592  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  6593  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6594  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  6595  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6596  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6597    
  6598  LBB0_102:
        	// If rsi == r10, jump to LBB0_1013; otherwise continue at LBB0_103.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_103 is presumably the scalar remainder loop -- confirm.
  6599  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6600  	JE   LBB0_1013
  6601  	JMP  LBB0_103
  6602  
  6603  LBB0_427:
        	// Start the element index (rdi) at zero, then fall through to the odd-chunk check.
        	// NOTE(review): the branch into this label is outside this excerpt.
  6604  	WORD $0xff31 // xor    edi, edi
  6605    
  6606  LBB0_430:
        	// Skipped when bit 0 of r9 is clear. Otherwise one 32-byte chunk (16 x 16-bit
        	// lanes) at element index rdi is computed as [rdx] - [rcx] with psubw
        	// and stored to [r8].
        	// NOTE(review): r9 is presumably the number of 32-byte chunks -- confirm.
  6607  	LONG $0x01c1f641                           // test    r9b, 1
  6608  	JE   LBB0_432
  6609  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6610  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6611  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6612  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  6613  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  6614  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  6615  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  6616  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  6617    
  6618  LBB0_432:
        	// If rsi != r10, continue at LBB0_433; otherwise jump to LBB0_1013.
        	// Both targets are outside this excerpt.
        	// NOTE(review): LBB0_433 is presumably the scalar remainder loop -- confirm.
  6619  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6620  	JNE  LBB0_433
  6621  	JMP  LBB0_1013
  6622  
  6623  LBB0_443:
  6624  	WORD $0xff31 // xor    edi, edi
  6625  
  6626  LBB0_446:
  6627  	LONG $0x01c1f641                           // test    r9b, 1
  6628  	JE   LBB0_448
  6629  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6630  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6631  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6632  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  6633  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  6634  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  6635  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  6636  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  6637  
  6638  LBB0_448:
  6639  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6640  	JNE  LBB0_449
  6641  	JMP  LBB0_1013
  6642  
  6643  LBB0_254:
  6644  	WORD $0xff31 // xor    edi, edi
  6645  
  6646  LBB0_257:
  6647  	LONG $0x01c1f641                           // test    r9b, 1
  6648  	JE   LBB0_259
  6649  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6650  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6651  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6652  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  6653  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6654  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  6655  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6656  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6657  
  6658  LBB0_259:
  6659  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6660  	JE   LBB0_1013
  6661  	JMP  LBB0_260
  6662  
  6663  LBB0_270:
  6664  	WORD $0xff31 // xor    edi, edi
  6665  
  6666  LBB0_273:
  6667  	LONG $0x01c1f641                           // test    r9b, 1
  6668  	JE   LBB0_275
  6669  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6670  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6671  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6672  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
  6673  	LONG $0x446f0ff3; WORD $0x1079             // movdqu    xmm0, oword [rcx + 2*rdi + 16]
  6674  	LONG $0xc1fd0f66                           // paddw    xmm0, xmm1
  6675  	LONG $0x7f0f41f3; WORD $0x7814             // movdqu    oword [r8 + 2*rdi], xmm2
  6676  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
  6677  
  6678  LBB0_275:
  6679  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6680  	JE   LBB0_1013
  6681  	JMP  LBB0_276
  6682  
  6683  LBB0_600:
  6684  	WORD $0xff31 // xor    edi, edi
  6685  
  6686  LBB0_603:
  6687  	LONG $0x01c1f641                           // test    r9b, 1
  6688  	JE   LBB0_605
  6689  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6690  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6691  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6692  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  6693  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  6694  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  6695  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  6696  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  6697  
  6698  LBB0_605:
  6699  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6700  	JNE  LBB0_606
  6701  	JMP  LBB0_1013
  6702  
  6703  LBB0_616:
  6704  	WORD $0xff31 // xor    edi, edi
  6705  
  6706  LBB0_619:
  6707  	LONG $0x01c1f641                           // test    r9b, 1
  6708  	JE   LBB0_621
  6709  	LONG $0x046f0ff3; BYTE $0x7a               // movdqu    xmm0, oword [rdx + 2*rdi]
  6710  	LONG $0x4c6f0ff3; WORD $0x107a             // movdqu    xmm1, oword [rdx + 2*rdi + 16]
  6711  	LONG $0x146f0ff3; BYTE $0x79               // movdqu    xmm2, oword [rcx + 2*rdi]
  6712  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
  6713  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
  6714  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
  6715  	LONG $0x7f0f41f3; WORD $0x7804             // movdqu    oword [r8 + 2*rdi], xmm0
  6716  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
  6717  
  6718  LBB0_621:
  6719  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6720  	JNE  LBB0_622
  6721  	JMP  LBB0_1013
  6722  
  6723  LBB0_840:
  6724  	WORD $0xff31 // xor    edi, edi
  6725  
  6726  LBB0_843:
  6727  	LONG $0x01c1f641               // test    r9b, 1
  6728  	JE   LBB0_845
  6729  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6730  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6731  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6732  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  6733  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  6734  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  6735  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  6736  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  6737  
  6738  LBB0_845:
  6739  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6740  	JNE  LBB0_846
  6741  	JMP  LBB0_1013
  6742  
  6743  LBB0_990:
  6744  	WORD $0xff31 // xor    edi, edi
  6745  
  6746  LBB0_993:
  6747  	LONG $0x01c1f641               // test    r9b, 1
  6748  	JE   LBB0_995
  6749  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6750  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6751  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6752  	WORD $0x590f; BYTE $0xd0       // mulps    xmm2, xmm0
  6753  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  6754  	WORD $0x590f; BYTE $0xc1       // mulps    xmm0, xmm1
  6755  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  6756  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  6757  
  6758  LBB0_995:
  6759  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6760  	JNE  LBB0_996
  6761  	JMP  LBB0_1013
  6762  
  6763  LBB0_155:
  6764  	WORD $0xff31 // xor    edi, edi
  6765  
  6766  LBB0_158:
  6767  	LONG $0x01c1f641                           // test    r9b, 1
  6768  	JE   LBB0_160
  6769  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6770  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6771  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6772  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  6773  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  6774  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  6775  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  6776  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  6777  
  6778  LBB0_160:
  6779  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6780  	JE   LBB0_1013
  6781  	JMP  LBB0_161
  6782  
  6783  LBB0_171:
  6784  	WORD $0xff31 // xor    edi, edi
  6785  
  6786  LBB0_174:
  6787  	LONG $0x01c1f641               // test    r9b, 1
  6788  	JE   LBB0_176
  6789  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6790  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6791  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6792  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0
  6793  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  6794  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  6795  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  6796  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  6797  
  6798  LBB0_176:
  6799  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6800  	JE   LBB0_1013
  6801  	JMP  LBB0_177
  6802  
  6803  LBB0_501:
  6804  	WORD $0xff31 // xor    edi, edi
  6805  
  6806  LBB0_504:
  6807  	LONG $0x01c1f641                           // test    r9b, 1
  6808  	JE   LBB0_506
  6809  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6810  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6811  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6812  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  6813  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  6814  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  6815  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  6816  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  6817  
  6818  LBB0_506:
  6819  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6820  	JNE  LBB0_507
  6821  	JMP  LBB0_1013
  6822  
  6823  LBB0_517:
  6824  	WORD $0xff31 // xor    edi, edi
  6825  
  6826  LBB0_520:
  6827  	LONG $0x01c1f641               // test    r9b, 1
  6828  	JE   LBB0_522
  6829  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6830  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6831  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6832  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2
  6833  	LONG $0xb954100f; BYTE $0x10   // movups    xmm2, oword [rcx + 4*rdi + 16]
  6834  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  6835  	LONG $0x04110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm0
  6836  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
  6837  
  6838  LBB0_522:
  6839  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6840  	JNE  LBB0_523
  6841  	JMP  LBB0_1013
  6842  
  6843  LBB0_328:
  6844  	WORD $0xff31 // xor    edi, edi
  6845  
  6846  LBB0_331:
  6847  	LONG $0x01c1f641                           // test    r9b, 1
  6848  	JE   LBB0_333
  6849  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6850  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6851  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6852  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
  6853  	LONG $0x446f0ff3; WORD $0x10f9             // movdqu    xmm0, oword [rcx + 8*rdi + 16]
  6854  	LONG $0xc1d40f66                           // paddq    xmm0, xmm1
  6855  	LONG $0x7f0f41f3; WORD $0xf814             // movdqu    oword [r8 + 8*rdi], xmm2
  6856  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
  6857  
  6858  LBB0_333:
  6859  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6860  	JNE  LBB0_334
  6861  	JMP  LBB0_1013
  6862  
  6863  LBB0_344:
  6864  	WORD $0xff31 // xor    edi, edi
  6865  
  6866  LBB0_347:
  6867  	LONG $0x01c1f641               // test    r9b, 1
  6868  	JE   LBB0_349
  6869  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6870  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6871  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6872  	WORD $0x580f; BYTE $0xd0       // addps    xmm2, xmm0
  6873  	LONG $0xb944100f; BYTE $0x10   // movups    xmm0, oword [rcx + 4*rdi + 16]
  6874  	WORD $0x580f; BYTE $0xc1       // addps    xmm0, xmm1
  6875  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
  6876  	LONG $0x44110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm0
  6877  
  6878  LBB0_349:
  6879  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6880  	JNE  LBB0_350
  6881  	JMP  LBB0_1013
  6882  
  6883  LBB0_674:
  6884  	WORD $0xff31 // xor    edi, edi
  6885  
  6886  LBB0_677:
  6887  	LONG $0x01c1f641                           // test    r9b, 1
  6888  	JE   LBB0_679
  6889  	LONG $0x046f0ff3; BYTE $0xfa               // movdqu    xmm0, oword [rdx + 8*rdi]
  6890  	LONG $0x4c6f0ff3; WORD $0x10fa             // movdqu    xmm1, oword [rdx + 8*rdi + 16]
  6891  	LONG $0x146f0ff3; BYTE $0xf9               // movdqu    xmm2, oword [rcx + 8*rdi]
  6892  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
  6893  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
  6894  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
  6895  	LONG $0x7f0f41f3; WORD $0xf804             // movdqu    oword [r8 + 8*rdi], xmm0
  6896  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
  6897  
  6898  LBB0_679:
  6899  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6900  	JNE  LBB0_680
  6901  	JMP  LBB0_1013
  6902  
  6903  LBB0_690:
  6904  	WORD $0xff31 // xor    edi, edi
  6905  
  6906  LBB0_693:
  6907  	LONG $0x01c1f641               // test    r9b, 1
  6908  	JE   LBB0_695
  6909  	LONG $0xba04100f               // movups    xmm0, oword [rdx + 4*rdi]
  6910  	LONG $0xba4c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rdi + 16]
  6911  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
  6912  	WORD $0x5c0f; BYTE $0xc2       // subps    xmm0, xmm2
  6913  	LONG $0xb954100f; BYTE $0x10   // movups    xmm2, oword [rcx + 4*rdi + 16]
  6914  	WORD $0x5c0f; BYTE $0xca       // subps    xmm1, xmm2
  6915  	LONG $0x04110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm0
  6916  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
  6917  
  6918  LBB0_695:
  6919  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  6920  	JNE  LBB0_696
  6921  	JMP  LBB0_1013
  6922  
  6923  LBB0_737:
  6924  	WORD $0xc031 // xor    eax, eax
  6925  
  6926  LBB0_740:
  6927  	LONG $0x01c1f641                           // test    r9b, 1
  6928  	JE   LBB0_742
  6929  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  6930  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  6931  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  6932  	LONG $0x446f0ff3; WORD $0x1001             // movdqu    xmm0, oword [rcx + rax + 16]
  6933  	LONG $0x30380f66; BYTE $0xe1               // pmovzxbw    xmm4, xmm1
  6934  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  6935  	LONG $0x30380f66; BYTE $0xeb               // pmovzxbw    xmm5, xmm3
  6936  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  6937  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  6938  	LONG $0x4d6f0f66; BYTE $0x00               // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
  6939  	LONG $0xd9db0f66                           // pand    xmm3, xmm1
  6940  	LONG $0xecd50f66                           // pmullw    xmm5, xmm4
  6941  	LONG $0xe9db0f66                           // pand    xmm5, xmm1
  6942  	LONG $0xeb670f66                           // packuswb    xmm5, xmm3
  6943  	LONG $0x30380f66; BYTE $0xda               // pmovzxbw    xmm3, xmm2
  6944  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  6945  	LONG $0x30380f66; BYTE $0xe0               // pmovzxbw    xmm4, xmm0
  6946  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
  6947  	LONG $0xc2d50f66                           // pmullw    xmm0, xmm2
  6948  	LONG $0xc1db0f66                           // pand    xmm0, xmm1
  6949  	LONG $0xe3d50f66                           // pmullw    xmm4, xmm3
  6950  	LONG $0xe1db0f66                           // pand    xmm4, xmm1
  6951  	LONG $0xe0670f66                           // packuswb    xmm4, xmm0
  6952  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
  6953  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
  6954  
  6955  LBB0_742:
  6956  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
  6957  	JNE  LBB0_743
  6958  	JMP  LBB0_1013
  6959  
  6960  LBB0_887:
  6961  	WORD $0xc031 // xor    eax, eax
  6962  
  6963  LBB0_890:
  6964  	LONG $0x01c1f641                           // test    r9b, 1
  6965  	JE   LBB0_892
  6966  	LONG $0x0c6f0ff3; BYTE $0x02               // movdqu    xmm1, oword [rdx + rax]
  6967  	LONG $0x546f0ff3; WORD $0x1002             // movdqu    xmm2, oword [rdx + rax + 16]
  6968  	LONG $0x1c6f0ff3; BYTE $0x01               // movdqu    xmm3, oword [rcx + rax]
  6969  	LONG $0x446f0ff3; WORD $0x1001             // movdqu    xmm0, oword [rcx + rax + 16]
  6970  	LONG $0x30380f66; BYTE $0xe1               // pmovzxbw    xmm4, xmm1
  6971  	LONG $0xc9680f66                           // punpckhbw    xmm1, xmm1
  6972  	LONG $0x30380f66; BYTE $0xeb               // pmovzxbw    xmm5, xmm3
  6973  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
  6974  	LONG $0xd9d50f66                           // pmullw    xmm3, xmm1
  6975  	LONG $0x4d6f0f66; BYTE $0x00               // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
  6976  	LONG $0xd9db0f66                           // pand    xmm3, xmm1
  6977  	LONG $0xecd50f66                           // pmullw    xmm5, xmm4
  6978  	LONG $0xe9db0f66                           // pand    xmm5, xmm1
  6979  	LONG $0xeb670f66                           // packuswb    xmm5, xmm3
  6980  	LONG $0x30380f66; BYTE $0xda               // pmovzxbw    xmm3, xmm2
  6981  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
  6982  	LONG $0x30380f66; BYTE $0xe0               // pmovzxbw    xmm4, xmm0
  6983  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
  6984  	LONG $0xc2d50f66                           // pmullw    xmm0, xmm2
  6985  	LONG $0xc1db0f66                           // pand    xmm0, xmm1
  6986  	LONG $0xe3d50f66                           // pmullw    xmm4, xmm3
  6987  	LONG $0xe1db0f66                           // pand    xmm4, xmm1
  6988  	LONG $0xe0670f66                           // packuswb    xmm4, xmm0
  6989  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
  6990  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
  6991  
  6992  LBB0_892:
  6993  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
  6994  	JNE  LBB0_893
  6995  	JMP  LBB0_1013
  6996  
  6997  LBB0_52:
  6998  	WORD $0xff31 // xor    edi, edi
  6999  
  7000  LBB0_55:
  7001  	LONG $0x01c1f641                           // test    r9b, 1
  7002  	JE   LBB0_57
  7003  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  7004  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  7005  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  7006  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  7007  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  7008  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  7009  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  7010  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  7011  
  7012  LBB0_57:
  7013  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7014  	JE   LBB0_1013
  7015  	JMP  LBB0_58
  7016  
  7017  LBB0_398:
  7018  	WORD $0xff31 // xor    edi, edi
  7019  
  7020  LBB0_401:
  7021  	LONG $0x01c1f641                           // test    r9b, 1
  7022  	JE   LBB0_403
  7023  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  7024  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  7025  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  7026  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  7027  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  7028  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  7029  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  7030  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  7031  
  7032  LBB0_403:
  7033  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7034  	JNE  LBB0_404
  7035  	JMP  LBB0_1013
  7036  
  7037  LBB0_225:
  7038  	WORD $0xff31 // xor    edi, edi
  7039  
  7040  LBB0_228:
  7041  	LONG $0x01c1f641                           // test    r9b, 1
  7042  	JE   LBB0_230
  7043  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  7044  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  7045  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  7046  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
  7047  	LONG $0x446f0ff3; WORD $0x1039             // movdqu    xmm0, oword [rcx + rdi + 16]
  7048  	LONG $0xc1fc0f66                           // paddb    xmm0, xmm1
  7049  	LONG $0x7f0f41f3; WORD $0x3814             // movdqu    oword [r8 + rdi], xmm2
  7050  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
  7051  
  7052  LBB0_230:
  7053  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7054  	JE   LBB0_1013
  7055  	JMP  LBB0_231
  7056  
  7057  LBB0_571:
  7058  	WORD $0xff31 // xor    edi, edi
  7059  
  7060  LBB0_574:
  7061  	LONG $0x01c1f641                           // test    r9b, 1
  7062  	JE   LBB0_576
  7063  	LONG $0x046f0ff3; BYTE $0x3a               // movdqu    xmm0, oword [rdx + rdi]
  7064  	LONG $0x4c6f0ff3; WORD $0x103a             // movdqu    xmm1, oword [rdx + rdi + 16]
  7065  	LONG $0x146f0ff3; BYTE $0x39               // movdqu    xmm2, oword [rcx + rdi]
  7066  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
  7067  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
  7068  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
  7069  	LONG $0x7f0f41f3; WORD $0x3804             // movdqu    oword [r8 + rdi], xmm0
  7070  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
  7071  
  7072  LBB0_576:
  7073  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7074  	JNE  LBB0_577
  7075  	JMP  LBB0_1013
  7076  
  7077  LBB0_811:
  7078  	WORD $0xff31 // xor    edi, edi
  7079  
  7080  LBB0_814:
  7081  	LONG $0x01c1f641                           // test    r9b, 1
  7082  	JE   LBB0_816
  7083  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7084  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7085  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7086  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  7087  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  7088  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  7089  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  7090  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  7091  
  7092  LBB0_816:
  7093  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7094  	JNE  LBB0_817
  7095  	JMP  LBB0_1013
  7096  
  7097  LBB0_961:
  7098  	WORD $0xff31 // xor    edi, edi
  7099  
  7100  LBB0_964:
  7101  	LONG $0x01c1f641                           // test    r9b, 1
  7102  	JE   LBB0_966
  7103  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7104  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7105  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7106  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
  7107  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  7108  	LONG $0x40380f66; BYTE $0xc1               // pmulld    xmm0, xmm1
  7109  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  7110  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  7111  
  7112  LBB0_966:
  7113  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7114  	JNE  LBB0_967
  7115  	JMP  LBB0_1013
  7116  
  7117  LBB0_126:
  7118  	WORD $0xff31 // xor    edi, edi
  7119  
  7120  LBB0_129:
  7121  	LONG $0x01c1f641                           // test    r9b, 1
  7122  	JE   LBB0_131
  7123  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7124  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7125  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7126  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  7127  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  7128  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  7129  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  7130  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  7131  
  7132  LBB0_131:
  7133  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7134  	JE   LBB0_1013
  7135  	JMP  LBB0_132
  7136  
  7137  LBB0_472:
  7138  	WORD $0xff31 // xor    edi, edi
  7139  
  7140  LBB0_475:
  7141  	LONG $0x01c1f641                           // test    r9b, 1
  7142  	JE   LBB0_477
  7143  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7144  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7145  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7146  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  7147  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  7148  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  7149  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  7150  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  7151  
  7152  LBB0_477:
  7153  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7154  	JNE  LBB0_478
  7155  	JMP  LBB0_1013
  7156  
  7157  LBB0_299:
  7158  	WORD $0xff31 // xor    edi, edi
  7159  
  7160  LBB0_302:
  7161  	LONG $0x01c1f641                           // test    r9b, 1
  7162  	JE   LBB0_304
  7163  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7164  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7165  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7166  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
  7167  	LONG $0x446f0ff3; WORD $0x10b9             // movdqu    xmm0, oword [rcx + 4*rdi + 16]
  7168  	LONG $0xc1fe0f66                           // paddd    xmm0, xmm1
  7169  	LONG $0x7f0f41f3; WORD $0xb814             // movdqu    oword [r8 + 4*rdi], xmm2
  7170  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
  7171  
  7172  LBB0_304:
  7173  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7174  	JE   LBB0_1013
  7175  	JMP  LBB0_305
  7176  
  7177  LBB0_645:
  7178  	WORD $0xff31 // xor    edi, edi
  7179  
  7180  LBB0_648:
  7181  	LONG $0x01c1f641                           // test    r9b, 1
  7182  	JE   LBB0_650
  7183  	LONG $0x046f0ff3; BYTE $0xba               // movdqu    xmm0, oword [rdx + 4*rdi]
  7184  	LONG $0x4c6f0ff3; WORD $0x10ba             // movdqu    xmm1, oword [rdx + 4*rdi + 16]
  7185  	LONG $0x146f0ff3; BYTE $0xb9               // movdqu    xmm2, oword [rcx + 4*rdi]
  7186  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
  7187  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
  7188  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
  7189  	LONG $0x7f0f41f3; WORD $0xb804             // movdqu    oword [r8 + 4*rdi], xmm0
  7190  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
  7191  
  7192  LBB0_650:
  7193  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
  7194  	JNE  LBB0_651
  7195  	JMP  LBB0_1013
  7196  
  7197  DATA LCDATA2<>+0x000(SB)/8, $0x00ff00ff00ff00ff // bytes 0-7 of the constant: 0x00ff in each of four 16-bit lanes
  7198  DATA LCDATA2<>+0x008(SB)/8, $0x00ff00ff00ff00ff // bytes 8-15: same pattern, completing the 16-byte value
  7199  GLOBL LCDATA2<>(SB), 8, $16 // file-local (<>) symbol, 16 bytes total; flag 8 is RODATA in Go's textflag.h. Its address is loaded into BP at the top of _arithmetic_arr_scalar_sse4.
  7200  
  7201  TEXT ·_arithmetic_arr_scalar_sse4(SB), $0-48
  7202  
  7203  	MOVQ typ+0(FP), DI
  7204  	MOVQ op+8(FP), SI
  7205  	MOVQ inLeft+16(FP), DX
  7206  	MOVQ inRight+24(FP), CX
  7207  	MOVQ out+32(FP), R8
  7208  	MOVQ len+40(FP), R9
  7209  	LEAQ LCDATA2<>(SB), BP
  7210  
  7211  	LONG $0x14fe8040         // cmp    sil, 20
  7212  	JG   LBB1_12
  7213  	WORD $0x8440; BYTE $0xf6 // test    sil, sil
  7214  	JE   LBB1_23
  7215  	LONG $0x01fe8040         // cmp    sil, 1
  7216  	JE   LBB1_31
  7217  	LONG $0x02fe8040         // cmp    sil, 2
  7218  	JNE  LBB1_1069
  7219  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7220  	JG   LBB1_55
  7221  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7222  	JLE  LBB1_97
  7223  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7224  	JE   LBB1_157
  7225  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7226  	JE   LBB1_160
  7227  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7228  	JNE  LBB1_1069
  7229  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7230  	JLE  LBB1_1069
  7231  	WORD $0x018b             // mov    eax, dword [rcx]
  7232  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7233  	LONG $0x08f98341         // cmp    r9d, 8
  7234  	JB   LBB1_11
  7235  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7236  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7237  	JBE  LBB1_453
  7238  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7239  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7240  	JBE  LBB1_453
  7241  
  7242  LBB1_11:
  7243  	WORD $0xf631 // xor    esi, esi
  7244  
  7245  LBB1_625:
  7246  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7247  	WORD $0xf749; BYTE $0xd1 // not    r9
  7248  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7249  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7250  	LONG $0x03e78348         // and    rdi, 3
  7251  	JE   LBB1_627
  7252  
  7253  LBB1_626:
  7254  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7255  	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
  7256  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7257  	LONG $0x01c68348         // add    rsi, 1
  7258  	LONG $0xffc78348         // add    rdi, -1
  7259  	JNE  LBB1_626
  7260  
  7261  LBB1_627:
  7262  	LONG $0x03f98349 // cmp    r9, 3
  7263  	JB   LBB1_1069
  7264  
  7265  LBB1_628:
  7266  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7267  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7268  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7269  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7270  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7271  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7272  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7273  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7274  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7275  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7276  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7277  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7278  	LONG $0x04c68348             // add    rsi, 4
  7279  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7280  	JNE  LBB1_628
  7281  	JMP  LBB1_1069
  7282  
  7283  LBB1_12:
  7284  	LONG $0x15fe8040         // cmp    sil, 21
  7285  	JE   LBB1_39
  7286  	LONG $0x16fe8040         // cmp    sil, 22
  7287  	JE   LBB1_47
  7288  	LONG $0x17fe8040         // cmp    sil, 23
  7289  	JNE  LBB1_1069
  7290  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7291  	JG   LBB1_62
  7292  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7293  	JLE  LBB1_102
  7294  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7295  	JE   LBB1_163
  7296  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7297  	JE   LBB1_166
  7298  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7299  	JNE  LBB1_1069
  7300  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7301  	JLE  LBB1_1069
  7302  	WORD $0x018b             // mov    eax, dword [rcx]
  7303  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7304  	LONG $0x08f98341         // cmp    r9d, 8
  7305  	JB   LBB1_22
  7306  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7307  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7308  	JBE  LBB1_456
  7309  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7310  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7311  	JBE  LBB1_456
  7312  
  7313  LBB1_22:
  7314  	WORD $0xf631 // xor    esi, esi
  7315  
  7316  LBB1_633:
  7317  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7318  	WORD $0xf749; BYTE $0xd1 // not    r9
  7319  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7320  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7321  	LONG $0x03e78348         // and    rdi, 3
  7322  	JE   LBB1_635
  7323  
  7324  LBB1_634:
  7325  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7326  	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
  7327  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7328  	LONG $0x01c68348         // add    rsi, 1
  7329  	LONG $0xffc78348         // add    rdi, -1
  7330  	JNE  LBB1_634
  7331  
  7332  LBB1_635:
  7333  	LONG $0x03f98349 // cmp    r9, 3
  7334  	JB   LBB1_1069
  7335  
  7336  LBB1_636:
  7337  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7338  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7339  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7340  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7341  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7342  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7343  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7344  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7345  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7346  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7347  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
  7348  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7349  	LONG $0x04c68348             // add    rsi, 4
  7350  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7351  	JNE  LBB1_636
  7352  	JMP  LBB1_1069
  7353  
  7354  LBB1_23:
  7355  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7356  	JG   LBB1_69
  7357  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7358  	JLE  LBB1_107
  7359  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7360  	JE   LBB1_169
  7361  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7362  	JE   LBB1_172
  7363  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7364  	JNE  LBB1_1069
  7365  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7366  	JLE  LBB1_1069
  7367  	WORD $0x018b             // mov    eax, dword [rcx]
  7368  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7369  	LONG $0x08f98341         // cmp    r9d, 8
  7370  	JB   LBB1_30
  7371  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7372  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7373  	JBE  LBB1_459
  7374  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7375  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7376  	JBE  LBB1_459
  7377  
  7378  LBB1_30:
  7379  	WORD $0xf631 // xor    esi, esi
  7380  
  7381  LBB1_641:
  7382  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7383  	WORD $0xf749; BYTE $0xd1 // not    r9
  7384  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7385  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7386  	LONG $0x03e78348         // and    rdi, 3
  7387  	JE   LBB1_643
  7388  
  7389  LBB1_642:
  7390  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7391  	WORD $0xc101             // add    ecx, eax
  7392  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7393  	LONG $0x01c68348         // add    rsi, 1
  7394  	LONG $0xffc78348         // add    rdi, -1
  7395  	JNE  LBB1_642
  7396  
  7397  LBB1_643:
  7398  	LONG $0x03f98349 // cmp    r9, 3
  7399  	JB   LBB1_1069
  7400  
  7401  LBB1_644:
  7402  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7403  	WORD $0xc101                 // add    ecx, eax
  7404  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7405  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7406  	WORD $0xc101                 // add    ecx, eax
  7407  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7408  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7409  	WORD $0xc101                 // add    ecx, eax
  7410  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7411  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7412  	WORD $0xc101                 // add    ecx, eax
  7413  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7414  	LONG $0x04c68348             // add    rsi, 4
  7415  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7416  	JNE  LBB1_644
  7417  	JMP  LBB1_1069
  7418  
  7419  LBB1_31:
  7420  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7421  	JG   LBB1_76
  7422  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7423  	JLE  LBB1_112
  7424  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7425  	JE   LBB1_175
  7426  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7427  	JE   LBB1_178
  7428  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7429  	JNE  LBB1_1069
  7430  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7431  	JLE  LBB1_1069
  7432  	WORD $0x018b             // mov    eax, dword [rcx]
  7433  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7434  	LONG $0x08f98341         // cmp    r9d, 8
  7435  	JB   LBB1_38
  7436  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7437  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7438  	JBE  LBB1_462
  7439  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7440  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7441  	JBE  LBB1_462
  7442  
  7443  LBB1_38:
  7444  	WORD $0xf631 // xor    esi, esi
  7445  
  7446  LBB1_649:
  7447  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7448  	WORD $0xf749; BYTE $0xd1 // not    r9
  7449  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7450  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7451  	LONG $0x03e78348         // and    rdi, 3
  7452  	JE   LBB1_651
  7453  
  7454  LBB1_650:
  7455  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7456  	WORD $0xc129             // sub    ecx, eax
  7457  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7458  	LONG $0x01c68348         // add    rsi, 1
  7459  	LONG $0xffc78348         // add    rdi, -1
  7460  	JNE  LBB1_650
  7461  
  7462  LBB1_651:
  7463  	LONG $0x03f98349 // cmp    r9, 3
  7464  	JB   LBB1_1069
  7465  
  7466  LBB1_652:
  7467  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7468  	WORD $0xc129                 // sub    ecx, eax
  7469  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7470  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7471  	WORD $0xc129                 // sub    ecx, eax
  7472  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7473  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7474  	WORD $0xc129                 // sub    ecx, eax
  7475  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7476  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7477  	WORD $0xc129                 // sub    ecx, eax
  7478  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7479  	LONG $0x04c68348             // add    rsi, 4
  7480  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7481  	JNE  LBB1_652
  7482  	JMP  LBB1_1069
  7483  
  7484  LBB1_39:
  7485  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7486  	JG   LBB1_83
  7487  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7488  	JLE  LBB1_117
  7489  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7490  	JE   LBB1_181
  7491  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7492  	JE   LBB1_184
  7493  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7494  	JNE  LBB1_1069
  7495  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7496  	JLE  LBB1_1069
  7497  	WORD $0x018b             // mov    eax, dword [rcx]
  7498  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7499  	LONG $0x08f98341         // cmp    r9d, 8
  7500  	JB   LBB1_46
  7501  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7502  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7503  	JBE  LBB1_465
  7504  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7505  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7506  	JBE  LBB1_465
  7507  
  7508  LBB1_46:
  7509  	WORD $0xf631 // xor    esi, esi
  7510  
  7511  LBB1_657:
  7512  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7513  	WORD $0xf749; BYTE $0xd1 // not    r9
  7514  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7515  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7516  	LONG $0x03e78348         // and    rdi, 3
  7517  	JE   LBB1_659
  7518  
  7519  LBB1_658:
  7520  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7521  	WORD $0xc101             // add    ecx, eax
  7522  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7523  	LONG $0x01c68348         // add    rsi, 1
  7524  	LONG $0xffc78348         // add    rdi, -1
  7525  	JNE  LBB1_658
  7526  
  7527  LBB1_659:
  7528  	LONG $0x03f98349 // cmp    r9, 3
  7529  	JB   LBB1_1069
  7530  
  7531  LBB1_660:
  7532  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7533  	WORD $0xc101                 // add    ecx, eax
  7534  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7535  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7536  	WORD $0xc101                 // add    ecx, eax
  7537  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7538  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7539  	WORD $0xc101                 // add    ecx, eax
  7540  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7541  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7542  	WORD $0xc101                 // add    ecx, eax
  7543  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7544  	LONG $0x04c68348             // add    rsi, 4
  7545  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7546  	JNE  LBB1_660
  7547  	JMP  LBB1_1069
  7548  
  7549  LBB1_47:
  7550  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7551  	JG   LBB1_90
  7552  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7553  	JLE  LBB1_122
  7554  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
  7555  	JE   LBB1_187
  7556  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
  7557  	JE   LBB1_190
  7558  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
  7559  	JNE  LBB1_1069
  7560  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7561  	JLE  LBB1_1069
  7562  	WORD $0x018b             // mov    eax, dword [rcx]
  7563  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  7564  	LONG $0x08f98341         // cmp    r9d, 8
  7565  	JB   LBB1_54
  7566  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
  7567  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7568  	JBE  LBB1_468
  7569  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
  7570  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7571  	JBE  LBB1_468
  7572  
  7573  LBB1_54:
  7574  	WORD $0xf631 // xor    esi, esi
  7575  
  7576  LBB1_665:
  7577  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  7578  	WORD $0xf749; BYTE $0xd1 // not    r9
  7579  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  7580  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  7581  	LONG $0x03e78348         // and    rdi, 3
  7582  	JE   LBB1_667
  7583  
  7584  LBB1_666:
  7585  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
  7586  	WORD $0xc129             // sub    ecx, eax
  7587  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
  7588  	LONG $0x01c68348         // add    rsi, 1
  7589  	LONG $0xffc78348         // add    rdi, -1
  7590  	JNE  LBB1_666
  7591  
  7592  LBB1_667:
  7593  	LONG $0x03f98349 // cmp    r9, 3
  7594  	JB   LBB1_1069
  7595  
  7596  LBB1_668:
  7597  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
  7598  	WORD $0xc129                 // sub    ecx, eax
  7599  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
  7600  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
  7601  	WORD $0xc129                 // sub    ecx, eax
  7602  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
  7603  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
  7604  	WORD $0xc129                 // sub    ecx, eax
  7605  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
  7606  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
  7607  	WORD $0xc129                 // sub    ecx, eax
  7608  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
  7609  	LONG $0x04c68348             // add    rsi, 4
  7610  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  7611  	JNE  LBB1_668
  7612  	JMP  LBB1_1069
  7613  
  7614  LBB1_55:
  7615  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7616  	JLE  LBB1_127
  7617  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7618  	JE   LBB1_193
  7619  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7620  	JE   LBB1_196
  7621  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7622  	JNE  LBB1_1069
  7623  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7624  	JLE  LBB1_1069
  7625  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7626  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7627  	LONG $0x04f98341         // cmp    r9d, 4
  7628  	JB   LBB1_61
  7629  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7630  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7631  	JBE  LBB1_471
  7632  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7633  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7634  	JBE  LBB1_471
  7635  
  7636  LBB1_61:
  7637  	WORD $0xc931 // xor    ecx, ecx
  7638  
  7639  LBB1_673:
  7640  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7641  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7642  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7643  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7644  	LONG $0x03e78348         // and    rdi, 3
  7645  	JE   LBB1_675
  7646  
  7647  LBB1_674:
  7648  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7649  	LONG $0xc8590ff2               // mulsd    xmm1, xmm0
  7650  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7651  	LONG $0x01c18348               // add    rcx, 1
  7652  	LONG $0xffc78348               // add    rdi, -1
  7653  	JNE  LBB1_674
  7654  
  7655  LBB1_675:
  7656  	LONG $0x03fe8348 // cmp    rsi, 3
  7657  	JB   LBB1_1069
  7658  
  7659  LBB1_676:
  7660  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7661  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7662  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7663  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7664  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7665  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7666  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7667  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7668  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7669  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7670  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7671  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7672  	LONG $0x04c18348                           // add    rcx, 4
  7673  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7674  	JNE  LBB1_676
  7675  	JMP  LBB1_1069
  7676  
  7677  LBB1_62:
  7678  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7679  	JLE  LBB1_132
  7680  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7681  	JE   LBB1_199
  7682  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7683  	JE   LBB1_202
  7684  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7685  	JNE  LBB1_1069
  7686  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7687  	JLE  LBB1_1069
  7688  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7689  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7690  	LONG $0x04f98341         // cmp    r9d, 4
  7691  	JB   LBB1_68
  7692  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7693  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7694  	JBE  LBB1_474
  7695  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7696  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7697  	JBE  LBB1_474
  7698  
  7699  LBB1_68:
  7700  	WORD $0xc931 // xor    ecx, ecx
  7701  
  7702  LBB1_681:
  7703  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7704  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7705  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7706  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7707  	LONG $0x03e78348         // and    rdi, 3
  7708  	JE   LBB1_683
  7709  
  7710  LBB1_682:
  7711  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7712  	LONG $0xc8590ff2               // mulsd    xmm1, xmm0
  7713  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7714  	LONG $0x01c18348               // add    rcx, 1
  7715  	LONG $0xffc78348               // add    rdi, -1
  7716  	JNE  LBB1_682
  7717  
  7718  LBB1_683:
  7719  	LONG $0x03fe8348 // cmp    rsi, 3
  7720  	JB   LBB1_1069
  7721  
  7722  LBB1_684:
  7723  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7724  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7725  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7726  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7727  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7728  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7729  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7730  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7731  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7732  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7733  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
  7734  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7735  	LONG $0x04c18348                           // add    rcx, 4
  7736  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7737  	JNE  LBB1_684
  7738  	JMP  LBB1_1069
  7739  
  7740  LBB1_69:
  7741  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7742  	JLE  LBB1_137
  7743  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7744  	JE   LBB1_205
  7745  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7746  	JE   LBB1_208
  7747  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7748  	JNE  LBB1_1069
  7749  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7750  	JLE  LBB1_1069
  7751  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7752  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7753  	LONG $0x04f98341         // cmp    r9d, 4
  7754  	JB   LBB1_75
  7755  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7756  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7757  	JBE  LBB1_477
  7758  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7759  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7760  	JBE  LBB1_477
  7761  
  7762  LBB1_75:
  7763  	WORD $0xc931 // xor    ecx, ecx
  7764  
  7765  LBB1_689:
  7766  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7767  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7768  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7769  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7770  	LONG $0x03e78348         // and    rdi, 3
  7771  	JE   LBB1_691
  7772  
  7773  LBB1_690:
  7774  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7775  	LONG $0xc8580ff2               // addsd    xmm1, xmm0
  7776  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7777  	LONG $0x01c18348               // add    rcx, 1
  7778  	LONG $0xffc78348               // add    rdi, -1
  7779  	JNE  LBB1_690
  7780  
  7781  LBB1_691:
  7782  	LONG $0x03fe8348 // cmp    rsi, 3
  7783  	JB   LBB1_1069
  7784  
  7785  LBB1_692:
  7786  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7787  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7788  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7789  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7790  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7791  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7792  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7793  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7794  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7795  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7796  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7797  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7798  	LONG $0x04c18348                           // add    rcx, 4
  7799  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7800  	JNE  LBB1_692
  7801  	JMP  LBB1_1069
  7802  
  7803  LBB1_76:
  7804  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7805  	JLE  LBB1_142
  7806  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7807  	JE   LBB1_211
  7808  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7809  	JE   LBB1_214
  7810  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7811  	JNE  LBB1_1069
  7812  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7813  	JLE  LBB1_1069
  7814  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7815  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7816  	LONG $0x04f98341         // cmp    r9d, 4
  7817  	JB   LBB1_82
  7818  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7819  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7820  	JBE  LBB1_480
  7821  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7822  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7823  	JBE  LBB1_480
  7824  
  7825  LBB1_82:
  7826  	WORD $0xc931 // xor    ecx, ecx
  7827  
  7828  LBB1_697:
  7829  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7830  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7831  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7832  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7833  	LONG $0x03e78348         // and    rdi, 3
  7834  	JE   LBB1_699
  7835  
  7836  LBB1_698:
  7837  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7838  	LONG $0xc85c0ff2               // subsd    xmm1, xmm0
  7839  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7840  	LONG $0x01c18348               // add    rcx, 1
  7841  	LONG $0xffc78348               // add    rdi, -1
  7842  	JNE  LBB1_698
  7843  
  7844  LBB1_699:
  7845  	LONG $0x03fe8348 // cmp    rsi, 3
  7846  	JB   LBB1_1069
  7847  
  7848  LBB1_700:
  7849  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7850  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7851  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7852  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7853  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7854  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7855  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7856  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7857  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7858  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7859  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7860  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7861  	LONG $0x04c18348                           // add    rcx, 4
  7862  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7863  	JNE  LBB1_700
  7864  	JMP  LBB1_1069
  7865  
  7866  LBB1_83:
  7867  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7868  	JLE  LBB1_147
  7869  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7870  	JE   LBB1_217
  7871  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7872  	JE   LBB1_220
  7873  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7874  	JNE  LBB1_1069
  7875  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7876  	JLE  LBB1_1069
  7877  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7878  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7879  	LONG $0x04f98341         // cmp    r9d, 4
  7880  	JB   LBB1_89
  7881  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7882  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7883  	JBE  LBB1_483
  7884  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7885  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7886  	JBE  LBB1_483
  7887  
  7888  LBB1_89:
  7889  	WORD $0xc931 // xor    ecx, ecx
  7890  
  7891  LBB1_705:
  7892  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7893  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7894  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7895  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7896  	LONG $0x03e78348         // and    rdi, 3
  7897  	JE   LBB1_707
  7898  
  7899  LBB1_706:
  7900  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7901  	LONG $0xc8580ff2               // addsd    xmm1, xmm0
  7902  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7903  	LONG $0x01c18348               // add    rcx, 1
  7904  	LONG $0xffc78348               // add    rdi, -1
  7905  	JNE  LBB1_706
  7906  
  7907  LBB1_707:
  7908  	LONG $0x03fe8348 // cmp    rsi, 3
  7909  	JB   LBB1_1069
  7910  
  7911  LBB1_708:
  7912  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7913  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7914  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7915  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7916  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7917  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7918  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7919  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7920  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7921  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7922  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
  7923  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7924  	LONG $0x04c18348                           // add    rcx, 4
  7925  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7926  	JNE  LBB1_708
  7927  	JMP  LBB1_1069
  7928  
  7929  LBB1_90: // Dispatch on edi: <= 8 -> LBB1_152, 9 -> LBB1_223, 11 -> LBB1_226, 12 -> inline 64-bit float loop [r8][i] = [rdx][i] - [rcx][0]
  7930  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  7931  	JLE  LBB1_152
  7932  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
  7933  	JE   LBB1_223
  7934  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
  7935  	JE   LBB1_226
  7936  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
  7937  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  7938  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7939  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  7940  	LONG $0x01100ff2         // movsd    xmm0, qword [rcx]
  7941  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  7942  	LONG $0x04f98341         // cmp    r9d, 4
  7943  	JB   LBB1_96 // fewer than 4 elements: go straight to the scalar loop
  7944  	LONG $0xc20c8d48         // lea    rcx, [rdx + 8*rax]
  7945  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  7946  	JBE  LBB1_486 // input range ends at or before the output start: no overlap (LBB1_486 is not shown; presumably the vectorized loop)
  7947  	LONG $0xc00c8d49         // lea    rcx, [r8 + 8*rax]
  7948  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  7949  	JBE  LBB1_486 // output range ends at or before the input start: no overlap
  7950  
  7951  LBB1_96: // reached when count < 4 or the two ranges overlap: scalar loop from index 0
  7952  	WORD $0xc931 // xor    ecx, ecx
  7953  
  7954  LBB1_713: // rcx = next element index; computes rsi = rax - rcx - 1 and rdi = rax & 3 (peel iteration count)
  7955  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  7956  	WORD $0xf748; BYTE $0xd6 // not    rsi
  7957  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  7958  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  7959  	LONG $0x03e78348         // and    rdi, 3
  7960  	JE   LBB1_715 // rax & 3 == 0: no peel iterations needed
  7961  
  7962  LBB1_714: // peel loop: one element per iteration until rdi counts down to 0
  7963  	LONG $0x0c100ff2; BYTE $0xca   // movsd    xmm1, qword [rdx + 8*rcx]
  7964  	LONG $0xc85c0ff2               // subsd    xmm1, xmm0
  7965  	LONG $0x110f41f2; WORD $0xc80c // movsd    qword [r8 + 8*rcx], xmm1
  7966  	LONG $0x01c18348               // add    rcx, 1
  7967  	LONG $0xffc78348               // add    rdi, -1
  7968  	JNE  LBB1_714
  7969  
  7970  LBB1_715: // the unrolled loop below runs only when rsi (set at LBB1_713) is >= 3
  7971  	LONG $0x03fe8348 // cmp    rsi, 3
  7972  	JB   LBB1_1069
  7973  
  7974  LBB1_716: // main loop: four elements per iteration, repeats until rcx == rax
  7975  	LONG $0x0c100ff2; BYTE $0xca               // movsd    xmm1, qword [rdx + 8*rcx]
  7976  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7977  	LONG $0x110f41f2; WORD $0xc80c             // movsd    qword [r8 + 8*rcx], xmm1
  7978  	LONG $0x4c100ff2; WORD $0x08ca             // movsd    xmm1, qword [rdx + 8*rcx + 8]
  7979  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7980  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x08 // movsd    qword [r8 + 8*rcx + 8], xmm1
  7981  	LONG $0x4c100ff2; WORD $0x10ca             // movsd    xmm1, qword [rdx + 8*rcx + 16]
  7982  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7983  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x10 // movsd    qword [r8 + 8*rcx + 16], xmm1
  7984  	LONG $0x4c100ff2; WORD $0x18ca             // movsd    xmm1, qword [rdx + 8*rcx + 24]
  7985  	LONG $0xc85c0ff2                           // subsd    xmm1, xmm0
  7986  	LONG $0x110f41f2; WORD $0xc84c; BYTE $0x18 // movsd    qword [r8 + 8*rcx + 24], xmm1
  7987  	LONG $0x04c18348                           // add    rcx, 4
  7988  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  7989  	JNE  LBB1_716
  7990  	JMP  LBB1_1069
  7991  
  7992  LBB1_97: // Dispatch on edi: 2 -> LBB1_229, 3 -> inline 8-bit loop [r8][i] = low byte of ([rdx][i] * [rcx][0])
  7993  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  7994  	JE   LBB1_229
  7995  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  7996  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  7997  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  7998  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  7999  	WORD $0x098a             // mov    cl, byte [rcx]
  8000  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8001  	LONG $0x20f98341         // cmp    r9d, 32
  8002  	JB   LBB1_101 // fewer than 32 elements: go straight to the scalar loop
  8003  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  8004  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  8005  	JBE  LBB1_489 // input range ends at or before the output start: no overlap (LBB1_489 is not shown; presumably the vectorized loop)
  8006  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
  8007  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  8008  	JBE  LBB1_489 // output range ends at or before the input start: no overlap
  8009  
  8010  LBB1_101: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8011  	WORD $0xff31 // xor    edi, edi
  8012  
  8013  LBB1_721: // rdi = next element index; computes r9 = r10 - rdi - 1 and rsi = r10 & 3 (peel iteration count)
  8014  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  8015  	WORD $0xf749; BYTE $0xd1 // not    r9
  8016  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8017  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  8018  	LONG $0x03e68348         // and    rsi, 3
  8019  	JE   LBB1_723 // r10 & 3 == 0: no peel iterations needed
  8020  
  8021  LBB1_722: // peel loop: one byte per iteration; mul cl leaves the product in ax and only al is stored
  8022  	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
  8023  	WORD $0xe1f6     // mul    cl
  8024  	LONG $0x38048841 // mov    byte [r8 + rdi], al
  8025  	LONG $0x01c78348 // add    rdi, 1
  8026  	LONG $0xffc68348 // add    rsi, -1
  8027  	JNE  LBB1_722
  8028  
  8029  LBB1_723: // the unrolled loop below runs only when r9 (set at LBB1_721) is >= 3
  8030  	LONG $0x03f98349 // cmp    r9, 3
  8031  	JB   LBB1_1069
  8032  
  8033  LBB1_724: // main loop: four bytes per iteration, repeats until rdi == r10
  8034  	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
  8035  	WORD $0xe1f6                 // mul    cl
  8036  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  8037  	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
  8038  	WORD $0xe1f6                 // mul    cl
  8039  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  8040  	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
  8041  	WORD $0xe1f6                 // mul    cl
  8042  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  8043  	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
  8044  	WORD $0xe1f6                 // mul    cl
  8045  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  8046  	LONG $0x04c78348             // add    rdi, 4
  8047  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  8048  	JNE  LBB1_724
  8049  	JMP  LBB1_1069
  8050  
  8051  LBB1_102: // Dispatch on edi: 2 -> LBB1_232, 3 -> inline 8-bit loop [r8][i] = low byte of ([rdx][i] * [rcx][0])
  8052  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  8053  	JE   LBB1_232
  8054  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  8055  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8056  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8057  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8058  	WORD $0x098a             // mov    cl, byte [rcx]
  8059  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8060  	LONG $0x20f98341         // cmp    r9d, 32
  8061  	JB   LBB1_106 // fewer than 32 elements: go straight to the scalar loop
  8062  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  8063  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  8064  	JBE  LBB1_492 // input range ends at or before the output start: no overlap (LBB1_492 is not shown; presumably the vectorized loop)
  8065  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
  8066  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  8067  	JBE  LBB1_492 // output range ends at or before the input start: no overlap
  8068  
  8069  LBB1_106: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8070  	WORD $0xff31 // xor    edi, edi
  8071  
  8072  LBB1_729: // rdi = next element index; computes r9 = r10 - rdi - 1 and rsi = r10 & 3 (peel iteration count)
  8073  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  8074  	WORD $0xf749; BYTE $0xd1 // not    r9
  8075  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8076  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  8077  	LONG $0x03e68348         // and    rsi, 3
  8078  	JE   LBB1_731 // r10 & 3 == 0: no peel iterations needed
  8079  
  8080  LBB1_730: // peel loop: one byte per iteration; mul cl leaves the product in ax and only al is stored
  8081  	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
  8082  	WORD $0xe1f6     // mul    cl
  8083  	LONG $0x38048841 // mov    byte [r8 + rdi], al
  8084  	LONG $0x01c78348 // add    rdi, 1
  8085  	LONG $0xffc68348 // add    rsi, -1
  8086  	JNE  LBB1_730
  8087  
  8088  LBB1_731: // the unrolled loop below runs only when r9 (set at LBB1_729) is >= 3
  8089  	LONG $0x03f98349 // cmp    r9, 3
  8090  	JB   LBB1_1069
  8091  
  8092  LBB1_732: // main loop: four bytes per iteration, repeats until rdi == r10
  8093  	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
  8094  	WORD $0xe1f6                 // mul    cl
  8095  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  8096  	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
  8097  	WORD $0xe1f6                 // mul    cl
  8098  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  8099  	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
  8100  	WORD $0xe1f6                 // mul    cl
  8101  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  8102  	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
  8103  	WORD $0xe1f6                 // mul    cl
  8104  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  8105  	LONG $0x04c78348             // add    rdi, 4
  8106  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  8107  	JNE  LBB1_732
  8108  	JMP  LBB1_1069
  8109  
  8110  LBB1_107: // Dispatch on edi: 2 -> LBB1_235, 3 -> inline 8-bit loop [r8][i] = [rdx][i] + [rcx][0] (wraps at 8 bits)
  8111  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  8112  	JE   LBB1_235
  8113  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  8114  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8115  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8116  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8117  	WORD $0x018a             // mov    al, byte [rcx]
  8118  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8119  	LONG $0x20f98341         // cmp    r9d, 32
  8120  	JB   LBB1_111 // fewer than 32 elements: go straight to the scalar loop
  8121  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
  8122  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8123  	JBE  LBB1_495 // input range ends at or before the output start: no overlap (LBB1_495 is not shown; presumably the vectorized loop)
  8124  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
  8125  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8126  	JBE  LBB1_495 // output range ends at or before the input start: no overlap
  8127  
  8128  LBB1_111: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8129  	WORD $0xf631 // xor    esi, esi
  8130  
  8131  LBB1_737: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8132  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8133  	WORD $0xf749; BYTE $0xd1 // not    r9
  8134  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8135  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8136  	LONG $0x03e78348         // and    rdi, 3
  8137  	JE   LBB1_739 // r10 & 3 == 0: no peel iterations needed
  8138  
  8139  LBB1_738: // peel loop: one byte per iteration until rdi counts down to 0
  8140  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
  8141  	WORD $0xc100     // add    cl, al
  8142  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
  8143  	LONG $0x01c68348 // add    rsi, 1
  8144  	LONG $0xffc78348 // add    rdi, -1
  8145  	JNE  LBB1_738
  8146  
  8147  LBB1_739: // the unrolled loop below runs only when r9 (set at LBB1_737) is >= 3
  8148  	LONG $0x03f98349 // cmp    r9, 3
  8149  	JB   LBB1_1069
  8150  
  8151  LBB1_740: // main loop: four bytes per iteration, repeats until rsi == r10
  8152  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
  8153  	WORD $0xc100                 // add    cl, al
  8154  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
  8155  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
  8156  	WORD $0xc100                 // add    cl, al
  8157  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
  8158  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
  8159  	WORD $0xc100                 // add    cl, al
  8160  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
  8161  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
  8162  	WORD $0xc100                 // add    cl, al
  8163  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
  8164  	LONG $0x04c68348             // add    rsi, 4
  8165  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8166  	JNE  LBB1_740
  8167  	JMP  LBB1_1069
  8168  
  8169  LBB1_112: // Dispatch on edi: 2 -> LBB1_238, 3 -> inline 8-bit loop [r8][i] = [rdx][i] - [rcx][0] (wraps at 8 bits)
  8170  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  8171  	JE   LBB1_238
  8172  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  8173  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8174  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8175  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8176  	WORD $0x018a             // mov    al, byte [rcx]
  8177  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8178  	LONG $0x20f98341         // cmp    r9d, 32
  8179  	JB   LBB1_116 // fewer than 32 elements: go straight to the scalar loop
  8180  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
  8181  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8182  	JBE  LBB1_498 // input range ends at or before the output start: no overlap (LBB1_498 is not shown; presumably the vectorized loop)
  8183  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
  8184  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8185  	JBE  LBB1_498 // output range ends at or before the input start: no overlap
  8186  
  8187  LBB1_116: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8188  	WORD $0xf631 // xor    esi, esi
  8189  
  8190  LBB1_745: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8191  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8192  	WORD $0xf749; BYTE $0xd1 // not    r9
  8193  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8194  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8195  	LONG $0x03e78348         // and    rdi, 3
  8196  	JE   LBB1_747 // r10 & 3 == 0: no peel iterations needed
  8197  
  8198  LBB1_746: // peel loop: one byte per iteration until rdi counts down to 0
  8199  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
  8200  	WORD $0xc128     // sub    cl, al
  8201  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
  8202  	LONG $0x01c68348 // add    rsi, 1
  8203  	LONG $0xffc78348 // add    rdi, -1
  8204  	JNE  LBB1_746
  8205  
  8206  LBB1_747: // the unrolled loop below runs only when r9 (set at LBB1_745) is >= 3
  8207  	LONG $0x03f98349 // cmp    r9, 3
  8208  	JB   LBB1_1069
  8209  
  8210  LBB1_748: // main loop: four bytes per iteration, repeats until rsi == r10
  8211  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
  8212  	WORD $0xc128                 // sub    cl, al
  8213  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
  8214  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
  8215  	WORD $0xc128                 // sub    cl, al
  8216  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
  8217  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
  8218  	WORD $0xc128                 // sub    cl, al
  8219  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
  8220  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
  8221  	WORD $0xc128                 // sub    cl, al
  8222  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
  8223  	LONG $0x04c68348             // add    rsi, 4
  8224  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8225  	JNE  LBB1_748
  8226  	JMP  LBB1_1069
  8227  
  8228  LBB1_117: // Dispatch on edi: 2 -> LBB1_241, 3 -> inline 8-bit loop [r8][i] = [rdx][i] + [rcx][0] (wraps at 8 bits)
  8229  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  8230  	JE   LBB1_241
  8231  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  8232  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8233  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8234  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8235  	WORD $0x018a             // mov    al, byte [rcx]
  8236  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8237  	LONG $0x20f98341         // cmp    r9d, 32
  8238  	JB   LBB1_121 // fewer than 32 elements: go straight to the scalar loop
  8239  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
  8240  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8241  	JBE  LBB1_501 // input range ends at or before the output start: no overlap (LBB1_501 is not shown; presumably the vectorized loop)
  8242  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
  8243  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8244  	JBE  LBB1_501 // output range ends at or before the input start: no overlap
  8245  
  8246  LBB1_121: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8247  	WORD $0xf631 // xor    esi, esi
  8248  
  8249  LBB1_753: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8250  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8251  	WORD $0xf749; BYTE $0xd1 // not    r9
  8252  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8253  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8254  	LONG $0x03e78348         // and    rdi, 3
  8255  	JE   LBB1_755 // r10 & 3 == 0: no peel iterations needed
  8256  
  8257  LBB1_754: // peel loop: one byte per iteration until rdi counts down to 0
  8258  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
  8259  	WORD $0xc100     // add    cl, al
  8260  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
  8261  	LONG $0x01c68348 // add    rsi, 1
  8262  	LONG $0xffc78348 // add    rdi, -1
  8263  	JNE  LBB1_754
  8264  
  8265  LBB1_755: // the unrolled loop below runs only when r9 (set at LBB1_753) is >= 3
  8266  	LONG $0x03f98349 // cmp    r9, 3
  8267  	JB   LBB1_1069
  8268  
  8269  LBB1_756: // main loop: four bytes per iteration, repeats until rsi == r10
  8270  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
  8271  	WORD $0xc100                 // add    cl, al
  8272  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
  8273  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
  8274  	WORD $0xc100                 // add    cl, al
  8275  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
  8276  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
  8277  	WORD $0xc100                 // add    cl, al
  8278  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
  8279  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
  8280  	WORD $0xc100                 // add    cl, al
  8281  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
  8282  	LONG $0x04c68348             // add    rsi, 4
  8283  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8284  	JNE  LBB1_756
  8285  	JMP  LBB1_1069
  8286  
  8287  LBB1_122: // Dispatch on edi: 2 -> LBB1_244, 3 -> inline 8-bit loop [r8][i] = [rdx][i] - [rcx][0] (wraps at 8 bits)
  8288  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
  8289  	JE   LBB1_244
  8290  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
  8291  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8292  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8293  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8294  	WORD $0x018a             // mov    al, byte [rcx]
  8295  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8296  	LONG $0x20f98341         // cmp    r9d, 32
  8297  	JB   LBB1_126 // fewer than 32 elements: go straight to the scalar loop
  8298  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
  8299  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8300  	JBE  LBB1_504 // input range ends at or before the output start: no overlap (LBB1_504 is not shown; presumably the vectorized loop)
  8301  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
  8302  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8303  	JBE  LBB1_504 // output range ends at or before the input start: no overlap
  8304  
  8305  LBB1_126: // reached when count < 32 or the two ranges overlap: scalar loop from index 0
  8306  	WORD $0xf631 // xor    esi, esi
  8307  
  8308  LBB1_761: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8309  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8310  	WORD $0xf749; BYTE $0xd1 // not    r9
  8311  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8312  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8313  	LONG $0x03e78348         // and    rdi, 3
  8314  	JE   LBB1_763 // r10 & 3 == 0: no peel iterations needed
  8315  
  8316  LBB1_762: // peel loop: one byte per iteration until rdi counts down to 0
  8317  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
  8318  	WORD $0xc128     // sub    cl, al
  8319  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
  8320  	LONG $0x01c68348 // add    rsi, 1
  8321  	LONG $0xffc78348 // add    rdi, -1
  8322  	JNE  LBB1_762
  8323  
  8324  LBB1_763: // the unrolled loop below runs only when r9 (set at LBB1_761) is >= 3
  8325  	LONG $0x03f98349 // cmp    r9, 3
  8326  	JB   LBB1_1069
  8327  
  8328  LBB1_764: // main loop: four bytes per iteration, repeats until rsi == r10
  8329  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
  8330  	WORD $0xc128                 // sub    cl, al
  8331  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
  8332  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
  8333  	WORD $0xc128                 // sub    cl, al
  8334  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
  8335  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
  8336  	WORD $0xc128                 // sub    cl, al
  8337  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
  8338  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
  8339  	WORD $0xc128                 // sub    cl, al
  8340  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
  8341  	LONG $0x04c68348             // add    rsi, 4
  8342  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8343  	JNE  LBB1_764
  8344  	JMP  LBB1_1069
  8345  
  8346  LBB1_127: // Dispatch on edi: 7 -> LBB1_247, 8 -> set up registers here and continue at LBB1_319 / LBB1_321 (loop body not in this excerpt)
  8347  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8348  	JE   LBB1_247
  8349  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8350  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8351  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8352  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8353  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8354  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  8355  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  8356  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  8357  	LONG $0x03e18341         // and    r9d, 3
  8358  	LONG $0x03ff8348         // cmp    rdi, 3
  8359  	JAE  LBB1_319 // count - 1 >= 3, i.e. at least 4 elements: continue at LBB1_319 (not shown)
  8360  	WORD $0xff31             // xor    edi, edi
  8361  	JMP  LBB1_321 // fewer than 4 elements: edi zeroed (presumably the start index) and r9d = count & 3; continue at LBB1_321 (not shown)
  8362  
  8363  LBB1_132: // Dispatch on edi: 7 -> LBB1_250, 8 -> set up registers here and continue at LBB1_324 / LBB1_326 (loop body not in this excerpt)
  8364  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8365  	JE   LBB1_250
  8366  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8367  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8368  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8369  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8370  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8371  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  8372  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  8373  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  8374  	LONG $0x03e18341         // and    r9d, 3
  8375  	LONG $0x03ff8348         // cmp    rdi, 3
  8376  	JAE  LBB1_324 // count - 1 >= 3, i.e. at least 4 elements: continue at LBB1_324 (not shown)
  8377  	WORD $0xff31             // xor    edi, edi
  8378  	JMP  LBB1_326 // fewer than 4 elements: edi zeroed (presumably the start index) and r9d = count & 3; continue at LBB1_326 (not shown)
  8379  
  8380  LBB1_137: // Dispatch on edi: 7 -> LBB1_253, 8 -> inline 64-bit integer loop [r8][i] = [rdx][i] + [rcx][0] (wraps at 64 bits)
  8381  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8382  	JE   LBB1_253
  8383  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8384  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8385  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8386  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8387  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8388  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8389  	LONG $0x04f98341         // cmp    r9d, 4
  8390  	JB   LBB1_141 // fewer than 4 elements: go straight to the scalar loop
  8391  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  8392  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8393  	JBE  LBB1_507 // input range ends at or before the output start: no overlap (LBB1_507 is not shown; presumably the vectorized loop)
  8394  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  8395  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8396  	JBE  LBB1_507 // output range ends at or before the input start: no overlap
  8397  
  8398  LBB1_141: // reached when count < 4 or the two ranges overlap: scalar loop from index 0
  8399  	WORD $0xf631 // xor    esi, esi
  8400  
  8401  LBB1_769: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8402  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8403  	WORD $0xf749; BYTE $0xd1 // not    r9
  8404  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8405  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8406  	LONG $0x03e78348         // and    rdi, 3
  8407  	JE   LBB1_771 // r10 & 3 == 0: no peel iterations needed
  8408  
  8409  LBB1_770: // peel loop: one qword per iteration until rdi counts down to 0
  8410  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  8411  	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
  8412  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  8413  	LONG $0x01c68348         // add    rsi, 1
  8414  	LONG $0xffc78348         // add    rdi, -1
  8415  	JNE  LBB1_770
  8416  
  8417  LBB1_771: // the unrolled loop below runs only when r9 (set at LBB1_769) is >= 3
  8418  	LONG $0x03f98349 // cmp    r9, 3
  8419  	JB   LBB1_1069
  8420  
  8421  LBB1_772: // main loop: four qwords per iteration, repeats until rsi == r10
  8422  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  8423  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8424  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  8425  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  8426  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8427  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  8428  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  8429  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8430  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  8431  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  8432  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8433  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  8434  	LONG $0x04c68348             // add    rsi, 4
  8435  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8436  	JNE  LBB1_772
  8437  	JMP  LBB1_1069
  8438  
  8439  LBB1_142: // Dispatch on edi: 7 -> LBB1_256, 8 -> inline 64-bit integer loop [r8][i] = [rdx][i] - [rcx][0] (wraps at 64 bits)
  8440  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8441  	JE   LBB1_256
  8442  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8443  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8444  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8445  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8446  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8447  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8448  	LONG $0x04f98341         // cmp    r9d, 4
  8449  	JB   LBB1_146 // fewer than 4 elements: go straight to the scalar loop
  8450  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  8451  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8452  	JBE  LBB1_510 // input range ends at or before the output start: no overlap (LBB1_510 is not shown; presumably the vectorized loop)
  8453  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  8454  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8455  	JBE  LBB1_510 // output range ends at or before the input start: no overlap
  8456  
  8457  LBB1_146: // reached when count < 4 or the two ranges overlap: scalar loop from index 0
  8458  	WORD $0xf631 // xor    esi, esi
  8459  
  8460  LBB1_777: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8461  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8462  	WORD $0xf749; BYTE $0xd1 // not    r9
  8463  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8464  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8465  	LONG $0x03e78348         // and    rdi, 3
  8466  	JE   LBB1_779 // r10 & 3 == 0: no peel iterations needed
  8467  
  8468  LBB1_778: // peel loop: one qword per iteration until rdi counts down to 0
  8469  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  8470  	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
  8471  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  8472  	LONG $0x01c68348         // add    rsi, 1
  8473  	LONG $0xffc78348         // add    rdi, -1
  8474  	JNE  LBB1_778
  8475  
  8476  LBB1_779: // the unrolled loop below runs only when r9 (set at LBB1_777) is >= 3
  8477  	LONG $0x03f98349 // cmp    r9, 3
  8478  	JB   LBB1_1069
  8479  
  8480  LBB1_780: // main loop: four qwords per iteration, repeats until rsi == r10
  8481  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  8482  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8483  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  8484  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  8485  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8486  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  8487  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  8488  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8489  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  8490  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  8491  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8492  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  8493  	LONG $0x04c68348             // add    rsi, 4
  8494  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8495  	JNE  LBB1_780
  8496  	JMP  LBB1_1069
  8497  
  8498  LBB1_147: // Dispatch on edi: 7 -> LBB1_259, 8 -> inline 64-bit integer loop [r8][i] = [rdx][i] + [rcx][0] (wraps at 64 bits)
  8499  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8500  	JE   LBB1_259
  8501  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8502  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8503  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8504  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8505  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8506  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8507  	LONG $0x04f98341         // cmp    r9d, 4
  8508  	JB   LBB1_151 // fewer than 4 elements: go straight to the scalar loop
  8509  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  8510  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8511  	JBE  LBB1_513 // input range ends at or before the output start: no overlap (LBB1_513 is not shown; presumably the vectorized loop)
  8512  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  8513  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8514  	JBE  LBB1_513 // output range ends at or before the input start: no overlap
  8515  
  8516  LBB1_151: // reached when count < 4 or the two ranges overlap: scalar loop from index 0
  8517  	WORD $0xf631 // xor    esi, esi
  8518  
  8519  LBB1_785: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8520  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8521  	WORD $0xf749; BYTE $0xd1 // not    r9
  8522  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8523  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8524  	LONG $0x03e78348         // and    rdi, 3
  8525  	JE   LBB1_787 // r10 & 3 == 0: no peel iterations needed
  8526  
  8527  LBB1_786: // peel loop: one qword per iteration until rdi counts down to 0
  8528  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  8529  	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
  8530  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  8531  	LONG $0x01c68348         // add    rsi, 1
  8532  	LONG $0xffc78348         // add    rdi, -1
  8533  	JNE  LBB1_786
  8534  
  8535  LBB1_787: // the unrolled loop below runs only when r9 (set at LBB1_785) is >= 3
  8536  	LONG $0x03f98349 // cmp    r9, 3
  8537  	JB   LBB1_1069
  8538  
  8539  LBB1_788: // main loop: four qwords per iteration, repeats until rsi == r10
  8540  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  8541  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8542  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  8543  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  8544  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8545  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  8546  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  8547  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8548  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  8549  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  8550  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  8551  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  8552  	LONG $0x04c68348             // add    rsi, 4
  8553  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8554  	JNE  LBB1_788
  8555  	JMP  LBB1_1069
  8556  
  8557  LBB1_152: // Dispatch on edi: 7 -> LBB1_262, 8 -> inline 64-bit integer loop [r8][i] = [rdx][i] - [rcx][0] (wraps at 64 bits)
  8558  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
  8559  	JE   LBB1_262
  8560  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
  8561  	JNE  LBB1_1069 // no other edi value is handled on this path (LBB1_1069 is not shown; presumably the shared exit)
  8562  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8563  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process
  8564  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  8565  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8566  	LONG $0x04f98341         // cmp    r9d, 4
  8567  	JB   LBB1_156 // fewer than 4 elements: go straight to the scalar loop
  8568  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  8569  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8570  	JBE  LBB1_516 // input range ends at or before the output start: no overlap (LBB1_516 is not shown; presumably the vectorized loop)
  8571  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  8572  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8573  	JBE  LBB1_516 // output range ends at or before the input start: no overlap
  8574  
  8575  LBB1_156: // reached when count < 4 or the two ranges overlap: scalar loop from index 0
  8576  	WORD $0xf631 // xor    esi, esi
  8577  
  8578  LBB1_793: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8579  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8580  	WORD $0xf749; BYTE $0xd1 // not    r9
  8581  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8582  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8583  	LONG $0x03e78348         // and    rdi, 3
  8584  	JE   LBB1_795 // r10 & 3 == 0: no peel iterations needed
  8585  
  8586  LBB1_794: // peel loop: one qword per iteration until rdi counts down to 0
  8587  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  8588  	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
  8589  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  8590  	LONG $0x01c68348         // add    rsi, 1
  8591  	LONG $0xffc78348         // add    rdi, -1
  8592  	JNE  LBB1_794
  8593  
  8594  LBB1_795: // the unrolled loop below runs only when r9 (set at LBB1_793) is >= 3
  8595  	LONG $0x03f98349 // cmp    r9, 3
  8596  	JB   LBB1_1069
  8597  
  8598  LBB1_796: // main loop: four qwords per iteration, repeats until rsi == r10
  8599  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  8600  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8601  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  8602  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  8603  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8604  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  8605  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  8606  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8607  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  8608  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  8609  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  8610  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  8611  	LONG $0x04c68348             // add    rsi, 4
  8612  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  8613  	JNE  LBB1_796
  8614  	JMP  LBB1_1069
  8615  
  8616  LBB1_157: // 16-bit loop [r8][i] = low 16 bits of ([rdx][i] * [rcx][0]); edi is not examined on this path
  8617  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8618  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process (LBB1_1069 is not shown; presumably the shared exit)
  8619  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
  8620  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8621  	LONG $0x10f98341         // cmp    r9d, 16
  8622  	JB   LBB1_159 // fewer than 16 elements: go straight to the scalar loop
  8623  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8624  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8625  	JBE  LBB1_519 // input range ends at or before the output start: no overlap (LBB1_519 is not shown; presumably the vectorized loop)
  8626  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8627  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8628  	JBE  LBB1_519 // output range ends at or before the input start: no overlap
  8629  
  8630  LBB1_159: // reached when count < 16 or the two ranges overlap: scalar loop from index 0
  8631  	WORD $0xf631 // xor    esi, esi
  8632  
  8633  LBB1_801: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8634  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8635  	WORD $0xf749; BYTE $0xd1 // not    r9
  8636  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8637  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8638  	LONG $0x03e78348         // and    rdi, 3
  8639  	JE   LBB1_803 // r10 & 3 == 0: no peel iterations needed
  8640  
  8641  LBB1_802: // peel loop: one 16-bit element per iteration until rdi counts down to 0
  8642  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8643  	LONG $0xc8af0f66             // imul    cx, ax
  8644  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8645  	LONG $0x01c68348             // add    rsi, 1
  8646  	LONG $0xffc78348             // add    rdi, -1
  8647  	JNE  LBB1_802
  8648  
  8649  LBB1_803: // the unrolled loop below runs only when r9 (set at LBB1_801) is >= 3
  8650  	LONG $0x03f98349 // cmp    r9, 3
  8651  	JB   LBB1_1069
  8652  
  8653  LBB1_804: // main loop: four 16-bit elements per iteration, repeats until rsi == r10
  8654  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8655  	LONG $0xc8af0f66               // imul    cx, ax
  8656  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8657  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8658  	LONG $0xc8af0f66               // imul    cx, ax
  8659  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8660  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8661  	LONG $0xc8af0f66               // imul    cx, ax
  8662  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8663  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8664  	LONG $0xc8af0f66               // imul    cx, ax
  8665  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8666  	LONG $0x04c68348               // add    rsi, 4
  8667  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8668  	JNE  LBB1_804
  8669  	JMP  LBB1_1069
  8670  
  8671  LBB1_160: // 16-bit loop [r8][i] = low 16 bits of ([rdx][i] * [rcx][0]); edi is not examined on this path
  8672  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8673  	JLE  LBB1_1069 // element count in r9d (signed) <= 0: nothing to process (LBB1_1069 is not shown; presumably the shared exit)
  8674  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
  8675  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8676  	LONG $0x10f98341         // cmp    r9d, 16
  8677  	JB   LBB1_162 // fewer than 16 elements: go straight to the scalar loop
  8678  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8679  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8680  	JBE  LBB1_522 // input range ends at or before the output start: no overlap (LBB1_522 is not shown; presumably the vectorized loop)
  8681  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8682  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8683  	JBE  LBB1_522 // output range ends at or before the input start: no overlap
  8684  
  8685  LBB1_162: // reached when count < 16 or the two ranges overlap: scalar loop from index 0
  8686  	WORD $0xf631 // xor    esi, esi
  8687  
  8688  LBB1_809: // rsi = next element index; computes r9 = r10 - rsi - 1 and rdi = r10 & 3 (peel iteration count)
  8689  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8690  	WORD $0xf749; BYTE $0xd1 // not    r9
  8691  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8692  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8693  	LONG $0x03e78348         // and    rdi, 3
  8694  	JE   LBB1_811 // r10 & 3 == 0: no peel iterations needed
  8695  
  8696  LBB1_810: // peel loop: one 16-bit element per iteration until rdi counts down to 0
  8697  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8698  	LONG $0xc8af0f66             // imul    cx, ax
  8699  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8700  	LONG $0x01c68348             // add    rsi, 1
  8701  	LONG $0xffc78348             // add    rdi, -1
  8702  	JNE  LBB1_810
  8703  
  8704  LBB1_811: // the unrolled loop below runs only when r9 (set at LBB1_809) is >= 3
  8705  	LONG $0x03f98349 // cmp    r9, 3
  8706  	JB   LBB1_1069
  8707  
  8708  LBB1_812: // main loop: four 16-bit elements per iteration, repeats until rsi == r10
  8709  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8710  	LONG $0xc8af0f66               // imul    cx, ax
  8711  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8712  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8713  	LONG $0xc8af0f66               // imul    cx, ax
  8714  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8715  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8716  	LONG $0xc8af0f66               // imul    cx, ax
  8717  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8718  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8719  	LONG $0xc8af0f66               // imul    cx, ax
  8720  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8721  	LONG $0x04c68348               // add    rsi, 4
  8722  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8723  	JNE  LBB1_812
  8724  	JMP  LBB1_1069
  8725  
  8726  LBB1_163:
        // 16-bit multiply of an array by one scalar:
        //   word [r8 + 2*i] = word [rdx + 2*i] * word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_525 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  8727  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8728  	JLE  LBB1_1069
  8729  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  8730  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8731  	LONG $0x10f98341         // cmp    r9d, 16
  8732  	JB   LBB1_165
        	// Overlap test: input end <= output start, or output end <= input start.
  8733  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8734  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8735  	JBE  LBB1_525
  8736  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8737  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8738  	JBE  LBB1_525
  8739  
  8740  LBB1_165:
        	// Scalar path starts at element index rsi = 0.
  8741  	WORD $0xf631 // xor    esi, esi
  8742  
  8743  LBB1_817:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  8744  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8745  	WORD $0xf749; BYTE $0xd1 // not    r9
  8746  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8747  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8748  	LONG $0x03e78348         // and    rdi, 3
  8749  	JE   LBB1_819
  8750  
  8751  LBB1_818:
        	// Peel loop: one element per iteration.
  8752  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8753  	LONG $0xc8af0f66             // imul    cx, ax
  8754  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8755  	LONG $0x01c68348             // add    rsi, 1
  8756  	LONG $0xffc78348             // add    rdi, -1
  8757  	JNE  LBB1_818
  8758  
  8759  LBB1_819:
        	// Fewer than 4 elements were left at LBB1_817: skip the unrolled loop.
  8760  	LONG $0x03f98349 // cmp    r9, 3
  8761  	JB   LBB1_1069
  8762  
  8763  LBB1_820:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  8764  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8765  	LONG $0xc8af0f66               // imul    cx, ax
  8766  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8767  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8768  	LONG $0xc8af0f66               // imul    cx, ax
  8769  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8770  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8771  	LONG $0xc8af0f66               // imul    cx, ax
  8772  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8773  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8774  	LONG $0xc8af0f66               // imul    cx, ax
  8775  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8776  	LONG $0x04c68348               // add    rsi, 4
  8777  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8778  	JNE  LBB1_820
  8779  	JMP  LBB1_1069
  8780  
  8781  LBB1_166:
        // 16-bit multiply of an array by one scalar:
        //   word [r8 + 2*i] = word [rdx + 2*i] * word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_528 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  8782  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8783  	JLE  LBB1_1069
  8784  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  8785  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8786  	LONG $0x10f98341         // cmp    r9d, 16
  8787  	JB   LBB1_168
        	// Overlap test: input end <= output start, or output end <= input start.
  8788  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8789  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8790  	JBE  LBB1_528
  8791  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8792  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8793  	JBE  LBB1_528
  8794  
  8795  LBB1_168:
        	// Scalar path starts at element index rsi = 0.
  8796  	WORD $0xf631 // xor    esi, esi
  8797  
  8798  LBB1_825:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  8799  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8800  	WORD $0xf749; BYTE $0xd1 // not    r9
  8801  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8802  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8803  	LONG $0x03e78348         // and    rdi, 3
  8804  	JE   LBB1_827
  8805  
  8806  LBB1_826:
        	// Peel loop: one element per iteration.
  8807  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8808  	LONG $0xc8af0f66             // imul    cx, ax
  8809  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8810  	LONG $0x01c68348             // add    rsi, 1
  8811  	LONG $0xffc78348             // add    rdi, -1
  8812  	JNE  LBB1_826
  8813  
  8814  LBB1_827:
        	// Fewer than 4 elements were left at LBB1_825: skip the unrolled loop.
  8815  	LONG $0x03f98349 // cmp    r9, 3
  8816  	JB   LBB1_1069
  8817  
  8818  LBB1_828:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  8819  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8820  	LONG $0xc8af0f66               // imul    cx, ax
  8821  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8822  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8823  	LONG $0xc8af0f66               // imul    cx, ax
  8824  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8825  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8826  	LONG $0xc8af0f66               // imul    cx, ax
  8827  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8828  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8829  	LONG $0xc8af0f66               // imul    cx, ax
  8830  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8831  	LONG $0x04c68348               // add    rsi, 4
  8832  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8833  	JNE  LBB1_828
  8834  	JMP  LBB1_1069
  8835  
  8836  LBB1_169:
        // 16-bit add of one scalar to every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] + word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_531 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  8837  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8838  	JLE  LBB1_1069
  8839  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  8840  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8841  	LONG $0x10f98341         // cmp    r9d, 16
  8842  	JB   LBB1_171
        	// Overlap test: input end <= output start, or output end <= input start.
  8843  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8844  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8845  	JBE  LBB1_531
  8846  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8847  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8848  	JBE  LBB1_531
  8849  
  8850  LBB1_171:
        	// Scalar path starts at element index rsi = 0.
  8851  	WORD $0xf631 // xor    esi, esi
  8852  
  8853  LBB1_833:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  8854  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8855  	WORD $0xf749; BYTE $0xd1 // not    r9
  8856  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8857  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8858  	LONG $0x03e78348         // and    rdi, 3
  8859  	JE   LBB1_835
  8860  
  8861  LBB1_834:
        	// Peel loop: one element per iteration.
  8862  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8863  	WORD $0x0166; BYTE $0xc1     // add    cx, ax
  8864  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8865  	LONG $0x01c68348             // add    rsi, 1
  8866  	LONG $0xffc78348             // add    rdi, -1
  8867  	JNE  LBB1_834
  8868  
  8869  LBB1_835:
        	// Fewer than 4 elements were left at LBB1_833: skip the unrolled loop.
  8870  	LONG $0x03f98349 // cmp    r9, 3
  8871  	JB   LBB1_1069
  8872  
  8873  LBB1_836:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  8874  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8875  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8876  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8877  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8878  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8879  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8880  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8881  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8882  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8883  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8884  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8885  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8886  	LONG $0x04c68348               // add    rsi, 4
  8887  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8888  	JNE  LBB1_836
  8889  	JMP  LBB1_1069
  8890  
  8891  LBB1_172:
        // 16-bit add of one scalar to every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] + word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_534 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  8892  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8893  	JLE  LBB1_1069
  8894  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  8895  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8896  	LONG $0x10f98341         // cmp    r9d, 16
  8897  	JB   LBB1_174
        	// Overlap test: input end <= output start, or output end <= input start.
  8898  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8899  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8900  	JBE  LBB1_534
  8901  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8902  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8903  	JBE  LBB1_534
  8904  
  8905  LBB1_174:
        	// Scalar path starts at element index rsi = 0.
  8906  	WORD $0xf631 // xor    esi, esi
  8907  
  8908  LBB1_841:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  8909  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8910  	WORD $0xf749; BYTE $0xd1 // not    r9
  8911  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8912  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8913  	LONG $0x03e78348         // and    rdi, 3
  8914  	JE   LBB1_843
  8915  
  8916  LBB1_842:
        	// Peel loop: one element per iteration.
  8917  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8918  	WORD $0x0166; BYTE $0xc1     // add    cx, ax
  8919  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8920  	LONG $0x01c68348             // add    rsi, 1
  8921  	LONG $0xffc78348             // add    rdi, -1
  8922  	JNE  LBB1_842
  8923  
  8924  LBB1_843:
        	// Fewer than 4 elements were left at LBB1_841: skip the unrolled loop.
  8925  	LONG $0x03f98349 // cmp    r9, 3
  8926  	JB   LBB1_1069
  8927  
  8928  LBB1_844:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  8929  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8930  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8931  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8932  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8933  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8934  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8935  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8936  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8937  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8938  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8939  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  8940  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8941  	LONG $0x04c68348               // add    rsi, 4
  8942  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8943  	JNE  LBB1_844
  8944  	JMP  LBB1_1069
  8945  
  8946  LBB1_175:
        // 16-bit subtract of one scalar from every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] - word [rcx]   for i in [0, r9d)
        // The subtraction is done on the zero-extended 32-bit registers and only
        // cx is stored, which gives the same low 16 bits as a 16-bit subtract.
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_537 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  8947  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  8948  	JLE  LBB1_1069
  8949  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// eax = zero-extended scalar; r10 = element count (zero-extended r9d).
  8950  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  8951  	LONG $0x10f98341         // cmp    r9d, 16
  8952  	JB   LBB1_177
        	// Overlap test: input end <= output start, or output end <= input start.
  8953  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  8954  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  8955  	JBE  LBB1_537
  8956  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  8957  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  8958  	JBE  LBB1_537
  8959  
  8960  LBB1_177:
        	// Scalar path starts at element index rsi = 0.
  8961  	WORD $0xf631 // xor    esi, esi
  8962  
  8963  LBB1_849:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  8964  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  8965  	WORD $0xf749; BYTE $0xd1 // not    r9
  8966  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  8967  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  8968  	LONG $0x03e78348         // and    rdi, 3
  8969  	JE   LBB1_851
  8970  
  8971  LBB1_850:
        	// Peel loop: one element per iteration.
  8972  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  8973  	WORD $0xc129                 // sub    ecx, eax
  8974  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  8975  	LONG $0x01c68348             // add    rsi, 1
  8976  	LONG $0xffc78348             // add    rdi, -1
  8977  	JNE  LBB1_850
  8978  
  8979  LBB1_851:
        	// Fewer than 4 elements were left at LBB1_849: skip the unrolled loop.
  8980  	LONG $0x03f98349 // cmp    r9, 3
  8981  	JB   LBB1_1069
  8982  
  8983  LBB1_852:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  8984  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  8985  	WORD $0xc129                   // sub    ecx, eax
  8986  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  8987  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  8988  	WORD $0xc129                   // sub    ecx, eax
  8989  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  8990  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  8991  	WORD $0xc129                   // sub    ecx, eax
  8992  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  8993  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  8994  	WORD $0xc129                   // sub    ecx, eax
  8995  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  8996  	LONG $0x04c68348               // add    rsi, 4
  8997  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  8998  	JNE  LBB1_852
  8999  	JMP  LBB1_1069
  9000  
  9001  LBB1_178:
        // 16-bit subtract of one scalar from every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] - word [rcx]   for i in [0, r9d)
        // The subtraction is done on the zero-extended 32-bit registers and only
        // cx is stored, which gives the same low 16 bits as a 16-bit subtract.
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_540 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9002  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9003  	JLE  LBB1_1069
  9004  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// eax = zero-extended scalar; r10 = element count (zero-extended r9d).
  9005  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9006  	LONG $0x10f98341         // cmp    r9d, 16
  9007  	JB   LBB1_180
        	// Overlap test: input end <= output start, or output end <= input start.
  9008  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  9009  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9010  	JBE  LBB1_540
  9011  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  9012  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9013  	JBE  LBB1_540
  9014  
  9015  LBB1_180:
        	// Scalar path starts at element index rsi = 0.
  9016  	WORD $0xf631 // xor    esi, esi
  9017  
  9018  LBB1_857:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9019  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9020  	WORD $0xf749; BYTE $0xd1 // not    r9
  9021  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9022  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9023  	LONG $0x03e78348         // and    rdi, 3
  9024  	JE   LBB1_859
  9025  
  9026  LBB1_858:
        	// Peel loop: one element per iteration.
  9027  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  9028  	WORD $0xc129                 // sub    ecx, eax
  9029  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  9030  	LONG $0x01c68348             // add    rsi, 1
  9031  	LONG $0xffc78348             // add    rdi, -1
  9032  	JNE  LBB1_858
  9033  
  9034  LBB1_859:
        	// Fewer than 4 elements were left at LBB1_857: skip the unrolled loop.
  9035  	LONG $0x03f98349 // cmp    r9, 3
  9036  	JB   LBB1_1069
  9037  
  9038  LBB1_860:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9039  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  9040  	WORD $0xc129                   // sub    ecx, eax
  9041  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  9042  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  9043  	WORD $0xc129                   // sub    ecx, eax
  9044  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  9045  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  9046  	WORD $0xc129                   // sub    ecx, eax
  9047  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  9048  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  9049  	WORD $0xc129                   // sub    ecx, eax
  9050  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  9051  	LONG $0x04c68348               // add    rsi, 4
  9052  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  9053  	JNE  LBB1_860
  9054  	JMP  LBB1_1069
  9055  
  9056  LBB1_181:
        // 16-bit add of one scalar to every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] + word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_543 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9057  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9058  	JLE  LBB1_1069
  9059  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  9060  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9061  	LONG $0x10f98341         // cmp    r9d, 16
  9062  	JB   LBB1_183
        	// Overlap test: input end <= output start, or output end <= input start.
  9063  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  9064  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9065  	JBE  LBB1_543
  9066  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  9067  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9068  	JBE  LBB1_543
  9069  
  9070  LBB1_183:
        	// Scalar path starts at element index rsi = 0.
  9071  	WORD $0xf631 // xor    esi, esi
  9072  
  9073  LBB1_865:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9074  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9075  	WORD $0xf749; BYTE $0xd1 // not    r9
  9076  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9077  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9078  	LONG $0x03e78348         // and    rdi, 3
  9079  	JE   LBB1_867
  9080  
  9081  LBB1_866:
        	// Peel loop: one element per iteration.
  9082  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  9083  	WORD $0x0166; BYTE $0xc1     // add    cx, ax
  9084  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  9085  	LONG $0x01c68348             // add    rsi, 1
  9086  	LONG $0xffc78348             // add    rdi, -1
  9087  	JNE  LBB1_866
  9088  
  9089  LBB1_867:
        	// Fewer than 4 elements were left at LBB1_865: skip the unrolled loop.
  9090  	LONG $0x03f98349 // cmp    r9, 3
  9091  	JB   LBB1_1069
  9092  
  9093  LBB1_868:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9094  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  9095  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9096  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  9097  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  9098  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9099  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  9100  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  9101  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9102  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  9103  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  9104  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9105  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  9106  	LONG $0x04c68348               // add    rsi, 4
  9107  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  9108  	JNE  LBB1_868
  9109  	JMP  LBB1_1069
  9110  
  9111  LBB1_184:
        // 16-bit add of one scalar to every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] + word [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_546 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9112  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9113  	JLE  LBB1_1069
  9114  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// ax = scalar operand; r10 = element count (zero-extended r9d).
  9115  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9116  	LONG $0x10f98341         // cmp    r9d, 16
  9117  	JB   LBB1_186
        	// Overlap test: input end <= output start, or output end <= input start.
  9118  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  9119  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9120  	JBE  LBB1_546
  9121  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  9122  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9123  	JBE  LBB1_546
  9124  
  9125  LBB1_186:
        	// Scalar path starts at element index rsi = 0.
  9126  	WORD $0xf631 // xor    esi, esi
  9127  
  9128  LBB1_873:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9129  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9130  	WORD $0xf749; BYTE $0xd1 // not    r9
  9131  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9132  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9133  	LONG $0x03e78348         // and    rdi, 3
  9134  	JE   LBB1_875
  9135  
  9136  LBB1_874:
        	// Peel loop: one element per iteration.
  9137  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  9138  	WORD $0x0166; BYTE $0xc1     // add    cx, ax
  9139  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  9140  	LONG $0x01c68348             // add    rsi, 1
  9141  	LONG $0xffc78348             // add    rdi, -1
  9142  	JNE  LBB1_874
  9143  
  9144  LBB1_875:
        	// Fewer than 4 elements were left at LBB1_873: skip the unrolled loop.
  9145  	LONG $0x03f98349 // cmp    r9, 3
  9146  	JB   LBB1_1069
  9147  
  9148  LBB1_876:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9149  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  9150  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9151  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  9152  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  9153  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9154  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  9155  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  9156  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9157  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  9158  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  9159  	WORD $0x0166; BYTE $0xc1       // add    cx, ax
  9160  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  9161  	LONG $0x04c68348               // add    rsi, 4
  9162  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  9163  	JNE  LBB1_876
  9164  	JMP  LBB1_1069
  9165  
  9166  LBB1_187:
        // 16-bit subtract of one scalar from every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] - word [rcx]   for i in [0, r9d)
        // The subtraction is done on the zero-extended 32-bit registers and only
        // cx is stored, which gives the same low 16 bits as a 16-bit subtract.
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_549 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9167  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9168  	JLE  LBB1_1069
  9169  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// eax = zero-extended scalar; r10 = element count (zero-extended r9d).
  9170  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9171  	LONG $0x10f98341         // cmp    r9d, 16
  9172  	JB   LBB1_189
        	// Overlap test: input end <= output start, or output end <= input start.
  9173  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  9174  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9175  	JBE  LBB1_549
  9176  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  9177  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9178  	JBE  LBB1_549
  9179  
  9180  LBB1_189:
        	// Scalar path starts at element index rsi = 0.
  9181  	WORD $0xf631 // xor    esi, esi
  9182  
  9183  LBB1_881:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9184  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9185  	WORD $0xf749; BYTE $0xd1 // not    r9
  9186  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9187  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9188  	LONG $0x03e78348         // and    rdi, 3
  9189  	JE   LBB1_883
  9190  
  9191  LBB1_882:
        	// Peel loop: one element per iteration.
  9192  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  9193  	WORD $0xc129                 // sub    ecx, eax
  9194  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  9195  	LONG $0x01c68348             // add    rsi, 1
  9196  	LONG $0xffc78348             // add    rdi, -1
  9197  	JNE  LBB1_882
  9198  
  9199  LBB1_883:
        	// Fewer than 4 elements were left at LBB1_881: skip the unrolled loop.
  9200  	LONG $0x03f98349 // cmp    r9, 3
  9201  	JB   LBB1_1069
  9202  
  9203  LBB1_884:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9204  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  9205  	WORD $0xc129                   // sub    ecx, eax
  9206  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  9207  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  9208  	WORD $0xc129                   // sub    ecx, eax
  9209  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  9210  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  9211  	WORD $0xc129                   // sub    ecx, eax
  9212  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  9213  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  9214  	WORD $0xc129                   // sub    ecx, eax
  9215  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  9216  	LONG $0x04c68348               // add    rsi, 4
  9217  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  9218  	JNE  LBB1_884
  9219  	JMP  LBB1_1069
  9220  
  9221  LBB1_190:
        // 16-bit subtract of one scalar from every array element:
        //   word [r8 + 2*i] = word [rdx + 2*i] - word [rcx]   for i in [0, r9d)
        // The subtraction is done on the zero-extended 32-bit registers and only
        // cx is stored, which gives the same low 16 bits as a 16-bit subtract.
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 16 elements and
        // non-overlapping input/output ranges it hands off to LBB1_552 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9222  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9223  	JLE  LBB1_1069
  9224  	WORD $0xb70f; BYTE $0x01 // movzx    eax, word [rcx]
        	// eax = zero-extended scalar; r10 = element count (zero-extended r9d).
  9225  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9226  	LONG $0x10f98341         // cmp    r9d, 16
  9227  	JB   LBB1_192
        	// Overlap test: input end <= output start, or output end <= input start.
  9228  	LONG $0x520c8d4a         // lea    rcx, [rdx + 2*r10]
  9229  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9230  	JBE  LBB1_552
  9231  	LONG $0x500c8d4b         // lea    rcx, [r8 + 2*r10]
  9232  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9233  	JBE  LBB1_552
  9234  
  9235  LBB1_192:
        	// Scalar path starts at element index rsi = 0.
  9236  	WORD $0xf631 // xor    esi, esi
  9237  
  9238  LBB1_889:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9239  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9240  	WORD $0xf749; BYTE $0xd1 // not    r9
  9241  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9242  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9243  	LONG $0x03e78348         // and    rdi, 3
  9244  	JE   LBB1_891
  9245  
  9246  LBB1_890:
        	// Peel loop: one element per iteration.
  9247  	LONG $0x720cb70f             // movzx    ecx, word [rdx + 2*rsi]
  9248  	WORD $0xc129                 // sub    ecx, eax
  9249  	LONG $0x0c894166; BYTE $0x70 // mov    word [r8 + 2*rsi], cx
  9250  	LONG $0x01c68348             // add    rsi, 1
  9251  	LONG $0xffc78348             // add    rdi, -1
  9252  	JNE  LBB1_890
  9253  
  9254  LBB1_891:
        	// Fewer than 4 elements were left at LBB1_889: skip the unrolled loop.
  9255  	LONG $0x03f98349 // cmp    r9, 3
  9256  	JB   LBB1_1069
  9257  
  9258  LBB1_892:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9259  	LONG $0x720cb70f               // movzx    ecx, word [rdx + 2*rsi]
  9260  	WORD $0xc129                   // sub    ecx, eax
  9261  	LONG $0x0c894166; BYTE $0x70   // mov    word [r8 + 2*rsi], cx
  9262  	LONG $0x724cb70f; BYTE $0x02   // movzx    ecx, word [rdx + 2*rsi + 2]
  9263  	WORD $0xc129                   // sub    ecx, eax
  9264  	LONG $0x4c894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], cx
  9265  	LONG $0x724cb70f; BYTE $0x04   // movzx    ecx, word [rdx + 2*rsi + 4]
  9266  	WORD $0xc129                   // sub    ecx, eax
  9267  	LONG $0x4c894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], cx
  9268  	LONG $0x724cb70f; BYTE $0x06   // movzx    ecx, word [rdx + 2*rsi + 6]
  9269  	WORD $0xc129                   // sub    ecx, eax
  9270  	LONG $0x4c894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], cx
  9271  	LONG $0x04c68348               // add    rsi, 4
  9272  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
  9273  	JNE  LBB1_892
  9274  	JMP  LBB1_1069
  9275  
  9276  LBB1_193:
        // Setup for a loop that uses a 64-bit scalar loaded from [rcx]. The loop
        // bodies live at LBB1_377 / LBB1_379, outside this chunk, so the
        // arithmetic performed cannot be stated from here.
        // Exits to LBB1_1069 when the count in r9d is <= 0 (signed).
  9277  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9278  	JLE  LBB1_1069
  9279  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
        	// rsi = count (zero-extended), rdi = count - 1, r9d = count & 3.
  9280  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  9281  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  9282  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  9283  	LONG $0x03e18341         // and    r9d, 3
        	// count >= 4 goes to LBB1_377; otherwise rdi is zeroed and control
        	// goes to LBB1_379 (presumably the remainder loop -- confirm there).
  9284  	LONG $0x03ff8348         // cmp    rdi, 3
  9285  	JAE  LBB1_377
  9286  	WORD $0xff31             // xor    edi, edi
  9287  	JMP  LBB1_379
  9288  
  9289  LBB1_196:
        // Single-precision multiply of an array by one scalar:
        //   dword [r8 + 4*i] = dword [rdx + 4*i] * dword [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 8 elements and
        // non-overlapping input/output ranges it hands off to LBB1_555 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9290  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9291  	JLE  LBB1_1069
  9292  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
        	// xmm0 = scalar operand; rax = element count (zero-extended r9d).
  9293  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9294  	LONG $0x08f98341         // cmp    r9d, 8
  9295  	JB   LBB1_198
        	// Overlap test: input end <= output start, or output end <= input start.
  9296  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9297  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9298  	JBE  LBB1_555
  9299  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9300  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9301  	JBE  LBB1_555
  9302  
  9303  LBB1_198:
        	// Scalar path starts at element index rcx = 0.
  9304  	WORD $0xc931 // xor    ecx, ecx
  9305  
  9306  LBB1_897:
        	// rsi = rax - rcx - 1; rdi = rax & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rcx already advanced -- no such jump is visible here.
  9307  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9308  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9309  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9310  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9311  	LONG $0x03e78348         // and    rdi, 3
  9312  	JE   LBB1_899
  9313  
  9314  LBB1_898:
        	// Peel loop: one element per iteration.
  9315  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9316  	LONG $0xc8590ff3               // mulss    xmm1, xmm0
  9317  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9318  	LONG $0x01c18348               // add    rcx, 1
  9319  	LONG $0xffc78348               // add    rdi, -1
  9320  	JNE  LBB1_898
  9321  
  9322  LBB1_899:
        	// Fewer than 4 elements were left at LBB1_897: skip the unrolled loop.
  9323  	LONG $0x03fe8348 // cmp    rsi, 3
  9324  	JB   LBB1_1069
  9325  
  9326  LBB1_900:
        	// Main loop, unrolled 4x; runs until rcx == rax.
  9327  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9328  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9329  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9330  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9331  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9332  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9333  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9334  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9335  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9336  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9337  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9338  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9339  	LONG $0x04c18348                           // add    rcx, 4
  9340  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9341  	JNE  LBB1_900
  9342  	JMP  LBB1_1069
  9343  
  9344  LBB1_199:
        // Setup for a loop that uses a 64-bit scalar loaded from [rcx]. The loop
        // bodies live at LBB1_385 / LBB1_387, outside this chunk, so the
        // arithmetic performed cannot be stated from here.
        // Exits to LBB1_1069 when the count in r9d is <= 0 (signed).
  9345  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9346  	JLE  LBB1_1069
  9347  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
        	// rsi = count (zero-extended), rdi = count - 1, r9d = count & 3.
  9348  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
  9349  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
  9350  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
  9351  	LONG $0x03e18341         // and    r9d, 3
        	// count >= 4 goes to LBB1_385; otherwise rdi is zeroed and control
        	// goes to LBB1_387 (presumably the remainder loop -- confirm there).
  9352  	LONG $0x03ff8348         // cmp    rdi, 3
  9353  	JAE  LBB1_385
  9354  	WORD $0xff31             // xor    edi, edi
  9355  	JMP  LBB1_387
  9356  
  9357  LBB1_202:
        // Single-precision multiply of an array by one scalar:
        //   dword [r8 + 4*i] = dword [rdx + 4*i] * dword [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 8 elements and
        // non-overlapping input/output ranges it hands off to LBB1_558 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9358  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9359  	JLE  LBB1_1069
  9360  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
        	// xmm0 = scalar operand; rax = element count (zero-extended r9d).
  9361  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9362  	LONG $0x08f98341         // cmp    r9d, 8
  9363  	JB   LBB1_204
        	// Overlap test: input end <= output start, or output end <= input start.
  9364  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9365  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9366  	JBE  LBB1_558
  9367  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9368  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9369  	JBE  LBB1_558
  9370  
  9371  LBB1_204:
        	// Scalar path starts at element index rcx = 0.
  9372  	WORD $0xc931 // xor    ecx, ecx
  9373  
  9374  LBB1_905:
        	// rsi = rax - rcx - 1; rdi = rax & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rcx already advanced -- no such jump is visible here.
  9375  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9376  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9377  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9378  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9379  	LONG $0x03e78348         // and    rdi, 3
  9380  	JE   LBB1_907
  9381  
  9382  LBB1_906:
        	// Peel loop: one element per iteration.
  9383  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9384  	LONG $0xc8590ff3               // mulss    xmm1, xmm0
  9385  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9386  	LONG $0x01c18348               // add    rcx, 1
  9387  	LONG $0xffc78348               // add    rdi, -1
  9388  	JNE  LBB1_906
  9389  
  9390  LBB1_907:
        	// Fewer than 4 elements were left at LBB1_905: skip the unrolled loop.
  9391  	LONG $0x03fe8348 // cmp    rsi, 3
  9392  	JB   LBB1_1069
  9393  
  9394  LBB1_908:
        	// Main loop, unrolled 4x; runs until rcx == rax.
  9395  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9396  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9397  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9398  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9399  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9400  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9401  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9402  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9403  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9404  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9405  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
  9406  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9407  	LONG $0x04c18348                           // add    rcx, 4
  9408  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9409  	JNE  LBB1_908
  9410  	JMP  LBB1_1069
  9411  
  9412  LBB1_205:
        // 64-bit add of one scalar to every array element:
        //   qword [r8 + 8*i] = qword [rdx + 8*i] + qword [rcx]   for i in [0, r9d)
        // Exits to LBB1_1069 when r9d <= 0 (signed). With >= 4 elements and
        // non-overlapping input/output ranges it hands off to LBB1_561 (outside
        // this chunk; presumably the SIMD form -- confirm there).
  9413  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9414  	JLE  LBB1_1069
  9415  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
        	// rax = scalar operand; r10 = element count (zero-extended r9d).
  9416  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9417  	LONG $0x04f98341         // cmp    r9d, 4
  9418  	JB   LBB1_207
        	// Overlap test: input end <= output start, or output end <= input start.
  9419  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  9420  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9421  	JBE  LBB1_561
  9422  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  9423  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9424  	JBE  LBB1_561
  9425  
  9426  LBB1_207:
        	// Scalar path starts at element index rsi = 0.
  9427  	WORD $0xf631 // xor    esi, esi
  9428  
  9429  LBB1_913:
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = number of peel iterations.
        	// NOTE(review): presumably also entered from outside this chunk with
        	// rsi already advanced -- no such jump is visible here.
  9430  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9431  	WORD $0xf749; BYTE $0xd1 // not    r9
  9432  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9433  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9434  	LONG $0x03e78348         // and    rdi, 3
  9435  	JE   LBB1_915
  9436  
  9437  LBB1_914:
        	// Peel loop: one element per iteration.
  9438  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  9439  	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
  9440  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  9441  	LONG $0x01c68348         // add    rsi, 1
  9442  	LONG $0xffc78348         // add    rdi, -1
  9443  	JNE  LBB1_914
  9444  
  9445  LBB1_915:
        	// Fewer than 4 elements were left at LBB1_913: skip the unrolled loop.
  9446  	LONG $0x03f98349 // cmp    r9, 3
  9447  	JB   LBB1_1069
  9448  
  9449  LBB1_916:
        	// Main loop, unrolled 4x; runs until rsi == r10.
  9450  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  9451  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9452  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  9453  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  9454  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9455  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  9456  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  9457  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9458  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  9459  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  9460  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9461  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  9462  	LONG $0x04c68348             // add    rsi, 4
  9463  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  9464  	JNE  LBB1_916
  9465  	JMP  LBB1_1069
  9466  
  9467  LBB1_208:
        	// Single-precision float "array + scalar" case: for each index i, stores
        	// dword [rdx + 4*i] + s to [r8 + 4*i], where s is the float loaded once from [rcx] into xmm0.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9468  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9469  	JLE  LBB1_1069
  9470  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
  9471  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9472  	LONG $0x08f98341         // cmp    r9d, 8
  9473  	JB   LBB1_210
        	// Count >= 8: branch to LBB1_564 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 4*rax) and the output range [r8, r8 + 4*rax) do not overlap.
  9474  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9475  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9476  	JBE  LBB1_564
  9477  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9478  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9479  	JBE  LBB1_564
  9480  
  9481  LBB1_210:
        	// Count below 8, or overlapping buffers: start the scalar loops at index 0.
  9482  	WORD $0xc931 // xor    ecx, ecx
  9483  
  9484  LBB1_921:
        	// rcx = current index (0 from above; other entries to this label are not visible here).
        	// rsi = rax - rcx - 1; rdi = rax & 3 = trip count of the single-element loop.
  9485  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9486  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9487  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9488  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9489  	LONG $0x03e78348         // and    rdi, 3
  9490  	JE   LBB1_923
  9491  
  9492  LBB1_922:
        	// Single-element loop, executed rdi times.
  9493  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9494  	LONG $0xc8580ff3               // addss    xmm1, xmm0
  9495  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9496  	LONG $0x01c18348               // add    rcx, 1
  9497  	LONG $0xffc78348               // add    rdi, -1
  9498  	JNE  LBB1_922
  9499  
  9500  LBB1_923:
        	// rsi < 3: at most three elements were pending at LBB1_921, so the unrolled loop is skipped.
  9501  	LONG $0x03fe8348 // cmp    rsi, 3
  9502  	JB   LBB1_1069
  9503  
  9504  LBB1_924:
        	// Four elements per iteration until rcx == rax.
  9505  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9506  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9507  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9508  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9509  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9510  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9511  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9512  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9513  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9514  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9515  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9516  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9517  	LONG $0x04c18348                           // add    rcx, 4
  9518  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9519  	JNE  LBB1_924
  9520  	JMP  LBB1_1069
  9521  
  9522  LBB1_211:
        	// 64-bit integer "array - scalar" case: for each index i, stores qword [rdx + 8*i] - s to
        	// [r8 + 8*i], where s is the qword loaded once from [rcx]. The dispatch that jumps here and the
        	// exit label LBB1_1069 are outside this view. Exits at once when the 32-bit count in r9d is <= 0.
  9523  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9524  	JLE  LBB1_1069
  9525  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  9526  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9527  	LONG $0x04f98341         // cmp    r9d, 4
  9528  	JB   LBB1_213
        	// Count >= 4: branch to LBB1_567 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 8*r10) and the output range [r8, r8 + 8*r10) do not overlap.
  9529  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  9530  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9531  	JBE  LBB1_567
  9532  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  9533  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9534  	JBE  LBB1_567
  9535  
  9536  LBB1_213:
        	// Count below 4, or overlapping buffers: start the scalar loops at index 0.
  9537  	WORD $0xf631 // xor    esi, esi
  9538  
  9539  LBB1_929:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
  9540  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9541  	WORD $0xf749; BYTE $0xd1 // not    r9
  9542  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9543  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9544  	LONG $0x03e78348         // and    rdi, 3
  9545  	JE   LBB1_931
  9546  
  9547  LBB1_930:
        	// Single-element loop, executed rdi times.
  9548  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  9549  	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
  9550  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  9551  	LONG $0x01c68348         // add    rsi, 1
  9552  	LONG $0xffc78348         // add    rdi, -1
  9553  	JNE  LBB1_930
  9554  
  9555  LBB1_931:
        	// r9 < 3: at most three elements were pending at LBB1_929, so the unrolled loop is skipped.
  9556  	LONG $0x03f98349 // cmp    r9, 3
  9557  	JB   LBB1_1069
  9558  
  9559  LBB1_932:
        	// Four elements per iteration until rsi == r10.
  9560  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  9561  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9562  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  9563  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  9564  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9565  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  9566  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  9567  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9568  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  9569  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  9570  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9571  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  9572  	LONG $0x04c68348             // add    rsi, 4
  9573  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  9574  	JNE  LBB1_932
  9575  	JMP  LBB1_1069
  9576  
  9577  LBB1_214:
        	// Single-precision float "array - scalar" case: for each index i, stores
        	// dword [rdx + 4*i] - s to [r8 + 4*i], where s is the float loaded once from [rcx] into xmm0.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9578  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9579  	JLE  LBB1_1069
  9580  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
  9581  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9582  	LONG $0x08f98341         // cmp    r9d, 8
  9583  	JB   LBB1_216
        	// Count >= 8: branch to LBB1_570 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 4*rax) and the output range [r8, r8 + 4*rax) do not overlap.
  9584  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9585  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9586  	JBE  LBB1_570
  9587  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9588  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9589  	JBE  LBB1_570
  9590  
  9591  LBB1_216:
        	// Count below 8, or overlapping buffers: start the scalar loops at index 0.
  9592  	WORD $0xc931 // xor    ecx, ecx
  9593  
  9594  LBB1_937:
        	// rcx = current index (0 from above; other entries to this label are not visible here).
        	// rsi = rax - rcx - 1; rdi = rax & 3 = trip count of the single-element loop.
  9595  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9596  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9597  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9598  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9599  	LONG $0x03e78348         // and    rdi, 3
  9600  	JE   LBB1_939
  9601  
  9602  LBB1_938:
        	// Single-element loop, executed rdi times.
  9603  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9604  	LONG $0xc85c0ff3               // subss    xmm1, xmm0
  9605  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9606  	LONG $0x01c18348               // add    rcx, 1
  9607  	LONG $0xffc78348               // add    rdi, -1
  9608  	JNE  LBB1_938
  9609  
  9610  LBB1_939:
        	// rsi < 3: at most three elements were pending at LBB1_937, so the unrolled loop is skipped.
  9611  	LONG $0x03fe8348 // cmp    rsi, 3
  9612  	JB   LBB1_1069
  9613  
  9614  LBB1_940:
        	// Four elements per iteration until rcx == rax.
  9615  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9616  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9617  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9618  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9619  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9620  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9621  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9622  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9623  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9624  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9625  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9626  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9627  	LONG $0x04c18348                           // add    rcx, 4
  9628  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9629  	JNE  LBB1_940
  9630  	JMP  LBB1_1069
  9631  
  9632  LBB1_217:
        	// 64-bit integer "array + scalar" case: for each index i, stores qword [rdx + 8*i] + s to
        	// [r8 + 8*i], where s is the qword loaded once from [rcx]. The dispatch that jumps here and the
        	// exit label LBB1_1069 are outside this view. Exits at once when the 32-bit count in r9d is <= 0.
  9633  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9634  	JLE  LBB1_1069
  9635  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  9636  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9637  	LONG $0x04f98341         // cmp    r9d, 4
  9638  	JB   LBB1_219
        	// Count >= 4: branch to LBB1_573 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 8*r10) and the output range [r8, r8 + 8*r10) do not overlap.
  9639  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  9640  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9641  	JBE  LBB1_573
  9642  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  9643  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9644  	JBE  LBB1_573
  9645  
  9646  LBB1_219:
        	// Count below 4, or overlapping buffers: start the scalar loops at index 0.
  9647  	WORD $0xf631 // xor    esi, esi
  9648  
  9649  LBB1_945:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
  9650  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9651  	WORD $0xf749; BYTE $0xd1 // not    r9
  9652  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9653  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9654  	LONG $0x03e78348         // and    rdi, 3
  9655  	JE   LBB1_947
  9656  
  9657  LBB1_946:
        	// Single-element loop, executed rdi times.
  9658  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  9659  	WORD $0x0148; BYTE $0xc1 // add    rcx, rax
  9660  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  9661  	LONG $0x01c68348         // add    rsi, 1
  9662  	LONG $0xffc78348         // add    rdi, -1
  9663  	JNE  LBB1_946
  9664  
  9665  LBB1_947:
        	// r9 < 3: at most three elements were pending at LBB1_945, so the unrolled loop is skipped.
  9666  	LONG $0x03f98349 // cmp    r9, 3
  9667  	JB   LBB1_1069
  9668  
  9669  LBB1_948:
        	// Four elements per iteration until rsi == r10.
  9670  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  9671  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9672  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  9673  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  9674  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9675  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  9676  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  9677  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9678  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  9679  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  9680  	WORD $0x0148; BYTE $0xc1     // add    rcx, rax
  9681  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  9682  	LONG $0x04c68348             // add    rsi, 4
  9683  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  9684  	JNE  LBB1_948
  9685  	JMP  LBB1_1069
  9686  
  9687  LBB1_220:
        	// Single-precision float "array + scalar" case: for each index i, stores
        	// dword [rdx + 4*i] + s to [r8 + 4*i], where s is the float loaded once from [rcx] into xmm0.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9688  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9689  	JLE  LBB1_1069
  9690  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
  9691  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9692  	LONG $0x08f98341         // cmp    r9d, 8
  9693  	JB   LBB1_222
        	// Count >= 8: branch to LBB1_576 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 4*rax) and the output range [r8, r8 + 4*rax) do not overlap.
  9694  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9695  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9696  	JBE  LBB1_576
  9697  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9698  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9699  	JBE  LBB1_576
  9700  
  9701  LBB1_222:
        	// Count below 8, or overlapping buffers: start the scalar loops at index 0.
  9702  	WORD $0xc931 // xor    ecx, ecx
  9703  
  9704  LBB1_953:
        	// rcx = current index (0 from above; other entries to this label are not visible here).
        	// rsi = rax - rcx - 1; rdi = rax & 3 = trip count of the single-element loop.
  9705  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9706  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9707  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9708  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9709  	LONG $0x03e78348         // and    rdi, 3
  9710  	JE   LBB1_955
  9711  
  9712  LBB1_954:
        	// Single-element loop, executed rdi times.
  9713  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9714  	LONG $0xc8580ff3               // addss    xmm1, xmm0
  9715  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9716  	LONG $0x01c18348               // add    rcx, 1
  9717  	LONG $0xffc78348               // add    rdi, -1
  9718  	JNE  LBB1_954
  9719  
  9720  LBB1_955:
        	// rsi < 3: at most three elements were pending at LBB1_953, so the unrolled loop is skipped.
  9721  	LONG $0x03fe8348 // cmp    rsi, 3
  9722  	JB   LBB1_1069
  9723  
  9724  LBB1_956:
        	// Four elements per iteration until rcx == rax.
  9725  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9726  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9727  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9728  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9729  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9730  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9731  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9732  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9733  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9734  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9735  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
  9736  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9737  	LONG $0x04c18348                           // add    rcx, 4
  9738  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9739  	JNE  LBB1_956
  9740  	JMP  LBB1_1069
  9741  
  9742  LBB1_223:
        	// 64-bit integer "array - scalar" case: for each index i, stores qword [rdx + 8*i] - s to
        	// [r8 + 8*i], where s is the qword loaded once from [rcx]. The dispatch that jumps here and the
        	// exit label LBB1_1069 are outside this view. Exits at once when the 32-bit count in r9d is <= 0.
  9743  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9744  	JLE  LBB1_1069
  9745  	WORD $0x8b48; BYTE $0x01 // mov    rax, qword [rcx]
  9746  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9747  	LONG $0x04f98341         // cmp    r9d, 4
  9748  	JB   LBB1_225
        	// Count >= 4: branch to LBB1_579 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 8*r10) and the output range [r8, r8 + 8*r10) do not overlap.
  9749  	LONG $0xd20c8d4a         // lea    rcx, [rdx + 8*r10]
  9750  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9751  	JBE  LBB1_579
  9752  	LONG $0xd00c8d4b         // lea    rcx, [r8 + 8*r10]
  9753  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9754  	JBE  LBB1_579
  9755  
  9756  LBB1_225:
        	// Count below 4, or overlapping buffers: start the scalar loops at index 0.
  9757  	WORD $0xf631 // xor    esi, esi
  9758  
  9759  LBB1_961:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
  9760  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9761  	WORD $0xf749; BYTE $0xd1 // not    r9
  9762  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9763  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9764  	LONG $0x03e78348         // and    rdi, 3
  9765  	JE   LBB1_963
  9766  
  9767  LBB1_962:
        	// Single-element loop, executed rdi times.
  9768  	LONG $0xf20c8b48         // mov    rcx, qword [rdx + 8*rsi]
  9769  	WORD $0x2948; BYTE $0xc1 // sub    rcx, rax
  9770  	LONG $0xf00c8949         // mov    qword [r8 + 8*rsi], rcx
  9771  	LONG $0x01c68348         // add    rsi, 1
  9772  	LONG $0xffc78348         // add    rdi, -1
  9773  	JNE  LBB1_962
  9774  
  9775  LBB1_963:
        	// r9 < 3: at most three elements were pending at LBB1_961, so the unrolled loop is skipped.
  9776  	LONG $0x03f98349 // cmp    r9, 3
  9777  	JB   LBB1_1069
  9778  
  9779  LBB1_964:
        	// Four elements per iteration until rsi == r10.
  9780  	LONG $0xf20c8b48             // mov    rcx, qword [rdx + 8*rsi]
  9781  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9782  	LONG $0xf00c8949             // mov    qword [r8 + 8*rsi], rcx
  9783  	LONG $0xf24c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rsi + 8]
  9784  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9785  	LONG $0xf04c8949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rcx
  9786  	LONG $0xf24c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rsi + 16]
  9787  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9788  	LONG $0xf04c8949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rcx
  9789  	LONG $0xf24c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rsi + 24]
  9790  	WORD $0x2948; BYTE $0xc1     // sub    rcx, rax
  9791  	LONG $0xf04c8949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rcx
  9792  	LONG $0x04c68348             // add    rsi, 4
  9793  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
  9794  	JNE  LBB1_964
  9795  	JMP  LBB1_1069
  9796  
  9797  LBB1_226:
        	// Single-precision float "array - scalar" case: for each index i, stores
        	// dword [rdx + 4*i] - s to [r8 + 4*i], where s is the float loaded once from [rcx] into xmm0.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9798  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9799  	JLE  LBB1_1069
  9800  	LONG $0x01100ff3         // movss    xmm0, dword [rcx]
  9801  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
  9802  	LONG $0x08f98341         // cmp    r9d, 8
  9803  	JB   LBB1_228
        	// Count >= 8: branch to LBB1_582 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + 4*rax) and the output range [r8, r8 + 4*rax) do not overlap.
  9804  	LONG $0x820c8d48         // lea    rcx, [rdx + 4*rax]
  9805  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9806  	JBE  LBB1_582
  9807  	LONG $0x800c8d49         // lea    rcx, [r8 + 4*rax]
  9808  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9809  	JBE  LBB1_582
  9810  
  9811  LBB1_228:
        	// Count below 8, or overlapping buffers: start the scalar loops at index 0.
  9812  	WORD $0xc931 // xor    ecx, ecx
  9813  
  9814  LBB1_969:
        	// rcx = current index (0 from above; other entries to this label are not visible here).
        	// rsi = rax - rcx - 1; rdi = rax & 3 = trip count of the single-element loop.
  9815  	WORD $0x8948; BYTE $0xce // mov    rsi, rcx
  9816  	WORD $0xf748; BYTE $0xd6 // not    rsi
  9817  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
  9818  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
  9819  	LONG $0x03e78348         // and    rdi, 3
  9820  	JE   LBB1_971
  9821  
  9822  LBB1_970:
        	// Single-element loop, executed rdi times.
  9823  	LONG $0x0c100ff3; BYTE $0x8a   // movss    xmm1, dword [rdx + 4*rcx]
  9824  	LONG $0xc85c0ff3               // subss    xmm1, xmm0
  9825  	LONG $0x110f41f3; WORD $0x880c // movss    dword [r8 + 4*rcx], xmm1
  9826  	LONG $0x01c18348               // add    rcx, 1
  9827  	LONG $0xffc78348               // add    rdi, -1
  9828  	JNE  LBB1_970
  9829  
  9830  LBB1_971:
        	// rsi < 3: at most three elements were pending at LBB1_969, so the unrolled loop is skipped.
  9831  	LONG $0x03fe8348 // cmp    rsi, 3
  9832  	JB   LBB1_1069
  9833  
  9834  LBB1_972:
        	// Four elements per iteration until rcx == rax.
  9835  	LONG $0x0c100ff3; BYTE $0x8a               // movss    xmm1, dword [rdx + 4*rcx]
  9836  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9837  	LONG $0x110f41f3; WORD $0x880c             // movss    dword [r8 + 4*rcx], xmm1
  9838  	LONG $0x4c100ff3; WORD $0x048a             // movss    xmm1, dword [rdx + 4*rcx + 4]
  9839  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9840  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x04 // movss    dword [r8 + 4*rcx + 4], xmm1
  9841  	LONG $0x4c100ff3; WORD $0x088a             // movss    xmm1, dword [rdx + 4*rcx + 8]
  9842  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9843  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x08 // movss    dword [r8 + 4*rcx + 8], xmm1
  9844  	LONG $0x4c100ff3; WORD $0x0c8a             // movss    xmm1, dword [rdx + 4*rcx + 12]
  9845  	LONG $0xc85c0ff3                           // subss    xmm1, xmm0
  9846  	LONG $0x110f41f3; WORD $0x884c; BYTE $0x0c // movss    dword [r8 + 4*rcx + 12], xmm1
  9847  	LONG $0x04c18348                           // add    rcx, 4
  9848  	WORD $0x3948; BYTE $0xc8                   // cmp    rax, rcx
  9849  	JNE  LBB1_972
  9850  	JMP  LBB1_1069
  9851  
  9852  LBB1_229:
        	// 8-bit "array * scalar" case: for each index i, multiplies byte [rdx + i] by the byte s loaded
        	// once from [rcx] into cl and stores the low 8 bits of the product (al) to [r8 + i].
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9853  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9854  	JLE  LBB1_1069
  9855  	WORD $0x098a             // mov    cl, byte [rcx]
  9856  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9857  	LONG $0x20f98341         // cmp    r9d, 32
  9858  	JB   LBB1_231
        	// Count >= 32: branch to LBB1_585 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
  9859  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  9860  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  9861  	JBE  LBB1_585
  9862  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
  9863  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  9864  	JBE  LBB1_585
  9865  
  9866  LBB1_231:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
  9867  	WORD $0xff31 // xor    edi, edi
  9868  
  9869  LBB1_977:
        	// rdi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rdi - 1; rsi = r10 & 3 = trip count of the single-element loop.
  9870  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  9871  	WORD $0xf749; BYTE $0xd1 // not    r9
  9872  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9873  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  9874  	LONG $0x03e68348         // and    rsi, 3
  9875  	JE   LBB1_979
  9876  
  9877  LBB1_978:
        	// Single-element loop, executed rsi times. "mul cl" leaves al * cl in ax; only al is stored.
  9878  	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
  9879  	WORD $0xe1f6     // mul    cl
  9880  	LONG $0x38048841 // mov    byte [r8 + rdi], al
  9881  	LONG $0x01c78348 // add    rdi, 1
  9882  	LONG $0xffc68348 // add    rsi, -1
  9883  	JNE  LBB1_978
  9884  
  9885  LBB1_979:
        	// r9 < 3: at most three elements were pending at LBB1_977, so the unrolled loop is skipped.
  9886  	LONG $0x03f98349 // cmp    r9, 3
  9887  	JB   LBB1_1069
  9888  
  9889  LBB1_980:
        	// Four elements per iteration until rdi == r10.
  9890  	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
  9891  	WORD $0xe1f6                 // mul    cl
  9892  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  9893  	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
  9894  	WORD $0xe1f6                 // mul    cl
  9895  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  9896  	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
  9897  	WORD $0xe1f6                 // mul    cl
  9898  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  9899  	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
  9900  	WORD $0xe1f6                 // mul    cl
  9901  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  9902  	LONG $0x04c78348             // add    rdi, 4
  9903  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  9904  	JNE  LBB1_980
  9905  	JMP  LBB1_1069
  9906  
  9907  LBB1_232:
        	// 8-bit "array * scalar" case: for each index i, multiplies byte [rdx + i] by the byte s loaded
        	// once from [rcx] into cl and stores the low 8 bits of the product (al) to [r8 + i].
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9908  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9909  	JLE  LBB1_1069
  9910  	WORD $0x098a             // mov    cl, byte [rcx]
  9911  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9912  	LONG $0x20f98341         // cmp    r9d, 32
  9913  	JB   LBB1_234
        	// Count >= 32: branch to LBB1_588 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
  9914  	LONG $0x12048d4a         // lea    rax, [rdx + r10]
  9915  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
  9916  	JBE  LBB1_588
  9917  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
  9918  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
  9919  	JBE  LBB1_588
  9920  
  9921  LBB1_234:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
  9922  	WORD $0xff31 // xor    edi, edi
  9923  
  9924  LBB1_985:
        	// rdi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rdi - 1; rsi = r10 & 3 = trip count of the single-element loop.
  9925  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
  9926  	WORD $0xf749; BYTE $0xd1 // not    r9
  9927  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9928  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
  9929  	LONG $0x03e68348         // and    rsi, 3
  9930  	JE   LBB1_987
  9931  
  9932  LBB1_986:
        	// Single-element loop, executed rsi times. "mul cl" leaves al * cl in ax; only al is stored.
  9933  	LONG $0x3a04b60f // movzx    eax, byte [rdx + rdi]
  9934  	WORD $0xe1f6     // mul    cl
  9935  	LONG $0x38048841 // mov    byte [r8 + rdi], al
  9936  	LONG $0x01c78348 // add    rdi, 1
  9937  	LONG $0xffc68348 // add    rsi, -1
  9938  	JNE  LBB1_986
  9939  
  9940  LBB1_987:
        	// r9 < 3: at most three elements were pending at LBB1_985, so the unrolled loop is skipped.
  9941  	LONG $0x03f98349 // cmp    r9, 3
  9942  	JB   LBB1_1069
  9943  
  9944  LBB1_988:
        	// Four elements per iteration until rdi == r10.
  9945  	LONG $0x3a04b60f             // movzx    eax, byte [rdx + rdi]
  9946  	WORD $0xe1f6                 // mul    cl
  9947  	LONG $0x38048841             // mov    byte [r8 + rdi], al
  9948  	LONG $0x3a44b60f; BYTE $0x01 // movzx    eax, byte [rdx + rdi + 1]
  9949  	WORD $0xe1f6                 // mul    cl
  9950  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
  9951  	LONG $0x3a44b60f; BYTE $0x02 // movzx    eax, byte [rdx + rdi + 2]
  9952  	WORD $0xe1f6                 // mul    cl
  9953  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
  9954  	LONG $0x3a44b60f; BYTE $0x03 // movzx    eax, byte [rdx + rdi + 3]
  9955  	WORD $0xe1f6                 // mul    cl
  9956  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
  9957  	LONG $0x04c78348             // add    rdi, 4
  9958  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
  9959  	JNE  LBB1_988
  9960  	JMP  LBB1_1069
  9961  
  9962  LBB1_235:
        	// 8-bit "array + scalar" case: for each index i, stores the 8-bit sum of byte [rdx + i] and
        	// the byte s loaded once from [rcx] into al to [r8 + i].
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
  9963  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
  9964  	JLE  LBB1_1069
  9965  	WORD $0x018a             // mov    al, byte [rcx]
  9966  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
  9967  	LONG $0x20f98341         // cmp    r9d, 32
  9968  	JB   LBB1_237
        	// Count >= 32: branch to LBB1_591 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
  9969  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
  9970  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
  9971  	JBE  LBB1_591
  9972  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
  9973  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
  9974  	JBE  LBB1_591
  9975  
  9976  LBB1_237:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
  9977  	WORD $0xf631 // xor    esi, esi
  9978  
  9979  LBB1_993:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
  9980  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
  9981  	WORD $0xf749; BYTE $0xd1 // not    r9
  9982  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
  9983  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
  9984  	LONG $0x03e78348         // and    rdi, 3
  9985  	JE   LBB1_995
  9986  
  9987  LBB1_994:
        	// Single-element loop, executed rdi times.
  9988  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
  9989  	WORD $0xc100     // add    cl, al
  9990  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
  9991  	LONG $0x01c68348 // add    rsi, 1
  9992  	LONG $0xffc78348 // add    rdi, -1
  9993  	JNE  LBB1_994
  9994  
  9995  LBB1_995:
        	// r9 < 3: at most three elements were pending at LBB1_993, so the unrolled loop is skipped.
  9996  	LONG $0x03f98349 // cmp    r9, 3
  9997  	JB   LBB1_1069
  9998  
  9999  LBB1_996:
        	// Four elements per iteration until rsi == r10.
 10000  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
 10001  	WORD $0xc100                 // add    cl, al
 10002  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
 10003  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
 10004  	WORD $0xc100                 // add    cl, al
 10005  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
 10006  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
 10007  	WORD $0xc100                 // add    cl, al
 10008  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
 10009  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
 10010  	WORD $0xc100                 // add    cl, al
 10011  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
 10012  	LONG $0x04c68348             // add    rsi, 4
 10013  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10014  	JNE  LBB1_996
 10015  	JMP  LBB1_1069
 10016  
 10017  LBB1_238:
        	// 8-bit "array - scalar" case: for each index i, stores the 8-bit difference
        	// byte [rdx + i] - s to [r8 + i], where s is the byte loaded once from [rcx] into al.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
 10018  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10019  	JLE  LBB1_1069
 10020  	WORD $0x018a             // mov    al, byte [rcx]
 10021  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10022  	LONG $0x20f98341         // cmp    r9d, 32
 10023  	JB   LBB1_240
        	// Count >= 32: branch to LBB1_594 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
 10024  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
 10025  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10026  	JBE  LBB1_594
 10027  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
 10028  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10029  	JBE  LBB1_594
 10030  
 10031  LBB1_240:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
 10032  	WORD $0xf631 // xor    esi, esi
 10033  
 10034  LBB1_1001:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
 10035  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10036  	WORD $0xf749; BYTE $0xd1 // not    r9
 10037  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10038  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10039  	LONG $0x03e78348         // and    rdi, 3
 10040  	JE   LBB1_1003
 10041  
 10042  LBB1_1002:
        	// Single-element loop, executed rdi times.
 10043  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
 10044  	WORD $0xc128     // sub    cl, al
 10045  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
 10046  	LONG $0x01c68348 // add    rsi, 1
 10047  	LONG $0xffc78348 // add    rdi, -1
 10048  	JNE  LBB1_1002
 10049  
 10050  LBB1_1003:
        	// r9 < 3: at most three elements were pending at LBB1_1001, so the unrolled loop is skipped.
 10051  	LONG $0x03f98349 // cmp    r9, 3
 10052  	JB   LBB1_1069
 10053  
 10054  LBB1_1004:
        	// Four elements per iteration until rsi == r10.
 10055  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
 10056  	WORD $0xc128                 // sub    cl, al
 10057  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
 10058  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
 10059  	WORD $0xc128                 // sub    cl, al
 10060  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
 10061  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
 10062  	WORD $0xc128                 // sub    cl, al
 10063  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
 10064  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
 10065  	WORD $0xc128                 // sub    cl, al
 10066  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
 10067  	LONG $0x04c68348             // add    rsi, 4
 10068  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10069  	JNE  LBB1_1004
 10070  	JMP  LBB1_1069
 10071  
 10072  LBB1_241:
        	// 8-bit "array + scalar" case: for each index i, stores the 8-bit sum of byte [rdx + i] and
        	// the byte s loaded once from [rcx] into al to [r8 + i].
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
 10073  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10074  	JLE  LBB1_1069
 10075  	WORD $0x018a             // mov    al, byte [rcx]
 10076  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10077  	LONG $0x20f98341         // cmp    r9d, 32
 10078  	JB   LBB1_243
        	// Count >= 32: branch to LBB1_597 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
 10079  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
 10080  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10081  	JBE  LBB1_597
 10082  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
 10083  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10084  	JBE  LBB1_597
 10085  
 10086  LBB1_243:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
 10087  	WORD $0xf631 // xor    esi, esi
 10088  
 10089  LBB1_1009:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
 10090  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10091  	WORD $0xf749; BYTE $0xd1 // not    r9
 10092  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10093  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10094  	LONG $0x03e78348         // and    rdi, 3
 10095  	JE   LBB1_1011
 10096  
 10097  LBB1_1010:
        	// Single-element loop, executed rdi times.
 10098  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
 10099  	WORD $0xc100     // add    cl, al
 10100  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
 10101  	LONG $0x01c68348 // add    rsi, 1
 10102  	LONG $0xffc78348 // add    rdi, -1
 10103  	JNE  LBB1_1010
 10104  
 10105  LBB1_1011:
        	// r9 < 3: at most three elements were pending at LBB1_1009, so the unrolled loop is skipped.
 10106  	LONG $0x03f98349 // cmp    r9, 3
 10107  	JB   LBB1_1069
 10108  
 10109  LBB1_1012:
        	// Four elements per iteration until rsi == r10.
 10110  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
 10111  	WORD $0xc100                 // add    cl, al
 10112  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
 10113  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
 10114  	WORD $0xc100                 // add    cl, al
 10115  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
 10116  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
 10117  	WORD $0xc100                 // add    cl, al
 10118  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
 10119  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
 10120  	WORD $0xc100                 // add    cl, al
 10121  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
 10122  	LONG $0x04c68348             // add    rsi, 4
 10123  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10124  	JNE  LBB1_1012
 10125  	JMP  LBB1_1069
 10126  
 10127  LBB1_244:
        	// 8-bit "array - scalar" case: for each index i, stores the 8-bit difference
        	// byte [rdx + i] - s to [r8 + i], where s is the byte loaded once from [rcx] into al.
        	// The dispatch that jumps here and the exit label LBB1_1069 are outside this view.
        	// Exits at once when the 32-bit count in r9d is <= 0.
 10128  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10129  	JLE  LBB1_1069
 10130  	WORD $0x018a             // mov    al, byte [rcx]
 10131  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10132  	LONG $0x20f98341         // cmp    r9d, 32
 10133  	JB   LBB1_246
        	// Count >= 32: branch to LBB1_600 (outside this view; presumably the SIMD loop) when the input
        	// range [rdx, rdx + r10) and the output range [r8, r8 + r10) do not overlap.
 10134  	LONG $0x120c8d4a         // lea    rcx, [rdx + r10]
 10135  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10136  	JBE  LBB1_600
 10137  	LONG $0x100c8d4b         // lea    rcx, [r8 + r10]
 10138  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10139  	JBE  LBB1_600
 10140  
 10141  LBB1_246:
        	// Count below 32, or overlapping buffers: start the scalar loops at index 0.
 10142  	WORD $0xf631 // xor    esi, esi
 10143  
 10144  LBB1_1017:
        	// rsi = current index (0 from above; other entries to this label are not visible here).
        	// r9 = r10 - rsi - 1; rdi = r10 & 3 = trip count of the single-element loop.
 10145  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10146  	WORD $0xf749; BYTE $0xd1 // not    r9
 10147  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10148  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10149  	LONG $0x03e78348         // and    rdi, 3
 10150  	JE   LBB1_1019
 10151  
 10152  LBB1_1018:
        	// Single-element loop, executed rdi times.
 10153  	LONG $0x320cb60f // movzx    ecx, byte [rdx + rsi]
 10154  	WORD $0xc128     // sub    cl, al
 10155  	LONG $0x300c8841 // mov    byte [r8 + rsi], cl
 10156  	LONG $0x01c68348 // add    rsi, 1
 10157  	LONG $0xffc78348 // add    rdi, -1
 10158  	JNE  LBB1_1018
 10159  
 10160  LBB1_1019:
        	// r9 < 3: at most three elements were pending at LBB1_1017, so the unrolled loop is skipped.
 10161  	LONG $0x03f98349 // cmp    r9, 3
 10162  	JB   LBB1_1069
 10163  
 10164  LBB1_1020:
        	// Four elements per iteration until rsi == r10.
 10165  	LONG $0x320cb60f             // movzx    ecx, byte [rdx + rsi]
 10166  	WORD $0xc128                 // sub    cl, al
 10167  	LONG $0x300c8841             // mov    byte [r8 + rsi], cl
 10168  	LONG $0x324cb60f; BYTE $0x01 // movzx    ecx, byte [rdx + rsi + 1]
 10169  	WORD $0xc128                 // sub    cl, al
 10170  	LONG $0x304c8841; BYTE $0x01 // mov    byte [r8 + rsi + 1], cl
 10171  	LONG $0x324cb60f; BYTE $0x02 // movzx    ecx, byte [rdx + rsi + 2]
 10172  	WORD $0xc128                 // sub    cl, al
 10173  	LONG $0x304c8841; BYTE $0x02 // mov    byte [r8 + rsi + 2], cl
 10174  	LONG $0x324cb60f; BYTE $0x03 // movzx    ecx, byte [rdx + rsi + 3]
 10175  	WORD $0xc128                 // sub    cl, al
 10176  	LONG $0x304c8841; BYTE $0x03 // mov    byte [r8 + rsi + 3], cl
 10177  	LONG $0x04c68348             // add    rsi, 4
 10178  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10179  	JNE  LBB1_1020
 10180  	JMP  LBB1_1069
 10181  
 10182  LBB1_247:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] * dword [rcx]
        	// (imul keeps the low 32 bits of each product). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10183  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10184  	JLE  LBB1_1069
 10185  	WORD $0x018b             // mov    eax, dword [rcx]
 10186  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10187  	LONG $0x08f98341         // cmp    r9d, 8
 10188  	JB   LBB1_249
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_603 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10189  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10190  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10191  	JBE  LBB1_603
 10192  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10193  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10194  	JBE  LBB1_603
 10195  
 10196  LBB1_249:
        	// Scalar path starts at element index rsi = 0.
 10197  	WORD $0xf631 // xor    esi, esi
 10198  
 10199  LBB1_1025:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10200  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10201  	WORD $0xf749; BYTE $0xd1 // not    r9
 10202  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10203  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10204  	LONG $0x03e78348         // and    rdi, 3
 10205  	JE   LBB1_1027
 10206  
 10207  LBB1_1026:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10208  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10209  	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
 10210  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10211  	LONG $0x01c68348         // add    rsi, 1
 10212  	LONG $0xffc78348         // add    rdi, -1
 10213  	JNE  LBB1_1026
 10214  
 10215  LBB1_1027:
        	// r9 still holds the value computed at LBB1_1025; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10216  	LONG $0x03f98349 // cmp    r9, 3
 10217  	JB   LBB1_1069
 10218  
 10219  LBB1_1028:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10220  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10221  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10222  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10223  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10224  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10225  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10226  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10227  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10228  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10229  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10230  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10231  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10232  	LONG $0x04c68348             // add    rsi, 4
 10233  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10234  	JNE  LBB1_1028
 10235  	JMP  LBB1_1069
 10236  
 10237  LBB1_250:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] * dword [rcx]
        	// (imul keeps the low 32 bits of each product). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10238  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10239  	JLE  LBB1_1069
 10240  	WORD $0x018b             // mov    eax, dword [rcx]
 10241  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10242  	LONG $0x08f98341         // cmp    r9d, 8
 10243  	JB   LBB1_252
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_606 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10244  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10245  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10246  	JBE  LBB1_606
 10247  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10248  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10249  	JBE  LBB1_606
 10250  
 10251  LBB1_252:
        	// Scalar path starts at element index rsi = 0.
 10252  	WORD $0xf631 // xor    esi, esi
 10253  
 10254  LBB1_1033:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10255  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10256  	WORD $0xf749; BYTE $0xd1 // not    r9
 10257  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10258  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10259  	LONG $0x03e78348         // and    rdi, 3
 10260  	JE   LBB1_1035
 10261  
 10262  LBB1_1034:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10263  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10264  	WORD $0xaf0f; BYTE $0xc8 // imul    ecx, eax
 10265  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10266  	LONG $0x01c68348         // add    rsi, 1
 10267  	LONG $0xffc78348         // add    rdi, -1
 10268  	JNE  LBB1_1034
 10269  
 10270  LBB1_1035:
        	// r9 still holds the value computed at LBB1_1033; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10271  	LONG $0x03f98349 // cmp    r9, 3
 10272  	JB   LBB1_1069
 10273  
 10274  LBB1_1036:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10275  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10276  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10277  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10278  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10279  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10280  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10281  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10282  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10283  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10284  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10285  	WORD $0xaf0f; BYTE $0xc8     // imul    ecx, eax
 10286  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10287  	LONG $0x04c68348             // add    rsi, 4
 10288  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10289  	JNE  LBB1_1036
 10290  	JMP  LBB1_1069
 10291  
 10292  LBB1_253:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] + dword [rcx]
        	// (32-bit wrapping add). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10293  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10294  	JLE  LBB1_1069
 10295  	WORD $0x018b             // mov    eax, dword [rcx]
 10296  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10297  	LONG $0x08f98341         // cmp    r9d, 8
 10298  	JB   LBB1_255
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_609 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10299  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10300  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10301  	JBE  LBB1_609
 10302  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10303  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10304  	JBE  LBB1_609
 10305  
 10306  LBB1_255:
        	// Scalar path starts at element index rsi = 0.
 10307  	WORD $0xf631 // xor    esi, esi
 10308  
 10309  LBB1_1041:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10310  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10311  	WORD $0xf749; BYTE $0xd1 // not    r9
 10312  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10313  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10314  	LONG $0x03e78348         // and    rdi, 3
 10315  	JE   LBB1_1043
 10316  
 10317  LBB1_1042:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10318  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10319  	WORD $0xc101             // add    ecx, eax
 10320  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10321  	LONG $0x01c68348         // add    rsi, 1
 10322  	LONG $0xffc78348         // add    rdi, -1
 10323  	JNE  LBB1_1042
 10324  
 10325  LBB1_1043:
        	// r9 still holds the value computed at LBB1_1041; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10326  	LONG $0x03f98349 // cmp    r9, 3
 10327  	JB   LBB1_1069
 10328  
 10329  LBB1_1044:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10330  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10331  	WORD $0xc101                 // add    ecx, eax
 10332  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10333  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10334  	WORD $0xc101                 // add    ecx, eax
 10335  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10336  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10337  	WORD $0xc101                 // add    ecx, eax
 10338  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10339  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10340  	WORD $0xc101                 // add    ecx, eax
 10341  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10342  	LONG $0x04c68348             // add    rsi, 4
 10343  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10344  	JNE  LBB1_1044
 10345  	JMP  LBB1_1069
 10346  
 10347  LBB1_256:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] - dword [rcx]
        	// (32-bit wrapping subtract). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10348  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10349  	JLE  LBB1_1069
 10350  	WORD $0x018b             // mov    eax, dword [rcx]
 10351  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10352  	LONG $0x08f98341         // cmp    r9d, 8
 10353  	JB   LBB1_258
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_612 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10354  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10355  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10356  	JBE  LBB1_612
 10357  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10358  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10359  	JBE  LBB1_612
 10360  
 10361  LBB1_258:
        	// Scalar path starts at element index rsi = 0.
 10362  	WORD $0xf631 // xor    esi, esi
 10363  
 10364  LBB1_1049:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10365  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10366  	WORD $0xf749; BYTE $0xd1 // not    r9
 10367  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10368  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10369  	LONG $0x03e78348         // and    rdi, 3
 10370  	JE   LBB1_1051
 10371  
 10372  LBB1_1050:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10373  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10374  	WORD $0xc129             // sub    ecx, eax
 10375  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10376  	LONG $0x01c68348         // add    rsi, 1
 10377  	LONG $0xffc78348         // add    rdi, -1
 10378  	JNE  LBB1_1050
 10379  
 10380  LBB1_1051:
        	// r9 still holds the value computed at LBB1_1049; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10381  	LONG $0x03f98349 // cmp    r9, 3
 10382  	JB   LBB1_1069
 10383  
 10384  LBB1_1052:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10385  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10386  	WORD $0xc129                 // sub    ecx, eax
 10387  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10388  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10389  	WORD $0xc129                 // sub    ecx, eax
 10390  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10391  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10392  	WORD $0xc129                 // sub    ecx, eax
 10393  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10394  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10395  	WORD $0xc129                 // sub    ecx, eax
 10396  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10397  	LONG $0x04c68348             // add    rsi, 4
 10398  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10399  	JNE  LBB1_1052
 10400  	JMP  LBB1_1069
 10401  
 10402  LBB1_259:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] + dword [rcx]
        	// (32-bit wrapping add). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10403  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10404  	JLE  LBB1_1069
 10405  	WORD $0x018b             // mov    eax, dword [rcx]
 10406  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10407  	LONG $0x08f98341         // cmp    r9d, 8
 10408  	JB   LBB1_261
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_615 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10409  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10410  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10411  	JBE  LBB1_615
 10412  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10413  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10414  	JBE  LBB1_615
 10415  
 10416  LBB1_261:
        	// Scalar path starts at element index rsi = 0.
 10417  	WORD $0xf631 // xor    esi, esi
 10418  
 10419  LBB1_1057:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10420  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10421  	WORD $0xf749; BYTE $0xd1 // not    r9
 10422  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10423  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10424  	LONG $0x03e78348         // and    rdi, 3
 10425  	JE   LBB1_1059
 10426  
 10427  LBB1_1058:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10428  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10429  	WORD $0xc101             // add    ecx, eax
 10430  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10431  	LONG $0x01c68348         // add    rsi, 1
 10432  	LONG $0xffc78348         // add    rdi, -1
 10433  	JNE  LBB1_1058
 10434  
 10435  LBB1_1059:
        	// r9 still holds the value computed at LBB1_1057; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10436  	LONG $0x03f98349 // cmp    r9, 3
 10437  	JB   LBB1_1069
 10438  
 10439  LBB1_1060:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10440  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10441  	WORD $0xc101                 // add    ecx, eax
 10442  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10443  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10444  	WORD $0xc101                 // add    ecx, eax
 10445  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10446  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10447  	WORD $0xc101                 // add    ecx, eax
 10448  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10449  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10450  	WORD $0xc101                 // add    ecx, eax
 10451  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10452  	LONG $0x04c68348             // add    rsi, 4
 10453  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10454  	JNE  LBB1_1060
 10455  	JMP  LBB1_1069
 10456  
 10457  LBB1_262:
        	// Dword kernel: for i in [0, r9d): dword [r8 + 4*i] = dword [rdx + 4*i] - dword [rcx]
        	// (32-bit wrapping subtract). A signed count <= 0 returns via LBB1_1069.
        	// NOTE(review): register roles are read off the instructions below; the TEXT header
        	// of this routine is outside this excerpt - confirm.
 10458  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 10459  	JLE  LBB1_1069
 10460  	WORD $0x018b             // mov    eax, dword [rcx]
 10461  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 10462  	LONG $0x08f98341         // cmp    r9d, 8
 10463  	JB   LBB1_264
        	// 8 or more elements: if [rdx, rdx+4*r10) and [r8, r8+4*r10) do not overlap, branch to
        	// LBB1_618 (outside this excerpt; presumably the SIMD path - TODO confirm).
        	// rcx is reused as scratch here; the scalar is already in eax.
 10464  	LONG $0x920c8d4a         // lea    rcx, [rdx + 4*r10]
 10465  	WORD $0x394c; BYTE $0xc1 // cmp    rcx, r8
 10466  	JBE  LBB1_618
 10467  	LONG $0x900c8d4b         // lea    rcx, [r8 + 4*r10]
 10468  	WORD $0x3948; BYTE $0xd1 // cmp    rcx, rdx
 10469  	JBE  LBB1_618
 10470  
 10471  LBB1_264:
        	// Scalar path starts at element index rsi = 0.
 10472  	WORD $0xf631 // xor    esi, esi
 10473  
 10474  LBB1_1065:
        	// r9 = r10 - rsi - 1 (computed as ~rsi + r10); rdi = r10 & 3 is the number of
        	// single-element iterations peeled off before the 4x unrolled loop.
        	// NOTE(review): this label may also be entered from outside the excerpt with rsi != 0 - confirm.
 10475  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10476  	WORD $0xf749; BYTE $0xd1 // not    r9
 10477  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 10478  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 10479  	LONG $0x03e78348         // and    rdi, 3
 10480  	JE   LBB1_1067
 10481  
 10482  LBB1_1066:
        	// Peel loop: one dword per iteration, rdi counts down to zero.
 10483  	WORD $0x0c8b; BYTE $0xb2 // mov    ecx, dword [rdx + 4*rsi]
 10484  	WORD $0xc129             // sub    ecx, eax
 10485  	LONG $0xb00c8941         // mov    dword [r8 + 4*rsi], ecx
 10486  	LONG $0x01c68348         // add    rsi, 1
 10487  	LONG $0xffc78348         // add    rdi, -1
 10488  	JNE  LBB1_1066
 10489  
 10490  LBB1_1067:
        	// r9 still holds the value computed at LBB1_1065; if it is below 3 the unrolled
        	// loop is skipped and the routine returns.
 10491  	LONG $0x03f98349 // cmp    r9, 3
 10492  	JB   LBB1_1069
 10493  
 10494  LBB1_1068:
        	// 4x unrolled loop: four dwords per iteration until rsi == r10.
 10495  	WORD $0x0c8b; BYTE $0xb2     // mov    ecx, dword [rdx + 4*rsi]
 10496  	WORD $0xc129                 // sub    ecx, eax
 10497  	LONG $0xb00c8941             // mov    dword [r8 + 4*rsi], ecx
 10498  	LONG $0x04b24c8b             // mov    ecx, dword [rdx + 4*rsi + 4]
 10499  	WORD $0xc129                 // sub    ecx, eax
 10500  	LONG $0xb04c8941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], ecx
 10501  	LONG $0x08b24c8b             // mov    ecx, dword [rdx + 4*rsi + 8]
 10502  	WORD $0xc129                 // sub    ecx, eax
 10503  	LONG $0xb04c8941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], ecx
 10504  	LONG $0x0cb24c8b             // mov    ecx, dword [rdx + 4*rsi + 12]
 10505  	WORD $0xc129                 // sub    ecx, eax
 10506  	LONG $0xb04c8941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], ecx
 10507  	LONG $0x04c68348             // add    rsi, 4
 10508  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 10509  	JNE  LBB1_1068
 10510  	JMP  LBB1_1069
 10511  
 10512  LBB1_319:
        	// Qword multiply-by-scalar, 4x unrolled main loop followed by a remainder loop.
        	// esi is rounded down to a multiple of 4 and used as the main-loop bound; the
        	// 32-bit AND also clears the upper half of rsi. rdi = 0 is the element index.
        	// NOTE(review): rax (multiplier), esi and r9 (remainder count) are set before this
        	// label, outside this excerpt - confirm their meaning at the jump site.
 10513  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 10514  	WORD $0xff31             // xor    edi, edi
 10515  
 10516  LBB1_320:
        	// qword [r8 + 8*(rdi+k)] = qword [rdx + 8*(rdi+k)] * rax for k = 0..3
        	// (imul keeps the low 64 bits); repeats until rdi == rsi.
 10517  	LONG $0xfa0c8b48             // mov    rcx, qword [rdx + 8*rdi]
 10518  	LONG $0xc8af0f48             // imul    rcx, rax
 10519  	LONG $0xf80c8949             // mov    qword [r8 + 8*rdi], rcx
 10520  	LONG $0xfa4c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rdi + 8]
 10521  	LONG $0xc8af0f48             // imul    rcx, rax
 10522  	LONG $0xf84c8949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rcx
 10523  	LONG $0xfa4c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rdi + 16]
 10524  	LONG $0xc8af0f48             // imul    rcx, rax
 10525  	LONG $0xf84c8949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rcx
 10526  	LONG $0xfa4c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rdi + 24]
 10527  	LONG $0xc8af0f48             // imul    rcx, rax
 10528  	LONG $0xf84c8949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rcx
 10529  	LONG $0x04c78348             // add    rdi, 4
 10530  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 10531  	JNE  LBB1_320
 10532  
 10533  LBB1_321:
        	// r9 == 0: nothing left, return. Otherwise rebase both pointers to element rdi
        	// (rsi = &out[rdi], rdx = &in[rdi]) and restart the index at 0.
        	// NOTE(review): may also be entered directly from outside the excerpt - confirm.
 10534  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 10535  	JE   LBB1_1069
 10536  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 10537  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
 10538  	WORD $0xff31             // xor    edi, edi
 10539  
 10540  LBB1_323:
        	// Remainder loop: one qword per iteration, r9 iterations.
 10541  	LONG $0xfa0c8b48         // mov    rcx, qword [rdx + 8*rdi]
 10542  	LONG $0xc8af0f48         // imul    rcx, rax
 10543  	LONG $0xfe0c8948         // mov    qword [rsi + 8*rdi], rcx
 10544  	LONG $0x01c78348         // add    rdi, 1
 10545  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 10546  	JNE  LBB1_323
 10547  	JMP  LBB1_1069
 10548  
 10549  LBB1_324:
        	// Qword multiply-by-scalar, 4x unrolled main loop followed by a remainder loop.
        	// esi is rounded down to a multiple of 4 and used as the main-loop bound; the
        	// 32-bit AND also clears the upper half of rsi. rdi = 0 is the element index.
        	// NOTE(review): rax (multiplier), esi and r9 (remainder count) are set before this
        	// label, outside this excerpt - confirm their meaning at the jump site.
 10550  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 10551  	WORD $0xff31             // xor    edi, edi
 10552  
 10553  LBB1_325:
        	// qword [r8 + 8*(rdi+k)] = qword [rdx + 8*(rdi+k)] * rax for k = 0..3
        	// (imul keeps the low 64 bits); repeats until rdi == rsi.
 10554  	LONG $0xfa0c8b48             // mov    rcx, qword [rdx + 8*rdi]
 10555  	LONG $0xc8af0f48             // imul    rcx, rax
 10556  	LONG $0xf80c8949             // mov    qword [r8 + 8*rdi], rcx
 10557  	LONG $0xfa4c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rdi + 8]
 10558  	LONG $0xc8af0f48             // imul    rcx, rax
 10559  	LONG $0xf84c8949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rcx
 10560  	LONG $0xfa4c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rdi + 16]
 10561  	LONG $0xc8af0f48             // imul    rcx, rax
 10562  	LONG $0xf84c8949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rcx
 10563  	LONG $0xfa4c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rdi + 24]
 10564  	LONG $0xc8af0f48             // imul    rcx, rax
 10565  	LONG $0xf84c8949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rcx
 10566  	LONG $0x04c78348             // add    rdi, 4
 10567  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 10568  	JNE  LBB1_325
 10569  
 10570  LBB1_326:
        	// r9 == 0: nothing left, return. Otherwise rebase both pointers to element rdi
        	// (rsi = &out[rdi], rdx = &in[rdi]) and restart the index at 0.
        	// NOTE(review): may also be entered directly from outside the excerpt - confirm.
 10571  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 10572  	JE   LBB1_1069
 10573  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 10574  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
 10575  	WORD $0xff31             // xor    edi, edi
 10576  
 10577  LBB1_328:
        	// Remainder loop: one qword per iteration, r9 iterations.
 10578  	LONG $0xfa0c8b48         // mov    rcx, qword [rdx + 8*rdi]
 10579  	LONG $0xc8af0f48         // imul    rcx, rax
 10580  	LONG $0xfe0c8948         // mov    qword [rsi + 8*rdi], rcx
 10581  	LONG $0x01c78348         // add    rdi, 1
 10582  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 10583  	JNE  LBB1_328
 10584  	JMP  LBB1_1069
 10585  
 10586  LBB1_377:
        	// Qword multiply-by-scalar, 4x unrolled main loop followed by a remainder loop.
        	// esi is rounded down to a multiple of 4 and used as the main-loop bound; the
        	// 32-bit AND also clears the upper half of rsi. rdi = 0 is the element index.
        	// NOTE(review): rax (multiplier), esi and r9 (remainder count) are set before this
        	// label, outside this excerpt - confirm their meaning at the jump site.
 10587  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 10588  	WORD $0xff31             // xor    edi, edi
 10589  
 10590  LBB1_378:
        	// qword [r8 + 8*(rdi+k)] = qword [rdx + 8*(rdi+k)] * rax for k = 0..3
        	// (imul keeps the low 64 bits); repeats until rdi == rsi.
 10591  	LONG $0xfa0c8b48             // mov    rcx, qword [rdx + 8*rdi]
 10592  	LONG $0xc8af0f48             // imul    rcx, rax
 10593  	LONG $0xf80c8949             // mov    qword [r8 + 8*rdi], rcx
 10594  	LONG $0xfa4c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rdi + 8]
 10595  	LONG $0xc8af0f48             // imul    rcx, rax
 10596  	LONG $0xf84c8949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rcx
 10597  	LONG $0xfa4c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rdi + 16]
 10598  	LONG $0xc8af0f48             // imul    rcx, rax
 10599  	LONG $0xf84c8949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rcx
 10600  	LONG $0xfa4c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rdi + 24]
 10601  	LONG $0xc8af0f48             // imul    rcx, rax
 10602  	LONG $0xf84c8949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rcx
 10603  	LONG $0x04c78348             // add    rdi, 4
 10604  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 10605  	JNE  LBB1_378
 10606  
 10607  LBB1_379:
        	// r9 == 0: nothing left, return. Otherwise rebase both pointers to element rdi
        	// (rsi = &out[rdi], rdx = &in[rdi]) and restart the index at 0.
        	// NOTE(review): may also be entered directly from outside the excerpt - confirm.
 10608  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 10609  	JE   LBB1_1069
 10610  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 10611  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
 10612  	WORD $0xff31             // xor    edi, edi
 10613  
 10614  LBB1_381:
        	// Remainder loop: one qword per iteration, r9 iterations.
 10615  	LONG $0xfa0c8b48         // mov    rcx, qword [rdx + 8*rdi]
 10616  	LONG $0xc8af0f48         // imul    rcx, rax
 10617  	LONG $0xfe0c8948         // mov    qword [rsi + 8*rdi], rcx
 10618  	LONG $0x01c78348         // add    rdi, 1
 10619  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 10620  	JNE  LBB1_381
 10621  	JMP  LBB1_1069
 10622  
 10623  LBB1_385:
        	// Qword multiply-by-scalar, 4x unrolled main loop followed by a remainder loop.
        	// esi is rounded down to a multiple of 4 and used as the main-loop bound; the
        	// 32-bit AND also clears the upper half of rsi. rdi = 0 is the element index.
        	// NOTE(review): rax (multiplier), esi and r9 (remainder count) are set before this
        	// label, outside this excerpt - confirm their meaning at the jump site.
 10624  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 10625  	WORD $0xff31             // xor    edi, edi
 10626  
 10627  LBB1_386:
        	// qword [r8 + 8*(rdi+k)] = qword [rdx + 8*(rdi+k)] * rax for k = 0..3
        	// (imul keeps the low 64 bits); repeats until rdi == rsi.
 10628  	LONG $0xfa0c8b48             // mov    rcx, qword [rdx + 8*rdi]
 10629  	LONG $0xc8af0f48             // imul    rcx, rax
 10630  	LONG $0xf80c8949             // mov    qword [r8 + 8*rdi], rcx
 10631  	LONG $0xfa4c8b48; BYTE $0x08 // mov    rcx, qword [rdx + 8*rdi + 8]
 10632  	LONG $0xc8af0f48             // imul    rcx, rax
 10633  	LONG $0xf84c8949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rcx
 10634  	LONG $0xfa4c8b48; BYTE $0x10 // mov    rcx, qword [rdx + 8*rdi + 16]
 10635  	LONG $0xc8af0f48             // imul    rcx, rax
 10636  	LONG $0xf84c8949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rcx
 10637  	LONG $0xfa4c8b48; BYTE $0x18 // mov    rcx, qword [rdx + 8*rdi + 24]
 10638  	LONG $0xc8af0f48             // imul    rcx, rax
 10639  	LONG $0xf84c8949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rcx
 10640  	LONG $0x04c78348             // add    rdi, 4
 10641  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 10642  	JNE  LBB1_386
 10643  
 10644  LBB1_387:
        	// r9 == 0: nothing left, return. Otherwise rebase both pointers to element rdi
        	// (rsi = &out[rdi], rdx = &in[rdi]) and restart the index at 0.
        	// NOTE(review): may also be entered directly from outside the excerpt - confirm.
 10645  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 10646  	JE   LBB1_1069
 10647  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 10648  	LONG $0xfa148d48         // lea    rdx, [rdx + 8*rdi]
 10649  	WORD $0xff31             // xor    edi, edi
 10650  
 10651  LBB1_389:
        	// Remainder loop: one qword per iteration, r9 iterations. Unlike the sibling
        	// remainder loops, this one ends by falling through into the RET below.
 10652  	LONG $0xfa0c8b48         // mov    rcx, qword [rdx + 8*rdi]
 10653  	LONG $0xc8af0f48         // imul    rcx, rax
 10654  	LONG $0xfe0c8948         // mov    qword [rsi + 8*rdi], rcx
 10655  	LONG $0x01c78348         // add    rdi, 1
 10656  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 10657  	JNE  LBB1_389
 10658  
 10659  LBB1_1069:
        	// Shared exit: the scalar kernels in this part of the routine jump here when done.
 10660  	RET
 10661  
 10662  LBB1_453:
        	// SSE4.1 dword multiply-by-scalar, two-groups-per-iteration vector loop.
        	// esi = r10d & -8: r10d rounded down to a multiple of 8 (one group = 8 dwords = two xmm).
        	// xmm0 = eax broadcast to all four dword lanes (movd + pshufd imm 0).
        	// r9 = ((rsi - 8) >> 3) + 1 = number of 8-dword groups.
        	// If rsi == 8 (a single group) the paired loop is skipped via LBB1_621 (outside this excerpt).
        	// NOTE(review): eax (scalar) and r10 (element count) are set before this label,
        	// outside this excerpt - confirm.
 10663  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10664  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10665  	LONG $0xc06e0f66             // movd    xmm0, eax
 10666  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10667  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10668  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10669  	LONG $0x03e9c149             // shr    r9, 3
 10670  	LONG $0x01c18349             // add    r9, 1
 10671  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10672  	JE   LBB1_621
        	// rcx = -(r9 & -2): negated even group count, stepped by +2 up to zero; rdi = element index.
 10673  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10674  	LONG $0xfee18348             // and    rcx, -2
 10675  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10676  	WORD $0xff31                 // xor    edi, edi
 10677  
 10678  LBB1_455:
        	// Each iteration handles 16 dwords (two groups) with unaligned loads/stores:
        	// out[rdi .. rdi+15] = in[rdi .. rdi+15] * scalar (pmulld keeps the low 32 bits per lane).
        	// The code after the loop is at LBB1_622, outside this excerpt.
 10679  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10680  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10681  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 10682  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 10683  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10684  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10685  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10686  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10687  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 10688  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 10689  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10690  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10691  	LONG $0x10c78348                           // add    rdi, 16
 10692  	LONG $0x02c18348                           // add    rcx, 2
 10693  	JNE  LBB1_455
 10694  	JMP  LBB1_622
 10695  
 10696  LBB1_456:
        	// SSE4.1 dword multiply-by-scalar, two-groups-per-iteration vector loop.
        	// esi = r10d & -8: r10d rounded down to a multiple of 8 (one group = 8 dwords = two xmm).
        	// xmm0 = eax broadcast to all four dword lanes (movd + pshufd imm 0).
        	// r9 = ((rsi - 8) >> 3) + 1 = number of 8-dword groups.
        	// If rsi == 8 (a single group) the paired loop is skipped via LBB1_629 (outside this excerpt).
        	// NOTE(review): eax (scalar) and r10 (element count) are set before this label,
        	// outside this excerpt - confirm.
 10697  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10698  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10699  	LONG $0xc06e0f66             // movd    xmm0, eax
 10700  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10701  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10702  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10703  	LONG $0x03e9c149             // shr    r9, 3
 10704  	LONG $0x01c18349             // add    r9, 1
 10705  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10706  	JE   LBB1_629
        	// rcx = -(r9 & -2): negated even group count, stepped by +2 up to zero; rdi = element index.
 10707  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10708  	LONG $0xfee18348             // and    rcx, -2
 10709  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10710  	WORD $0xff31                 // xor    edi, edi
 10711  
 10712  LBB1_458:
        	// Each iteration handles 16 dwords (two groups) with unaligned loads/stores:
        	// out[rdi .. rdi+15] = in[rdi .. rdi+15] * scalar (pmulld keeps the low 32 bits per lane).
        	// The code after the loop is at LBB1_630, outside this excerpt.
 10713  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10714  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10715  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 10716  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 10717  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10718  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10719  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10720  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10721  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 10722  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 10723  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10724  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10725  	LONG $0x10c78348                           // add    rdi, 16
 10726  	LONG $0x02c18348                           // add    rcx, 2
 10727  	JNE  LBB1_458
 10728  	JMP  LBB1_630
 10729  
 10730  LBB1_459:
        	// SSE dword add-scalar, two-groups-per-iteration vector loop.
        	// esi = r10d & -8: r10d rounded down to a multiple of 8 (one group = 8 dwords = two xmm).
        	// xmm0 = eax broadcast to all four dword lanes (movd + pshufd imm 0).
        	// r9 = ((rsi - 8) >> 3) + 1 = number of 8-dword groups.
        	// If rsi == 8 (a single group) the paired loop is skipped via LBB1_637 (outside this excerpt).
        	// NOTE(review): eax (scalar) and r10 (element count) are set before this label,
        	// outside this excerpt - confirm.
 10731  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10732  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10733  	LONG $0xc06e0f66             // movd    xmm0, eax
 10734  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10735  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10736  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10737  	LONG $0x03e9c149             // shr    r9, 3
 10738  	LONG $0x01c18349             // add    r9, 1
 10739  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10740  	JE   LBB1_637
        	// rcx = -(r9 & -2): negated even group count, stepped by +2 up to zero; rdi = element index.
 10741  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10742  	LONG $0xfee18348             // and    rcx, -2
 10743  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10744  	WORD $0xff31                 // xor    edi, edi
 10745  
 10746  LBB1_461:
        	// Each iteration handles 16 dwords (two groups) with unaligned loads/stores:
        	// out[rdi .. rdi+15] = in[rdi .. rdi+15] + scalar (paddd, wrapping per lane).
        	// The code after the loop is at LBB1_638, outside this excerpt.
 10747  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10748  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10749  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 10750  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 10751  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10752  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10753  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10754  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10755  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 10756  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 10757  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10758  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10759  	LONG $0x10c78348                           // add    rdi, 16
 10760  	LONG $0x02c18348                           // add    rcx, 2
 10761  	JNE  LBB1_461
 10762  	JMP  LBB1_638
 10763  
 10764  LBB1_462:
        	// SSE dword subtract-scalar, two-groups-per-iteration vector loop.
        	// esi = r10d & -8: r10d rounded down to a multiple of 8 (one group = 8 dwords = two xmm).
        	// xmm0 = eax broadcast to all four dword lanes (movd + pshufd imm 0).
        	// r9 = ((rsi - 8) >> 3) + 1 = number of 8-dword groups.
        	// If rsi == 8 (a single group) the paired loop is skipped via LBB1_645 (outside this excerpt).
        	// NOTE(review): eax (scalar) and r10 (element count) are set before this label,
        	// outside this excerpt - confirm.
 10765  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10766  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10767  	LONG $0xc06e0f66             // movd    xmm0, eax
 10768  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10769  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10770  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10771  	LONG $0x03e9c149             // shr    r9, 3
 10772  	LONG $0x01c18349             // add    r9, 1
 10773  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10774  	JE   LBB1_645
        	// rcx = -(r9 & -2): negated even group count, stepped by +2 up to zero; rdi = element index.
 10775  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10776  	LONG $0xfee18348             // and    rcx, -2
 10777  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10778  	WORD $0xff31                 // xor    edi, edi
 10779  
 10780  LBB1_464:
        	// Each iteration handles 16 dwords (two groups) with unaligned loads/stores:
        	// out[rdi .. rdi+15] = in[rdi .. rdi+15] - scalar (psubd, wrapping per lane).
        	// The code after the loop is at LBB1_646, outside this excerpt.
 10781  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10782  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10783  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 10784  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 10785  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10786  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10787  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10788  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10789  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 10790  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 10791  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10792  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10793  	LONG $0x10c78348                           // add    rdi, 16
 10794  	LONG $0x02c18348                           // add    rcx, 2
 10795  	JNE  LBB1_464
 10796  	JMP  LBB1_646
 10797  
 10798  LBB1_465:
        	// SSE dword add-scalar, two-groups-per-iteration vector loop.
        	// esi = r10d & -8: r10d rounded down to a multiple of 8 (one group = 8 dwords = two xmm).
        	// xmm0 = eax broadcast to all four dword lanes (movd + pshufd imm 0).
        	// r9 = ((rsi - 8) >> 3) + 1 = number of 8-dword groups.
        	// If rsi == 8 (a single group) the paired loop is skipped via LBB1_653 (outside this excerpt).
        	// NOTE(review): eax (scalar) and r10 (element count) are set before this label,
        	// outside this excerpt - confirm.
 10799  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10800  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10801  	LONG $0xc06e0f66             // movd    xmm0, eax
 10802  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10803  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10804  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10805  	LONG $0x03e9c149             // shr    r9, 3
 10806  	LONG $0x01c18349             // add    r9, 1
 10807  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10808  	JE   LBB1_653
        	// rcx = -(r9 & -2): negated even group count, stepped by +2 up to zero; rdi = element index.
 10809  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10810  	LONG $0xfee18348             // and    rcx, -2
 10811  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10812  	WORD $0xff31                 // xor    edi, edi
 10813  
 10814  LBB1_467:
        	// Each iteration handles 16 dwords (two groups) with unaligned loads/stores:
        	// out[rdi .. rdi+15] = in[rdi .. rdi+15] + scalar (paddd, wrapping per lane).
        	// The code after the loop is at LBB1_654, outside this excerpt.
 10815  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10816  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10817  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 10818  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 10819  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10820  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10821  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10822  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10823  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 10824  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 10825  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10826  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10827  	LONG $0x10c78348                           // add    rdi, 16
 10828  	LONG $0x02c18348                           // add    rcx, 2
 10829  	JNE  LBB1_467
 10830  	JMP  LBB1_654
 10831  
 10832  LBB1_468:
	// Vector path: subtract a broadcast 32-bit scalar from packed 32-bit lanes (element - scalar).
	// In (as used below): eax = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: esi = count rounded down to a multiple of 8, xmm0 = scalar in all four dword lanes,
	//          r9 = (esi - 8) / 8 + 1, i.e. the number of 8-element chunks (assumes esi >= 8;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10833  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 10834  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 10835  	LONG $0xc06e0f66             // movd    xmm0, eax
 10836  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 10837  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 10838  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 10839  	LONG $0x03e9c149             // shr    r9, 3
 10840  	LONG $0x01c18349             // add    r9, 1
	// rcx == 0 means there is exactly one 8-element chunk: bypass the 2x-unrolled loop.
 10841  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 10842  	JE   LBB1_661
	// rcx = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 10843  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 10844  	LONG $0xfee18348             // and    rcx, -2
 10845  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 10846  	WORD $0xff31                 // xor    edi, edi
 10847  
 10848  LBB1_470:
	// Two chunks (16 dwords) per iteration: load from rdx, psubd xmm0 from it, store to r8.
 10849  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 10850  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 10851  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 10852  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 10853  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 10854  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 10855  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 10856  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 10857  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 10858  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 10859  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 10860  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 10861  	LONG $0x10c78348                           // add    rdi, 16
 10862  	LONG $0x02c18348                           // add    rcx, 2
 10863  	JNE  LBB1_470
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 10864  	JMP  LBB1_662
 10865  
 10866  LBB1_471:
	// Vector path: multiply packed doubles by a broadcast double scalar.
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10867  	WORD $0xc189             // mov    ecx, eax
 10868  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 10869  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 10870  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 10871  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10872  	LONG $0x02e9c149         // shr    r9, 2
 10873  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 10874  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 10875  	JE   LBB1_669
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 10876  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 10877  	LONG $0xfee68348         // and    rsi, -2
 10878  	WORD $0xf748; BYTE $0xde // neg    rsi
 10879  	WORD $0xff31             // xor    edi, edi
 10880  
 10881  LBB1_473:
	// Two chunks (8 doubles) per iteration: load from rdx, mulpd by xmm1, store to r8.
 10882  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 10883  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 10884  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 10885  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 10886  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 10887  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 10888  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 10889  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 10890  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 10891  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 10892  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 10893  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 10894  	LONG $0x08c78348                           // add    rdi, 8
 10895  	LONG $0x02c68348                           // add    rsi, 2
 10896  	JNE  LBB1_473
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 10897  	JMP  LBB1_670
 10898  
 10899  LBB1_474:
	// Vector path: multiply packed doubles by a broadcast double scalar.
	// The instruction sequence matches LBB1_471 and differs only in its branch targets.
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10900  	WORD $0xc189             // mov    ecx, eax
 10901  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 10902  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 10903  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 10904  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10905  	LONG $0x02e9c149         // shr    r9, 2
 10906  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 10907  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 10908  	JE   LBB1_677
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 10909  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 10910  	LONG $0xfee68348         // and    rsi, -2
 10911  	WORD $0xf748; BYTE $0xde // neg    rsi
 10912  	WORD $0xff31             // xor    edi, edi
 10913  
 10914  LBB1_476:
	// Two chunks (8 doubles) per iteration: load from rdx, mulpd by xmm1, store to r8.
 10915  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 10916  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 10917  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 10918  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 10919  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 10920  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 10921  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 10922  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 10923  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 10924  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 10925  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 10926  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 10927  	LONG $0x08c78348                           // add    rdi, 8
 10928  	LONG $0x02c68348                           // add    rsi, 2
 10929  	JNE  LBB1_476
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 10930  	JMP  LBB1_678
 10931  
 10932  LBB1_477:
	// Vector path: add a broadcast double scalar to packed doubles.
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10933  	WORD $0xc189             // mov    ecx, eax
 10934  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 10935  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 10936  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 10937  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10938  	LONG $0x02e9c149         // shr    r9, 2
 10939  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 10940  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 10941  	JE   LBB1_685
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 10942  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 10943  	LONG $0xfee68348         // and    rsi, -2
 10944  	WORD $0xf748; BYTE $0xde // neg    rsi
 10945  	WORD $0xff31             // xor    edi, edi
 10946  
 10947  LBB1_479:
	// Two chunks (8 doubles) per iteration: load from rdx, addpd with xmm1, store to r8.
 10948  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 10949  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 10950  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 10951  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 10952  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 10953  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 10954  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 10955  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 10956  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 10957  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 10958  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 10959  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 10960  	LONG $0x08c78348                           // add    rdi, 8
 10961  	LONG $0x02c68348                           // add    rsi, 2
 10962  	JNE  LBB1_479
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 10963  	JMP  LBB1_686
 10964  
 10965  LBB1_480:
	// Vector path: subtract a broadcast double scalar from packed doubles (element - scalar).
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10966  	WORD $0xc189             // mov    ecx, eax
 10967  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 10968  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 10969  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 10970  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 10971  	LONG $0x02e9c149         // shr    r9, 2
 10972  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 10973  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 10974  	JE   LBB1_693
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 10975  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 10976  	LONG $0xfee68348         // and    rsi, -2
 10977  	WORD $0xf748; BYTE $0xde // neg    rsi
 10978  	WORD $0xff31             // xor    edi, edi
 10979  
 10980  LBB1_482:
	// Two chunks (8 doubles) per iteration: load from rdx, subpd xmm1 from it, store to r8.
 10981  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 10982  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 10983  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 10984  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 10985  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 10986  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 10987  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 10988  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 10989  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 10990  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 10991  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 10992  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 10993  	LONG $0x08c78348                           // add    rdi, 8
 10994  	LONG $0x02c68348                           // add    rsi, 2
 10995  	JNE  LBB1_482
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 10996  	JMP  LBB1_694
 10997  
 10998  LBB1_483:
	// Vector path: add a broadcast double scalar to packed doubles.
	// The instruction sequence matches LBB1_477 and differs only in its branch targets.
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 10999  	WORD $0xc189             // mov    ecx, eax
 11000  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 11001  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 11002  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 11003  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 11004  	LONG $0x02e9c149         // shr    r9, 2
 11005  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 11006  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 11007  	JE   LBB1_701
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 11008  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 11009  	LONG $0xfee68348         // and    rsi, -2
 11010  	WORD $0xf748; BYTE $0xde // neg    rsi
 11011  	WORD $0xff31             // xor    edi, edi
 11012  
 11013  LBB1_485:
	// Two chunks (8 doubles) per iteration: load from rdx, addpd with xmm1, store to r8.
 11014  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 11015  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 11016  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 11017  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 11018  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 11019  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 11020  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 11021  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 11022  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 11023  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 11024  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 11025  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 11026  	LONG $0x08c78348                           // add    rdi, 8
 11027  	LONG $0x02c68348                           // add    rsi, 2
 11028  	JNE  LBB1_485
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11029  	JMP  LBB1_702
 11030  
 11031  LBB1_486:
	// Vector path: subtract a broadcast double scalar from packed doubles (element - scalar).
	// The instruction sequence matches LBB1_480 and differs only in its branch targets.
	// In (as used below): eax = element count, low qword of xmm0 = scalar, rdx = source pointer, r8 = destination pointer.
	// Derived: ecx = count rounded down to a multiple of 4, xmm1 = scalar in both qword lanes (movddup),
	//          r9 = (ecx - 4) / 4 + 1, i.e. the number of 4-element chunks (assumes ecx >= 4;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11032  	WORD $0xc189             // mov    ecx, eax
 11033  	WORD $0xe183; BYTE $0xfc // and    ecx, -4
 11034  	LONG $0xc8120ff2         // movddup    xmm1, xmm0
 11035  	LONG $0xfc718d48         // lea    rsi, [rcx - 4]
 11036  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 11037  	LONG $0x02e9c149         // shr    r9, 2
 11038  	LONG $0x01c18349         // add    r9, 1
	// rsi == 0 means there is exactly one 4-element chunk: bypass the 2x-unrolled loop.
 11039  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 11040  	JE   LBB1_709
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = element index.
 11041  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 11042  	LONG $0xfee68348         // and    rsi, -2
 11043  	WORD $0xf748; BYTE $0xde // neg    rsi
 11044  	WORD $0xff31             // xor    edi, edi
 11045  
 11046  LBB1_488:
	// Two chunks (8 doubles) per iteration: load from rdx, subpd xmm1 from it, store to r8.
 11047  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 11048  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 11049  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 11050  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 11051  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 11052  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 11053  	LONG $0x54100f66; WORD $0x20fa             // movupd    xmm2, oword [rdx + 8*rdi + 32]
 11054  	LONG $0x5c100f66; WORD $0x30fa             // movupd    xmm3, oword [rdx + 8*rdi + 48]
 11055  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 11056  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 11057  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 11058  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 11059  	LONG $0x08c78348                           // add    rdi, 8
 11060  	LONG $0x02c68348                           // add    rsi, 2
 11061  	JNE  LBB1_488
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11062  	JMP  LBB1_710
 11063  
 11064  LBB1_489:
	// Vector path: multiply packed bytes by a broadcast byte scalar. There is no packed 8-bit multiply here,
	// so bytes are widened to 16-bit words, multiplied with pmullw, masked and packed back to bytes.
	// In (as used below): cl = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: edi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          xmm1 = scalar zero-extended into eight words (pmovzxbw),
	//          r9 = (edi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes edi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11065  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 11066  	WORD $0xe783; BYTE $0xe0     // and    edi, -32
 11067  	WORD $0xb60f; BYTE $0xc1     // movzx    eax, cl
 11068  	LONG $0xc06e0f66             // movd    xmm0, eax
 11069  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11070  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11071  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 11072  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 11073  	LONG $0x05e9c149             // shr    r9, 5
 11074  	LONG $0x01c18349             // add    r9, 1
 11075  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0
	// rax == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11076  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 11077  	JE   LBB1_717
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rax = byte index.
 11078  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 11079  	LONG $0xfee68348             // and    rsi, -2
 11080  	WORD $0xf748; BYTE $0xde     // neg    rsi
 11081  	WORD $0xc031                 // xor    eax, eax
	// xmm2 and xmm4 both become punpckhbw(xmm0, xmm0): the scalar byte duplicated into both halves of every word.
	// xmm3 = 16-byte constant at [rbp], used as the per-word mask before packing.
	// NOTE(review): presumably 0x00ff in every word (keep the low byte) - confirm against the LCDATA definition rbp points to.
 11082  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 11083  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2
 11084  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI1_0] */
 11085  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 11086  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4
 11087  
 11088  LBB1_491:
	// Two 32-byte chunks (64 bytes) per iteration. For each 16-byte vector:
	//   low 8 bytes:  pmovzxbw widens them to words, pmullw by xmm1;
	//   high 8 bytes: punpckhbw x,x duplicates each byte into both halves of a word, pmullw by xmm2/xmm4.
	// In both cases the low byte of each 16-bit product equals the 8-bit product modulo 256.
	// pand with xmm3 then packuswb narrows the two word vectors back into one byte vector.
 11089  	LONG $0x2c6f0ff3; BYTE $0x02               // movdqu    xmm5, oword [rdx + rax]
 11090  	LONG $0x746f0ff3; WORD $0x1002             // movdqu    xmm6, oword [rdx + rax + 16]
 11091  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 11092  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 11093  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 11094  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11095  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 11096  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 11097  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 11098  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 11099  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 11100  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 11101  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 11102  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 11103  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11104  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 11105  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 11106  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 11107  	LONG $0x6c6f0ff3; WORD $0x2002             // movdqu    xmm5, oword [rdx + rax + 32]
 11108  	LONG $0x746f0ff3; WORD $0x3002             // movdqu    xmm6, oword [rdx + rax + 48]
 11109  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 11110  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 11111  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 11112  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11113  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 11114  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 11115  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 11116  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 11117  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 11118  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 11119  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 11120  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 11121  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11122  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 11123  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 11124  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 11125  	LONG $0x40c08348                           // add    rax, 64
 11126  	LONG $0x02c68348                           // add    rsi, 2
 11127  	JNE  LBB1_491
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11128  	JMP  LBB1_718
 11129  
 11130  LBB1_492:
	// Vector path: multiply packed bytes by a broadcast byte scalar. There is no packed 8-bit multiply here,
	// so bytes are widened to 16-bit words, multiplied with pmullw, masked and packed back to bytes.
	// The instruction sequence matches LBB1_489 and differs only in its branch targets.
	// In (as used below): cl = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: edi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          xmm1 = scalar zero-extended into eight words (pmovzxbw),
	//          r9 = (edi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes edi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11131  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 11132  	WORD $0xe783; BYTE $0xe0     // and    edi, -32
 11133  	WORD $0xb60f; BYTE $0xc1     // movzx    eax, cl
 11134  	LONG $0xc06e0f66             // movd    xmm0, eax
 11135  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11136  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11137  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 11138  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 11139  	LONG $0x05e9c149             // shr    r9, 5
 11140  	LONG $0x01c18349             // add    r9, 1
 11141  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0
	// rax == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11142  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 11143  	JE   LBB1_725
	// rsi = -(r9 & ~1): negated count of chunks that can be processed in pairs; rax = byte index.
 11144  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 11145  	LONG $0xfee68348             // and    rsi, -2
 11146  	WORD $0xf748; BYTE $0xde     // neg    rsi
 11147  	WORD $0xc031                 // xor    eax, eax
	// xmm2 and xmm4 both become punpckhbw(xmm0, xmm0): the scalar byte duplicated into both halves of every word.
	// xmm3 = 16-byte constant at [rbp], used as the per-word mask before packing.
	// NOTE(review): presumably 0x00ff in every word (keep the low byte) - confirm against the LCDATA definition rbp points to.
 11148  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 11149  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2
 11150  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI1_0] */
 11151  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 11152  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4
 11153  
 11154  LBB1_494:
	// Two 32-byte chunks (64 bytes) per iteration. For each 16-byte vector:
	//   low 8 bytes:  pmovzxbw widens them to words, pmullw by xmm1;
	//   high 8 bytes: punpckhbw x,x duplicates each byte into both halves of a word, pmullw by xmm2/xmm4.
	// In both cases the low byte of each 16-bit product equals the 8-bit product modulo 256.
	// pand with xmm3 then packuswb narrows the two word vectors back into one byte vector.
 11155  	LONG $0x2c6f0ff3; BYTE $0x02               // movdqu    xmm5, oword [rdx + rax]
 11156  	LONG $0x746f0ff3; WORD $0x1002             // movdqu    xmm6, oword [rdx + rax + 16]
 11157  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 11158  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 11159  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 11160  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11161  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 11162  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 11163  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 11164  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 11165  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 11166  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 11167  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 11168  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 11169  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11170  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 11171  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 11172  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 11173  	LONG $0x6c6f0ff3; WORD $0x2002             // movdqu    xmm5, oword [rdx + rax + 32]
 11174  	LONG $0x746f0ff3; WORD $0x3002             // movdqu    xmm6, oword [rdx + rax + 48]
 11175  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 11176  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 11177  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 11178  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11179  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 11180  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 11181  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 11182  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 11183  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 11184  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 11185  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 11186  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 11187  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 11188  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 11189  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 11190  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 11191  	LONG $0x40c08348                           // add    rax, 64
 11192  	LONG $0x02c68348                           // add    rsi, 2
 11193  	JNE  LBB1_494
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11194  	JMP  LBB1_726
 11195  
 11196  LBB1_495:
	// Vector path: add a broadcast byte scalar to packed bytes.
	// In (as used below): al = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: esi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          r9 = (esi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes esi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11197  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11198  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 11199  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 11200  	LONG $0xc16e0f66             // movd    xmm0, ecx
 11201  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11202  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11203  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 11204  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11205  	LONG $0x05e9c149             // shr    r9, 5
 11206  	LONG $0x01c18349             // add    r9, 1
	// rcx == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11207  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11208  	JE   LBB1_733
	// rcx = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = byte index.
 11209  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11210  	LONG $0xfee18348             // and    rcx, -2
 11211  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11212  	WORD $0xff31                 // xor    edi, edi
 11213  
 11214  LBB1_497:
	// Two chunks (64 bytes) per iteration: load from rdx, paddb with xmm0, store to r8.
 11215  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 11216  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 11217  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 11218  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 11219  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 11220  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 11221  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 11222  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 11223  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 11224  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 11225  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 11226  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 11227  	LONG $0x40c78348                           // add    rdi, 64
 11228  	LONG $0x02c18348                           // add    rcx, 2
 11229  	JNE  LBB1_497
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11230  	JMP  LBB1_734
 11231  
 11232  LBB1_498:
	// Vector path: subtract a broadcast byte scalar from packed bytes (element - scalar).
	// In (as used below): al = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: esi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          r9 = (esi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes esi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11233  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11234  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 11235  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 11236  	LONG $0xc16e0f66             // movd    xmm0, ecx
 11237  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11238  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11239  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 11240  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11241  	LONG $0x05e9c149             // shr    r9, 5
 11242  	LONG $0x01c18349             // add    r9, 1
	// rcx == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11243  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11244  	JE   LBB1_741
	// rcx = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = byte index.
 11245  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11246  	LONG $0xfee18348             // and    rcx, -2
 11247  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11248  	WORD $0xff31                 // xor    edi, edi
 11249  
 11250  LBB1_500:
	// Two chunks (64 bytes) per iteration: load from rdx, psubb xmm0 from it, store to r8.
 11251  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 11252  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 11253  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 11254  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 11255  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 11256  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 11257  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 11258  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 11259  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 11260  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 11261  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 11262  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 11263  	LONG $0x40c78348                           // add    rdi, 64
 11264  	LONG $0x02c18348                           // add    rcx, 2
 11265  	JNE  LBB1_500
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11266  	JMP  LBB1_742
 11267  
 11268  LBB1_501:
	// Vector path: add a broadcast byte scalar to packed bytes.
	// The instruction sequence matches LBB1_495 and differs only in its branch targets.
	// In (as used below): al = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: esi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          r9 = (esi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes esi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11269  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11270  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 11271  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 11272  	LONG $0xc16e0f66             // movd    xmm0, ecx
 11273  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11274  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11275  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 11276  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11277  	LONG $0x05e9c149             // shr    r9, 5
 11278  	LONG $0x01c18349             // add    r9, 1
	// rcx == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11279  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11280  	JE   LBB1_749
	// rcx = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = byte index.
 11281  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11282  	LONG $0xfee18348             // and    rcx, -2
 11283  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11284  	WORD $0xff31                 // xor    edi, edi
 11285  
 11286  LBB1_503:
	// Two chunks (64 bytes) per iteration: load from rdx, paddb with xmm0, store to r8.
 11287  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 11288  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 11289  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 11290  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 11291  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 11292  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 11293  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 11294  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 11295  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 11296  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 11297  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 11298  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 11299  	LONG $0x40c78348                           // add    rdi, 64
 11300  	LONG $0x02c18348                           // add    rcx, 2
 11301  	JNE  LBB1_503
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11302  	JMP  LBB1_750
 11303  
 11304  LBB1_504:
	// Vector path: subtract a broadcast byte scalar from packed bytes (element - scalar).
	// The instruction sequence matches LBB1_498 and differs only in its branch targets.
	// In (as used below): al = scalar, r10d = element count, rdx = source pointer, r8 = destination pointer.
	// Derived: esi = count rounded down to a multiple of 32, xmm0 = scalar in all 16 bytes (pshufb with an all-zero mask),
	//          r9 = (esi - 32) / 32 + 1, i.e. the number of 32-byte chunks (assumes esi >= 32;
	//          NOTE(review): presumably guaranteed by the code that branches here - confirm).
 11305  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11306  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 11307  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 11308  	LONG $0xc16e0f66             // movd    xmm0, ecx
 11309  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 11310  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 11311  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 11312  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11313  	LONG $0x05e9c149             // shr    r9, 5
 11314  	LONG $0x01c18349             // add    r9, 1
	// rcx == 0 means there is exactly one 32-byte chunk: bypass the 2x-unrolled loop.
 11315  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11316  	JE   LBB1_757
	// rcx = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = byte index.
 11317  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11318  	LONG $0xfee18348             // and    rcx, -2
 11319  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11320  	WORD $0xff31                 // xor    edi, edi
 11321  
 11322  LBB1_506:
	// Two chunks (64 bytes) per iteration: load from rdx, psubb xmm0 from it, store to r8.
 11323  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 11324  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 11325  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 11326  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 11327  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 11328  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 11329  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 11330  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 11331  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 11332  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 11333  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 11334  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 11335  	LONG $0x40c78348                           // add    rdi, 64
 11336  	LONG $0x02c18348                           // add    rcx, 2
 11337  	JNE  LBB1_506
	// Continuation label is outside this excerpt; presumably it handles an odd trailing chunk and the scalar tail - TODO confirm.
 11338  	JMP  LBB1_758
 11339  
 11340  LBB1_507:
	// Setup for the 64-bit vector add loop that follows (LBB1_509).
	//   esi  = r10d rounded down to a multiple of 4 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = rax replicated into both 64-bit lanes (pshufd 0x44 copies the low qword to the high qword).
	//   r9   = (esi - 4) / 4 + 1, i.e. the number of 4-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 4 there is exactly one chunk and control leaves via LBB1_765 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 4; presumably guaranteed by the caller of this label.
 11341  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11342  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 11343  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 11344  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
 11345  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 11346  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11347  	LONG $0x02e9c149             // shr    r9, 2
 11348  	LONG $0x01c18349             // add    r9, 1
 11349  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11350  	JE   LBB1_765
 11351  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11352  	LONG $0xfee18348             // and    rcx, -2
 11353  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11354  	WORD $0xff31                 // xor    edi, edi
 11355  
 11356  LBB1_509:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (8 qwords) from [rdx + 8*rdi],
	// adds xmm0 to each 64-bit lane (paddq) and stores the results to [r8 + 8*rdi].
	// rdi advances by 8 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_766 (outside this excerpt; presumably handles the remaining chunk/tail).
 11357  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 11358  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 11359  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11360  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11361  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 11362  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 11363  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 11364  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 11365  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11366  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11367  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 11368  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 11369  	LONG $0x08c78348                           // add    rdi, 8
 11370  	LONG $0x02c18348                           // add    rcx, 2
 11371  	JNE  LBB1_509
 11372  	JMP  LBB1_766
 11373  
 11374  LBB1_510:
	// Setup for the 64-bit vector subtract loop that follows (LBB1_512).
	//   esi  = r10d rounded down to a multiple of 4 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = rax replicated into both 64-bit lanes (pshufd 0x44 copies the low qword to the high qword).
	//   r9   = (esi - 4) / 4 + 1, i.e. the number of 4-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 4 there is exactly one chunk and control leaves via LBB1_773 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 4; presumably guaranteed by the caller of this label.
 11375  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11376  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 11377  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 11378  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
 11379  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 11380  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11381  	LONG $0x02e9c149             // shr    r9, 2
 11382  	LONG $0x01c18349             // add    r9, 1
 11383  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11384  	JE   LBB1_773
 11385  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11386  	LONG $0xfee18348             // and    rcx, -2
 11387  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11388  	WORD $0xff31                 // xor    edi, edi
 11389  
 11390  LBB1_512:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (8 qwords) from [rdx + 8*rdi],
	// subtracts xmm0 from each 64-bit lane (psubq) and stores the results to [r8 + 8*rdi].
	// rdi advances by 8 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_774 (outside this excerpt; presumably handles the remaining chunk/tail).
 11391  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 11392  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 11393  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 11394  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 11395  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 11396  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 11397  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 11398  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 11399  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 11400  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 11401  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 11402  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 11403  	LONG $0x08c78348                           // add    rdi, 8
 11404  	LONG $0x02c18348                           // add    rcx, 2
 11405  	JNE  LBB1_512
 11406  	JMP  LBB1_774
 11407  
 11408  LBB1_513:
	// Setup for the 64-bit vector add loop that follows (LBB1_515).
	//   esi  = r10d rounded down to a multiple of 4 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = rax replicated into both 64-bit lanes (pshufd 0x44 copies the low qword to the high qword).
	//   r9   = (esi - 4) / 4 + 1, i.e. the number of 4-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 4 there is exactly one chunk and control leaves via LBB1_781 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 4; presumably guaranteed by the caller of this label.
 11409  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11410  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 11411  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 11412  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
 11413  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 11414  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11415  	LONG $0x02e9c149             // shr    r9, 2
 11416  	LONG $0x01c18349             // add    r9, 1
 11417  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11418  	JE   LBB1_781
 11419  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11420  	LONG $0xfee18348             // and    rcx, -2
 11421  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11422  	WORD $0xff31                 // xor    edi, edi
 11423  
 11424  LBB1_515:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (8 qwords) from [rdx + 8*rdi],
	// adds xmm0 to each 64-bit lane (paddq) and stores the results to [r8 + 8*rdi].
	// rdi advances by 8 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_782 (outside this excerpt; presumably handles the remaining chunk/tail).
 11425  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 11426  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 11427  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11428  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11429  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 11430  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 11431  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 11432  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 11433  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11434  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11435  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 11436  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 11437  	LONG $0x08c78348                           // add    rdi, 8
 11438  	LONG $0x02c18348                           // add    rcx, 2
 11439  	JNE  LBB1_515
 11440  	JMP  LBB1_782
 11441  
 11442  LBB1_516:
	// Setup for the 64-bit vector subtract loop that follows (LBB1_518).
	//   esi  = r10d rounded down to a multiple of 4 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = rax replicated into both 64-bit lanes (pshufd 0x44 copies the low qword to the high qword).
	//   r9   = (esi - 4) / 4 + 1, i.e. the number of 4-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 4 there is exactly one chunk and control leaves via LBB1_789 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 4; presumably guaranteed by the caller of this label.
 11443  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11444  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 11445  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 11446  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
 11447  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 11448  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11449  	LONG $0x02e9c149             // shr    r9, 2
 11450  	LONG $0x01c18349             // add    r9, 1
 11451  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11452  	JE   LBB1_789
 11453  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11454  	LONG $0xfee18348             // and    rcx, -2
 11455  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11456  	WORD $0xff31                 // xor    edi, edi
 11457  
 11458  LBB1_518:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (8 qwords) from [rdx + 8*rdi],
	// subtracts xmm0 from each 64-bit lane (psubq) and stores the results to [r8 + 8*rdi].
	// rdi advances by 8 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_790 (outside this excerpt; presumably handles the remaining chunk/tail).
 11459  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 11460  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 11461  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 11462  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 11463  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 11464  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 11465  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 11466  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 11467  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 11468  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 11469  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 11470  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 11471  	LONG $0x08c78348                           // add    rdi, 8
 11472  	LONG $0x02c18348                           // add    rcx, 2
 11473  	JNE  LBB1_518
 11474  	JMP  LBB1_790
 11475  
 11476  LBB1_519:
	// Setup for the 16-bit vector multiply loop that follows (LBB1_521).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_797 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11477  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11478  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11479  	LONG $0xc06e0f66             // movd    xmm0, eax
 11480  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11481  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11482  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11483  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11484  	LONG $0x04e9c149             // shr    r9, 4
 11485  	LONG $0x01c18349             // add    r9, 1
 11486  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11487  	JE   LBB1_797
 11488  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11489  	LONG $0xfee18348             // and    rcx, -2
 11490  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11491  	WORD $0xff31                 // xor    edi, edi
 11492  
 11493  LBB1_521:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// multiplies each 16-bit lane by xmm0 keeping the low 16 bits (pmullw) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_798 (outside this excerpt; presumably handles the remaining chunk/tail).
 11494  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11495  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11496  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11497  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11498  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11499  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11500  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11501  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11502  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11503  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11504  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11505  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11506  	LONG $0x20c78348                           // add    rdi, 32
 11507  	LONG $0x02c18348                           // add    rcx, 2
 11508  	JNE  LBB1_521
 11509  	JMP  LBB1_798
 11510  
 11511  LBB1_522:
	// Setup for the 16-bit vector multiply loop that follows (LBB1_524).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_805 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11512  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11513  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11514  	LONG $0xc06e0f66             // movd    xmm0, eax
 11515  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11516  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11517  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11518  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11519  	LONG $0x04e9c149             // shr    r9, 4
 11520  	LONG $0x01c18349             // add    r9, 1
 11521  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11522  	JE   LBB1_805
 11523  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11524  	LONG $0xfee18348             // and    rcx, -2
 11525  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11526  	WORD $0xff31                 // xor    edi, edi
 11527  
 11528  LBB1_524:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// multiplies each 16-bit lane by xmm0 keeping the low 16 bits (pmullw) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_806 (outside this excerpt; presumably handles the remaining chunk/tail).
 11529  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11530  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11531  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11532  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11533  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11534  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11535  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11536  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11537  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11538  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11539  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11540  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11541  	LONG $0x20c78348                           // add    rdi, 32
 11542  	LONG $0x02c18348                           // add    rcx, 2
 11543  	JNE  LBB1_524
 11544  	JMP  LBB1_806
 11545  
 11546  LBB1_525:
	// Setup for the 16-bit vector multiply loop that follows (LBB1_527).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_813 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11547  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11548  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11549  	LONG $0xc06e0f66             // movd    xmm0, eax
 11550  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11551  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11552  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11553  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11554  	LONG $0x04e9c149             // shr    r9, 4
 11555  	LONG $0x01c18349             // add    r9, 1
 11556  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11557  	JE   LBB1_813
 11558  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11559  	LONG $0xfee18348             // and    rcx, -2
 11560  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11561  	WORD $0xff31                 // xor    edi, edi
 11562  
 11563  LBB1_527:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// multiplies each 16-bit lane by xmm0 keeping the low 16 bits (pmullw) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_814 (outside this excerpt; presumably handles the remaining chunk/tail).
 11564  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11565  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11566  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11567  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11568  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11569  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11570  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11571  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11572  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11573  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11574  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11575  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11576  	LONG $0x20c78348                           // add    rdi, 32
 11577  	LONG $0x02c18348                           // add    rcx, 2
 11578  	JNE  LBB1_527
 11579  	JMP  LBB1_814
 11580  
 11581  LBB1_528:
	// Setup for the 16-bit vector multiply loop that follows (LBB1_530).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_821 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11582  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11583  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11584  	LONG $0xc06e0f66             // movd    xmm0, eax
 11585  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11586  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11587  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11588  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11589  	LONG $0x04e9c149             // shr    r9, 4
 11590  	LONG $0x01c18349             // add    r9, 1
 11591  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11592  	JE   LBB1_821
 11593  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11594  	LONG $0xfee18348             // and    rcx, -2
 11595  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11596  	WORD $0xff31                 // xor    edi, edi
 11597  
 11598  LBB1_530:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// multiplies each 16-bit lane by xmm0 keeping the low 16 bits (pmullw) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_822 (outside this excerpt; presumably handles the remaining chunk/tail).
 11599  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11600  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11601  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11602  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11603  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11604  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11605  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11606  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11607  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 11608  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 11609  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11610  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11611  	LONG $0x20c78348                           // add    rdi, 32
 11612  	LONG $0x02c18348                           // add    rcx, 2
 11613  	JNE  LBB1_530
 11614  	JMP  LBB1_822
 11615  
 11616  LBB1_531:
	// Setup for the 16-bit vector add loop that follows (LBB1_533).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_829 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11617  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11618  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11619  	LONG $0xc06e0f66             // movd    xmm0, eax
 11620  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11621  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11622  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11623  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11624  	LONG $0x04e9c149             // shr    r9, 4
 11625  	LONG $0x01c18349             // add    r9, 1
 11626  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11627  	JE   LBB1_829
 11628  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11629  	LONG $0xfee18348             // and    rcx, -2
 11630  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11631  	WORD $0xff31                 // xor    edi, edi
 11632  
 11633  LBB1_533:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// adds xmm0 to each 16-bit lane (paddw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_830 (outside this excerpt; presumably handles the remaining chunk/tail).
 11634  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11635  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11636  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11637  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11638  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11639  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11640  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11641  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11642  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11643  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11644  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11645  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11646  	LONG $0x20c78348                           // add    rdi, 32
 11647  	LONG $0x02c18348                           // add    rcx, 2
 11648  	JNE  LBB1_533
 11649  	JMP  LBB1_830
 11650  
 11651  LBB1_534:
	// Setup for the 16-bit vector add loop that follows (LBB1_536).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_837 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11652  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11653  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11654  	LONG $0xc06e0f66             // movd    xmm0, eax
 11655  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11656  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11657  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11658  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11659  	LONG $0x04e9c149             // shr    r9, 4
 11660  	LONG $0x01c18349             // add    r9, 1
 11661  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11662  	JE   LBB1_837
 11663  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11664  	LONG $0xfee18348             // and    rcx, -2
 11665  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11666  	WORD $0xff31                 // xor    edi, edi
 11667  
 11668  LBB1_536:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// adds xmm0 to each 16-bit lane (paddw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_838 (outside this excerpt; presumably handles the remaining chunk/tail).
 11669  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11670  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11671  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11672  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11673  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11674  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11675  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11676  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11677  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11678  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11679  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11680  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11681  	LONG $0x20c78348                           // add    rdi, 32
 11682  	LONG $0x02c18348                           // add    rcx, 2
 11683  	JNE  LBB1_536
 11684  	JMP  LBB1_838
 11685  
 11686  LBB1_537:
	// Setup for the 16-bit vector subtract loop that follows (LBB1_539).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_845 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11687  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11688  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11689  	LONG $0xc06e0f66             // movd    xmm0, eax
 11690  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11691  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11692  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11693  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11694  	LONG $0x04e9c149             // shr    r9, 4
 11695  	LONG $0x01c18349             // add    r9, 1
 11696  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11697  	JE   LBB1_845
 11698  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11699  	LONG $0xfee18348             // and    rcx, -2
 11700  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11701  	WORD $0xff31                 // xor    edi, edi
 11702  
 11703  LBB1_539:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// subtracts xmm0 from each 16-bit lane (psubw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_846 (outside this excerpt; presumably handles the remaining chunk/tail).
 11704  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11705  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11706  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11707  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11708  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11709  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11710  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11711  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11712  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11713  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11714  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11715  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11716  	LONG $0x20c78348                           // add    rdi, 32
 11717  	LONG $0x02c18348                           // add    rcx, 2
 11718  	JNE  LBB1_539
 11719  	JMP  LBB1_846
 11720  
 11721  LBB1_540:
	// Setup for the 16-bit vector subtract loop that follows (LBB1_542).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_853 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11722  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11723  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11724  	LONG $0xc06e0f66             // movd    xmm0, eax
 11725  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11726  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11727  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11728  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11729  	LONG $0x04e9c149             // shr    r9, 4
 11730  	LONG $0x01c18349             // add    r9, 1
 11731  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11732  	JE   LBB1_853
 11733  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11734  	LONG $0xfee18348             // and    rcx, -2
 11735  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11736  	WORD $0xff31                 // xor    edi, edi
 11737  
 11738  LBB1_542:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// subtracts xmm0 from each 16-bit lane (psubw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_854 (outside this excerpt; presumably handles the remaining chunk/tail).
 11739  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11740  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11741  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11742  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11743  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11744  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11745  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11746  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11747  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11748  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11749  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11750  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11751  	LONG $0x20c78348                           // add    rdi, 32
 11752  	LONG $0x02c18348                           // add    rcx, 2
 11753  	JNE  LBB1_542
 11754  	JMP  LBB1_854
 11755  
 11756  LBB1_543:
	// Setup for the 16-bit vector add loop that follows (LBB1_545).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_861 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11757  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11758  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11759  	LONG $0xc06e0f66             // movd    xmm0, eax
 11760  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11761  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11762  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11763  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11764  	LONG $0x04e9c149             // shr    r9, 4
 11765  	LONG $0x01c18349             // add    r9, 1
 11766  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11767  	JE   LBB1_861
 11768  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11769  	LONG $0xfee18348             // and    rcx, -2
 11770  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11771  	WORD $0xff31                 // xor    edi, edi
 11772  
 11773  LBB1_545:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// adds xmm0 to each 16-bit lane (paddw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_862 (outside this excerpt; presumably handles the remaining chunk/tail).
 11774  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11775  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11776  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11777  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11778  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11779  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11780  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11781  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11782  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11783  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11784  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11785  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11786  	LONG $0x20c78348                           // add    rdi, 32
 11787  	LONG $0x02c18348                           // add    rcx, 2
 11788  	JNE  LBB1_545
 11789  	JMP  LBB1_862
 11790  
 11791  LBB1_546:
	// Setup for the 16-bit vector add loop that follows (LBB1_548).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_869 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11792  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11793  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11794  	LONG $0xc06e0f66             // movd    xmm0, eax
 11795  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11796  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11797  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11798  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11799  	LONG $0x04e9c149             // shr    r9, 4
 11800  	LONG $0x01c18349             // add    r9, 1
 11801  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11802  	JE   LBB1_869
 11803  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11804  	LONG $0xfee18348             // and    rcx, -2
 11805  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11806  	WORD $0xff31                 // xor    edi, edi
 11807  
 11808  LBB1_548:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// adds xmm0 to each 16-bit lane (paddw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_870 (outside this excerpt; presumably handles the remaining chunk/tail).
 11809  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11810  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11811  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11812  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11813  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11814  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11815  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11816  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11817  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 11818  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 11819  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11820  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11821  	LONG $0x20c78348                           // add    rdi, 32
 11822  	LONG $0x02c18348                           // add    rcx, 2
 11823  	JNE  LBB1_548
 11824  	JMP  LBB1_870
 11825  
 11826  LBB1_549:
	// Setup for the 16-bit vector subtract loop that follows (LBB1_551).
	//   esi  = r10d rounded down to a multiple of 16 (r10d is presumably the element count; confirm at the branch into this label).
	//   xmm0 = low 16 bits of eax replicated into all 8 word lanes (pshuflw 0xE0 duplicates word 0, pshufd 0 spreads dword 0).
	//   r9   = (esi - 16) / 16 + 1, i.e. the number of 16-element (32-byte) chunks; it is not read again in this block.
	//   If esi == 16 there is exactly one chunk and control leaves via LBB1_877 (outside this excerpt).
	//   rcx  = -(r9 & ~1): negated count of chunks that can be processed in pairs; rdi = 0 is the element index.
	// NOTE(review): nothing here checks esi >= 16; presumably guaranteed by the caller of this label.
 11827  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11828  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 11829  	LONG $0xc06e0f66             // movd    xmm0, eax
 11830  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 11831  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 11832  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11833  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11834  	LONG $0x04e9c149             // shr    r9, 4
 11835  	LONG $0x01c18349             // add    r9, 1
 11836  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11837  	JE   LBB1_877
 11838  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11839  	LONG $0xfee18348             // and    rcx, -2
 11840  	WORD $0xf748; BYTE $0xd9     // neg    rcx
 11841  	WORD $0xff31                 // xor    edi, edi
 11842  
 11843  LBB1_551:
	// Main loop, unrolled x2: each pass loads four 16-byte vectors (32 words) from [rdx + 2*rdi],
	// subtracts xmm0 from each 16-bit lane (psubw, wrapping) and stores the results to [r8 + 2*rdi].
	// rdi advances by 32 elements and rcx by 2; the final add sets ZF, so JNE repeats until rcx reaches 0.
	// On exit control jumps to LBB1_878 (outside this excerpt; presumably handles the remaining chunk/tail).
 11844  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11845  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11846  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11847  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11848  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11849  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11850  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11851  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11852  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11853  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11854  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11855  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11856  	LONG $0x20c78348                           // add    rdi, 32
 11857  	LONG $0x02c18348                           // add    rcx, 2
 11858  	JNE  LBB1_551
 11859  	JMP  LBB1_878
 11860  
 11861  LBB1_552: // Vector path: out[i] = in[i] - scalar on 16-bit lanes (source rdx, destination r8). NOTE(review): r10d and eax are set outside this view; presumably element count and scalar - confirm.
 11862  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11863  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 -- esi = r10d rounded down to a multiple of 16 elements
 11864  	LONG $0xc06e0f66             // movd    xmm0, eax
 11865  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 -- copy word 0 into word 1
 11866  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- xmm0 = low word of eax in all eight word lanes
 11867  	LONG $0xf04e8d48             // lea    rcx, [rsi - 16]
 11868  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11869  	LONG $0x04e9c149             // shr    r9, 4
 11870  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 16-element chunks (rsi / 16)
 11871  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11872  	JE   LBB1_885 // rsi == 16: exactly one chunk, so skip the two-chunk unrolled loop
 11873  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11874  	LONG $0xfee18348             // and    rcx, -2
 11875  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 11876  	WORD $0xff31                 // xor    edi, edi -- rdi = element index
 11877  // Unrolled loop: two 16-element chunks (four 16-byte vectors, 32 words) per iteration.
 11878  LBB1_554:
 11879  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 11880  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 11881  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11882  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11883  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 11884  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 11885  	LONG $0x4c6f0ff3; WORD $0x207a             // movdqu    xmm1, oword [rdx + 2*rdi + 32]
 11886  	LONG $0x546f0ff3; WORD $0x307a             // movdqu    xmm2, oword [rdx + 2*rdi + 48]
 11887  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 11888  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 11889  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 11890  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 11891  	LONG $0x20c78348                           // add    rdi, 32
 11892  	LONG $0x02c18348                           // add    rcx, 2
 11893  	JNE  LBB1_554
 11894  	JMP  LBB1_886 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 11895  
 11896  LBB1_555: // Vector path: out[i] = in[i] * scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 11897  	WORD $0xc189             // mov    ecx, eax
 11898  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 11899  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 11900  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 11901  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 11902  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 11903  	LONG $0x03e9c149         // shr    r9, 3
 11904  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 11905  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 11906  	JE   LBB1_893 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 11907  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 11908  	LONG $0xfee68348         // and    rsi, -2
 11909  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 11910  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 11911  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 11912  LBB1_557:
 11913  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 11914  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 11915  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 11916  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 11917  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 11918  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 11919  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 11920  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 11921  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 11922  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 11923  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 11924  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 11925  	LONG $0x10c78348               // add    rdi, 16
 11926  	LONG $0x02c68348               // add    rsi, 2
 11927  	JNE  LBB1_557
 11928  	JMP  LBB1_894 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 11929  
 11930  LBB1_558: // Vector path: out[i] = in[i] * scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 11931  	WORD $0xc189             // mov    ecx, eax
 11932  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 11933  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 11934  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 11935  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 11936  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 11937  	LONG $0x03e9c149         // shr    r9, 3
 11938  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 11939  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 11940  	JE   LBB1_901 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 11941  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 11942  	LONG $0xfee68348         // and    rsi, -2
 11943  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 11944  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 11945  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 11946  LBB1_560:
 11947  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 11948  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 11949  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 11950  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 11951  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 11952  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 11953  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 11954  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 11955  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 11956  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 11957  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 11958  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 11959  	LONG $0x10c78348               // add    rdi, 16
 11960  	LONG $0x02c68348               // add    rsi, 2
 11961  	JNE  LBB1_560
 11962  	JMP  LBB1_902 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 11963  
 11964  LBB1_561: // Vector path: out[i] = in[i] + scalar on 64-bit lanes (source rdx, destination r8). NOTE(review): r10d and rax are set outside this view; presumably element count and scalar - confirm.
 11965  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 11966  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- esi = r10d rounded down to a multiple of 4 elements
 11967  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 11968  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 -- xmm0 = rax in both qword lanes
 11969  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 11970  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 11971  	LONG $0x02e9c149             // shr    r9, 2
 11972  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 4-element chunks (rsi / 4)
 11973  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 11974  	JE   LBB1_909 // rsi == 4: exactly one chunk, so skip the two-chunk unrolled loop
 11975  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 11976  	LONG $0xfee18348             // and    rcx, -2
 11977  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 11978  	WORD $0xff31                 // xor    edi, edi -- rdi = element index
 11979  // Unrolled loop: two 4-element chunks (four 16-byte vectors, 8 qwords) per iteration.
 11980  LBB1_563:
 11981  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 11982  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 11983  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11984  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11985  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 11986  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 11987  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 11988  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 11989  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 11990  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 11991  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 11992  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 11993  	LONG $0x08c78348                           // add    rdi, 8
 11994  	LONG $0x02c18348                           // add    rcx, 2
 11995  	JNE  LBB1_563
 11996  	JMP  LBB1_910 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 11997  
 11998  LBB1_564: // Vector path: out[i] = in[i] + scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 11999  	WORD $0xc189             // mov    ecx, eax
 12000  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 12001  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 12002  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 12003  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 12004  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 12005  	LONG $0x03e9c149         // shr    r9, 3
 12006  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 12007  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 12008  	JE   LBB1_917 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 12009  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 12010  	LONG $0xfee68348         // and    rsi, -2
 12011  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12012  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 12013  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 12014  LBB1_566:
 12015  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 12016  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 12017  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 12018  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 12019  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 12020  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 12021  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 12022  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 12023  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 12024  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 12025  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 12026  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 12027  	LONG $0x10c78348               // add    rdi, 16
 12028  	LONG $0x02c68348               // add    rsi, 2
 12029  	JNE  LBB1_566
 12030  	JMP  LBB1_918 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12031  
 12032  LBB1_567: // Vector path: out[i] = in[i] - scalar on 64-bit lanes (source rdx, destination r8). NOTE(review): r10d and rax are set outside this view; presumably element count and scalar - confirm.
 12033  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12034  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- esi = r10d rounded down to a multiple of 4 elements
 12035  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 12036  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 -- xmm0 = rax in both qword lanes
 12037  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 12038  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12039  	LONG $0x02e9c149             // shr    r9, 2
 12040  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 4-element chunks (rsi / 4)
 12041  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12042  	JE   LBB1_925 // rsi == 4: exactly one chunk, so skip the two-chunk unrolled loop
 12043  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12044  	LONG $0xfee18348             // and    rcx, -2
 12045  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 12046  	WORD $0xff31                 // xor    edi, edi -- rdi = element index
 12047  // Unrolled loop: two 4-element chunks (four 16-byte vectors, 8 qwords) per iteration.
 12048  LBB1_569:
 12049  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 12050  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 12051  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 12052  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 12053  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 12054  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 12055  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 12056  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 12057  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 12058  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 12059  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 12060  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 12061  	LONG $0x08c78348                           // add    rdi, 8
 12062  	LONG $0x02c18348                           // add    rcx, 2
 12063  	JNE  LBB1_569
 12064  	JMP  LBB1_926 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12065  
 12066  LBB1_570: // Vector path: out[i] = in[i] - scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 12067  	WORD $0xc189             // mov    ecx, eax
 12068  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 12069  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 12070  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 12071  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 12072  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 12073  	LONG $0x03e9c149         // shr    r9, 3
 12074  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 12075  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 12076  	JE   LBB1_933 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 12077  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 12078  	LONG $0xfee68348         // and    rsi, -2
 12079  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12080  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 12081  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 12082  LBB1_572:
 12083  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 12084  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 12085  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 12086  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 12087  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 12088  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 12089  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 12090  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 12091  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 12092  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 12093  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 12094  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 12095  	LONG $0x10c78348               // add    rdi, 16
 12096  	LONG $0x02c68348               // add    rsi, 2
 12097  	JNE  LBB1_572
 12098  	JMP  LBB1_934 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12099  
 12100  LBB1_573: // Vector path: out[i] = in[i] + scalar on 64-bit lanes (source rdx, destination r8). NOTE(review): r10d and rax are set outside this view; presumably element count and scalar - confirm.
 12101  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12102  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- esi = r10d rounded down to a multiple of 4 elements
 12103  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 12104  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 -- xmm0 = rax in both qword lanes
 12105  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 12106  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12107  	LONG $0x02e9c149             // shr    r9, 2
 12108  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 4-element chunks (rsi / 4)
 12109  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12110  	JE   LBB1_941 // rsi == 4: exactly one chunk, so skip the two-chunk unrolled loop
 12111  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12112  	LONG $0xfee18348             // and    rcx, -2
 12113  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 12114  	WORD $0xff31                 // xor    edi, edi -- rdi = element index
 12115  // Unrolled loop: two 4-element chunks (four 16-byte vectors, 8 qwords) per iteration.
 12116  LBB1_575:
 12117  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 12118  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 12119  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 12120  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 12121  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 12122  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 12123  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 12124  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 12125  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 12126  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 12127  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 12128  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 12129  	LONG $0x08c78348                           // add    rdi, 8
 12130  	LONG $0x02c18348                           // add    rcx, 2
 12131  	JNE  LBB1_575
 12132  	JMP  LBB1_942 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12133  
 12134  LBB1_576: // Vector path: out[i] = in[i] + scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 12135  	WORD $0xc189             // mov    ecx, eax
 12136  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 12137  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 12138  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 12139  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 12140  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 12141  	LONG $0x03e9c149         // shr    r9, 3
 12142  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 12143  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 12144  	JE   LBB1_949 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 12145  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 12146  	LONG $0xfee68348         // and    rsi, -2
 12147  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12148  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 12149  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 12150  LBB1_578:
 12151  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 12152  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 12153  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 12154  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 12155  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 12156  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 12157  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 12158  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 12159  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 12160  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 12161  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 12162  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 12163  	LONG $0x10c78348               // add    rdi, 16
 12164  	LONG $0x02c68348               // add    rsi, 2
 12165  	JNE  LBB1_578
 12166  	JMP  LBB1_950 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12167  
 12168  LBB1_579: // Vector path: out[i] = in[i] - scalar on 64-bit lanes (source rdx, destination r8). NOTE(review): r10d and rax are set outside this view; presumably element count and scalar - confirm.
 12169  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12170  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 -- esi = r10d rounded down to a multiple of 4 elements
 12171  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 12172  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 -- xmm0 = rax in both qword lanes
 12173  	LONG $0xfc4e8d48             // lea    rcx, [rsi - 4]
 12174  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12175  	LONG $0x02e9c149             // shr    r9, 2
 12176  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 4-element chunks (rsi / 4)
 12177  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12178  	JE   LBB1_957 // rsi == 4: exactly one chunk, so skip the two-chunk unrolled loop
 12179  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12180  	LONG $0xfee18348             // and    rcx, -2
 12181  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 12182  	WORD $0xff31                 // xor    edi, edi -- rdi = element index
 12183  // Unrolled loop: two 4-element chunks (four 16-byte vectors, 8 qwords) per iteration.
 12184  LBB1_581:
 12185  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 12186  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 12187  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 12188  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 12189  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 12190  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 12191  	LONG $0x4c6f0ff3; WORD $0x20fa             // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 12192  	LONG $0x546f0ff3; WORD $0x30fa             // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 12193  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 12194  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 12195  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 12196  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 12197  	LONG $0x08c78348                           // add    rdi, 8
 12198  	LONG $0x02c18348                           // add    rcx, 2
 12199  	JNE  LBB1_581
 12200  	JMP  LBB1_958 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12201  
 12202  LBB1_582: // Vector path: out[i] = in[i] - scalar on packed float32 lanes (source rdx, destination r8). NOTE(review): eax and xmm0 are set outside this view; presumably element count and scalar - confirm.
 12203  	WORD $0xc189             // mov    ecx, eax
 12204  	WORD $0xe183; BYTE $0xf8 // and    ecx, -8 -- ecx = eax rounded down to a multiple of 8 elements
 12205  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 12206  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- xmm1 = lane 0 of xmm0 in all four float lanes
 12207  	LONG $0xf8718d48         // lea    rsi, [rcx - 8]
 12208  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 12209  	LONG $0x03e9c149         // shr    r9, 3
 12210  	LONG $0x01c18349         // add    r9, 1 -- r9 = number of 8-element chunks (rcx / 8)
 12211  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 12212  	JE   LBB1_965 // rcx == 8: exactly one chunk, so skip the two-chunk unrolled loop
 12213  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 12214  	LONG $0xfee68348         // and    rsi, -2
 12215  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12216  	WORD $0xff31             // xor    edi, edi -- rdi = element index
 12217  // Unrolled loop: two 8-element chunks (four 16-byte vectors, 16 floats) per iteration.
 12218  LBB1_584:
 12219  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 12220  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 12221  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 12222  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 12223  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 12224  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 12225  	LONG $0xba54100f; BYTE $0x20   // movups    xmm2, oword [rdx + 4*rdi + 32]
 12226  	LONG $0xba5c100f; BYTE $0x30   // movups    xmm3, oword [rdx + 4*rdi + 48]
 12227  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 12228  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 12229  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 12230  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
 12231  	LONG $0x10c78348               // add    rdi, 16
 12232  	LONG $0x02c68348               // add    rsi, 2
 12233  	JNE  LBB1_584
 12234  	JMP  LBB1_966 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12235  
 12236  LBB1_585: // Vector path: out[i] = low byte of (in[i] * scalar) on 8-bit lanes (source rdx, destination r8). NOTE(review): r10d and cl are set outside this view; presumably element count and scalar - confirm.
 12237  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 12238  	WORD $0xe783; BYTE $0xe0     // and    edi, -32 -- edi = r10d rounded down to a multiple of 32 elements
 12239  	WORD $0xb60f; BYTE $0xc1     // movzx    eax, cl
 12240  	LONG $0xc06e0f66             // movd    xmm0, eax
 12241  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12242  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control: xmm0 = cl in all sixteen byte lanes
 12243  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 12244  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 12245  	LONG $0x05e9c149             // shr    r9, 5
 12246  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 32-element chunks (rdi / 32)
 12247  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0 -- xmm1 = scalar zero-extended into eight word lanes
 12248  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 12249  	JE   LBB1_973 // rdi == 32: exactly one chunk, so skip the two-chunk unrolled loop
 12250  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 12251  	LONG $0xfee68348             // and    rsi, -2
 12252  	WORD $0xf748; BYTE $0xde     // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12253  	WORD $0xc031                 // xor    eax, eax -- rax = byte index
 12254  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 12255  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2 -- xmm2 = scalar byte duplicated in both halves of each word
 12256  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI1_0] */ -- mask used by the pand instructions below; presumably 0x00ff in every word - confirm against the table rbp points at
 12257  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 12258  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4 -- xmm4 holds the same value as xmm2
 12259  // Unrolled loop: 64 bytes per iteration. Each 16-byte vector is split into low/high 8 bytes as words, multiplied with pmullw, masked with xmm3, then repacked with packuswb.
 12260  LBB1_587:
 12261  	LONG $0x2c6f0ff3; BYTE $0x02               // movdqu    xmm5, oword [rdx + rax]
 12262  	LONG $0x746f0ff3; WORD $0x1002             // movdqu    xmm6, oword [rdx + rax + 16]
 12263  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 12264  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 12265  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 12266  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12267  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 12268  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 12269  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 12270  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 12271  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 12272  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 12273  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 12274  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12275  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12276  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 12277  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 12278  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 12279  	LONG $0x6c6f0ff3; WORD $0x2002             // movdqu    xmm5, oword [rdx + rax + 32]
 12280  	LONG $0x746f0ff3; WORD $0x3002             // movdqu    xmm6, oword [rdx + rax + 48]
 12281  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 12282  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 12283  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 12284  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12285  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 12286  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 12287  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 12288  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 12289  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 12290  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 12291  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 12292  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12293  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12294  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 12295  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 12296  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 12297  	LONG $0x40c08348                           // add    rax, 64
 12298  	LONG $0x02c68348                           // add    rsi, 2
 12299  	JNE  LBB1_587
 12300  	JMP  LBB1_974 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12301  
 12302  LBB1_588: // Vector path: out[i] = low byte of (in[i] * scalar) on 8-bit lanes (source rdx, destination r8). NOTE(review): r10d and cl are set outside this view; presumably element count and scalar - confirm.
 12303  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 12304  	WORD $0xe783; BYTE $0xe0     // and    edi, -32 -- edi = r10d rounded down to a multiple of 32 elements
 12305  	WORD $0xb60f; BYTE $0xc1     // movzx    eax, cl
 12306  	LONG $0xc06e0f66             // movd    xmm0, eax
 12307  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12308  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control: xmm0 = cl in all sixteen byte lanes
 12309  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 12310  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 12311  	LONG $0x05e9c149             // shr    r9, 5
 12312  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 32-element chunks (rdi / 32)
 12313  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0 -- xmm1 = scalar zero-extended into eight word lanes
 12314  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 12315  	JE   LBB1_981 // rdi == 32: exactly one chunk, so skip the two-chunk unrolled loop
 12316  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 12317  	LONG $0xfee68348             // and    rsi, -2
 12318  	WORD $0xf748; BYTE $0xde     // neg    rsi -- rsi = -(chunk count rounded down to even); counts up to zero
 12319  	WORD $0xc031                 // xor    eax, eax -- rax = byte index
 12320  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 12321  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2 -- xmm2 = scalar byte duplicated in both halves of each word
 12322  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI1_0] */ -- mask used by the pand instructions below; presumably 0x00ff in every word - confirm against the table rbp points at
 12323  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 12324  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4 -- xmm4 holds the same value as xmm2
 12325  // Unrolled loop: 64 bytes per iteration. Each 16-byte vector is split into low/high 8 bytes as words, multiplied with pmullw, masked with xmm3, then repacked with packuswb.
 12326  LBB1_590:
 12327  	LONG $0x2c6f0ff3; BYTE $0x02               // movdqu    xmm5, oword [rdx + rax]
 12328  	LONG $0x746f0ff3; WORD $0x1002             // movdqu    xmm6, oword [rdx + rax + 16]
 12329  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 12330  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 12331  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 12332  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12333  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 12334  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 12335  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 12336  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 12337  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 12338  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 12339  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 12340  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12341  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12342  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 12343  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 12344  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 12345  	LONG $0x6c6f0ff3; WORD $0x2002             // movdqu    xmm5, oword [rdx + rax + 32]
 12346  	LONG $0x746f0ff3; WORD $0x3002             // movdqu    xmm6, oword [rdx + rax + 48]
 12347  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 12348  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 12349  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 12350  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12351  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 12352  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 12353  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 12354  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 12355  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 12356  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 12357  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 12358  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12359  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 12360  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 12361  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 12362  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 12363  	LONG $0x40c08348                           // add    rax, 64
 12364  	LONG $0x02c68348                           // add    rsi, 2
 12365  	JNE  LBB1_590
 12366  	JMP  LBB1_982 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12367  
 12368  LBB1_591: // Vector path: out[i] = in[i] + scalar on 8-bit lanes (source rdx, destination r8). NOTE(review): r10d and al are set outside this view; presumably element count and scalar - confirm.
 12369  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12370  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 -- esi = r10d rounded down to a multiple of 32 elements
 12371  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 12372  	LONG $0xc16e0f66             // movd    xmm0, ecx
 12373  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12374  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control: xmm0 = al in all sixteen byte lanes
 12375  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 12376  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12377  	LONG $0x05e9c149             // shr    r9, 5
 12378  	LONG $0x01c18349             // add    r9, 1 -- r9 = number of 32-element chunks (rsi / 32)
 12379  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12380  	JE   LBB1_989 // rsi == 32: exactly one chunk, so skip the two-chunk unrolled loop
 12381  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12382  	LONG $0xfee18348             // and    rcx, -2
 12383  	WORD $0xf748; BYTE $0xd9     // neg    rcx -- rcx = -(chunk count rounded down to even); counts up to zero
 12384  	WORD $0xff31                 // xor    edi, edi -- rdi = byte index
 12385  // Unrolled loop: two 32-element chunks (four 16-byte vectors, 64 bytes) per iteration.
 12386  LBB1_593:
 12387  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 12388  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 12389  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 12390  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 12391  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 12392  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 12393  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 12394  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 12395  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 12396  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 12397  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 12398  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 12399  	LONG $0x40c78348                           // add    rdi, 64
 12400  	LONG $0x02c18348                           // add    rcx, 2
 12401  	JNE  LBB1_593
 12402  	JMP  LBB1_990 // target is outside this view; presumably handles a leftover odd chunk and the scalar tail - confirm
 12403  
 12404  LBB1_594:
        	// Vector setup + unrolled loop: subtract one broadcast byte from a byte buffer.
        	//   esi  = r10d rounded down to a multiple of 32
        	//   xmm0 = low byte of eax copied into all 16 lanes (pshufb with a zero mask)
        	//   r9   = ((rsi - 32) >> 5) + 1, the number of 32-byte chunks when rsi >= 32
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12405  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12406  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 12407  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 12408  	LONG $0xc16e0f66             // movd    xmm0, ecx
 12409  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12410  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 12411  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 12412  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12413  	LONG $0x05e9c149             // shr    r9, 5
 12414  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 32 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12415  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12416  	JE   LBB1_997
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12417  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12418  	LONG $0xfee18348             // and    rcx, -2
 12419  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running byte offset into both buffers.
 12420  	WORD $0xff31                 // xor    edi, edi
 12421  
 12422  LBB1_596:
        	// One iteration handles two chunks (4 x 16 bytes): load [rdx + rdi], psubb xmm0, store [r8 + rdi].
 12423  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 12424  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 12425  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 12426  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 12427  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 12428  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 12429  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 12430  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 12431  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 12432  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 12433  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 12434  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 12435  	LONG $0x40c78348                           // add    rdi, 64
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12436  	LONG $0x02c18348                           // add    rcx, 2
 12437  	JNE  LBB1_596
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12438  	JMP  LBB1_998
 12439  
 12440  LBB1_597:
        	// Vector setup + unrolled loop: add one broadcast byte to a byte buffer.
        	// The instruction sequence matches LBB1_591; only the branch targets differ.
        	//   esi  = r10d rounded down to a multiple of 32
        	//   xmm0 = low byte of eax copied into all 16 lanes (pshufb with a zero mask)
        	//   r9   = ((rsi - 32) >> 5) + 1, the number of 32-byte chunks when rsi >= 32
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12441  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12442  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 12443  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 12444  	LONG $0xc16e0f66             // movd    xmm0, ecx
 12445  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12446  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 12447  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 12448  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12449  	LONG $0x05e9c149             // shr    r9, 5
 12450  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 32 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12451  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12452  	JE   LBB1_1005
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12453  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12454  	LONG $0xfee18348             // and    rcx, -2
 12455  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running byte offset into both buffers.
 12456  	WORD $0xff31                 // xor    edi, edi
 12457  
 12458  LBB1_599:
        	// One iteration handles two chunks (4 x 16 bytes): load [rdx + rdi], paddb xmm0, store [r8 + rdi].
 12459  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 12460  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 12461  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 12462  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 12463  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 12464  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 12465  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 12466  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 12467  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 12468  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 12469  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 12470  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 12471  	LONG $0x40c78348                           // add    rdi, 64
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12472  	LONG $0x02c18348                           // add    rcx, 2
 12473  	JNE  LBB1_599
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12474  	JMP  LBB1_1006
 12475  
 12476  LBB1_600:
        	// Vector setup + unrolled loop: subtract one broadcast byte from a byte buffer.
        	// The instruction sequence matches LBB1_594; only the branch targets differ.
        	//   esi  = r10d rounded down to a multiple of 32
        	//   xmm0 = low byte of eax copied into all 16 lanes (pshufb with a zero mask)
        	//   r9   = ((rsi - 32) >> 5) + 1, the number of 32-byte chunks when rsi >= 32
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12477  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12478  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 12479  	WORD $0xb60f; BYTE $0xc8     // movzx    ecx, al
 12480  	LONG $0xc16e0f66             // movd    xmm0, ecx
 12481  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 12482  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 12483  	LONG $0xe04e8d48             // lea    rcx, [rsi - 32]
 12484  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12485  	LONG $0x05e9c149             // shr    r9, 5
 12486  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 32 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12487  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12488  	JE   LBB1_1013
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12489  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12490  	LONG $0xfee18348             // and    rcx, -2
 12491  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running byte offset into both buffers.
 12492  	WORD $0xff31                 // xor    edi, edi
 12493  
 12494  LBB1_602:
        	// One iteration handles two chunks (4 x 16 bytes): load [rdx + rdi], psubb xmm0, store [r8 + rdi].
 12495  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 12496  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 12497  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 12498  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 12499  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 12500  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 12501  	LONG $0x4c6f0ff3; WORD $0x203a             // movdqu    xmm1, oword [rdx + rdi + 32]
 12502  	LONG $0x546f0ff3; WORD $0x303a             // movdqu    xmm2, oword [rdx + rdi + 48]
 12503  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 12504  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 12505  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 12506  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 12507  	LONG $0x40c78348                           // add    rdi, 64
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12508  	LONG $0x02c18348                           // add    rcx, 2
 12509  	JNE  LBB1_602
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12510  	JMP  LBB1_1014
 12511  
 12512  LBB1_603:
        	// Vector setup + unrolled loop: multiply 32-bit lanes by one broadcast dword.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12513  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12514  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12515  	LONG $0xc06e0f66             // movd    xmm0, eax
 12516  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12517  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12518  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12519  	LONG $0x03e9c149             // shr    r9, 3
 12520  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12521  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12522  	JE   LBB1_1021
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12523  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12524  	LONG $0xfee18348             // and    rcx, -2
 12525  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12526  	WORD $0xff31                 // xor    edi, edi
 12527  
 12528  LBB1_605:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], pmulld xmm0, store [r8 + 4*rdi].
 12529  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12530  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12531  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12532  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12533  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12534  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12535  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12536  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12537  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12538  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12539  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12540  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12541  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12542  	LONG $0x02c18348                           // add    rcx, 2
 12543  	JNE  LBB1_605
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12544  	JMP  LBB1_1022
 12545  
 12546  LBB1_606:
        	// Vector setup + unrolled loop: multiply 32-bit lanes by one broadcast dword.
        	// The instruction sequence matches LBB1_603; only the branch targets differ.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12547  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12548  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12549  	LONG $0xc06e0f66             // movd    xmm0, eax
 12550  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12551  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12552  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12553  	LONG $0x03e9c149             // shr    r9, 3
 12554  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12555  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12556  	JE   LBB1_1029
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12557  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12558  	LONG $0xfee18348             // and    rcx, -2
 12559  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12560  	WORD $0xff31                 // xor    edi, edi
 12561  
 12562  LBB1_608:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], pmulld xmm0, store [r8 + 4*rdi].
 12563  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12564  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12565  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12566  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12567  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12568  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12569  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12570  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12571  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12572  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12573  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12574  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12575  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12576  	LONG $0x02c18348                           // add    rcx, 2
 12577  	JNE  LBB1_608
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12578  	JMP  LBB1_1030
 12579  
 12580  LBB1_609:
        	// Vector setup + unrolled loop: add one broadcast dword to 32-bit lanes.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12581  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12582  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12583  	LONG $0xc06e0f66             // movd    xmm0, eax
 12584  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12585  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12586  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12587  	LONG $0x03e9c149             // shr    r9, 3
 12588  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12589  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12590  	JE   LBB1_1037
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12591  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12592  	LONG $0xfee18348             // and    rcx, -2
 12593  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12594  	WORD $0xff31                 // xor    edi, edi
 12595  
 12596  LBB1_611:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], paddd xmm0, store [r8 + 4*rdi].
 12597  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12598  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12599  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12600  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12601  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12602  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12603  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12604  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12605  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12606  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12607  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12608  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12609  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12610  	LONG $0x02c18348                           // add    rcx, 2
 12611  	JNE  LBB1_611
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12612  	JMP  LBB1_1038
 12613  
 12614  LBB1_612:
        	// Vector setup + unrolled loop: subtract one broadcast dword from 32-bit lanes.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12615  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12616  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12617  	LONG $0xc06e0f66             // movd    xmm0, eax
 12618  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12619  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12620  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12621  	LONG $0x03e9c149             // shr    r9, 3
 12622  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12623  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12624  	JE   LBB1_1045
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12625  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12626  	LONG $0xfee18348             // and    rcx, -2
 12627  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12628  	WORD $0xff31                 // xor    edi, edi
 12629  
 12630  LBB1_614:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], psubd xmm0, store [r8 + 4*rdi].
 12631  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12632  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12633  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12634  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12635  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12636  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12637  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12638  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12639  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12640  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12641  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12642  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12643  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12644  	LONG $0x02c18348                           // add    rcx, 2
 12645  	JNE  LBB1_614
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12646  	JMP  LBB1_1046
 12647  
 12648  LBB1_615:
        	// Vector setup + unrolled loop: add one broadcast dword to 32-bit lanes.
        	// The instruction sequence matches LBB1_609; only the branch targets differ.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12649  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12650  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12651  	LONG $0xc06e0f66             // movd    xmm0, eax
 12652  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12653  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12654  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12655  	LONG $0x03e9c149             // shr    r9, 3
 12656  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12657  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12658  	JE   LBB1_1053
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12659  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12660  	LONG $0xfee18348             // and    rcx, -2
 12661  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12662  	WORD $0xff31                 // xor    edi, edi
 12663  
 12664  LBB1_617:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], paddd xmm0, store [r8 + 4*rdi].
 12665  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12666  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12667  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12668  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12669  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12670  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12671  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12672  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12673  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12674  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12675  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12676  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12677  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12678  	LONG $0x02c18348                           // add    rcx, 2
 12679  	JNE  LBB1_617
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12680  	JMP  LBB1_1054
 12681  
 12682  LBB1_618:
        	// Vector setup + unrolled loop: subtract one broadcast dword from 32-bit lanes.
        	// The instruction sequence matches LBB1_612; only the branch targets differ.
        	//   esi  = r10d rounded down to a multiple of 8
        	//   xmm0 = eax copied into all four dword lanes (pshufd imm 0)
        	//   r9   = ((rsi - 8) >> 3) + 1, the number of 8-element chunks when rsi >= 8
        	// NOTE(review): r10 is presumably the element count and rdx / r8 the input /
        	// output buffers; all three are initialised before this view -- confirm.
 12683  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 12684  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 12685  	LONG $0xc06e0f66             // movd    xmm0, eax
 12686  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 12687  	LONG $0xf84e8d48             // lea    rcx, [rsi - 8]
 12688  	WORD $0x8949; BYTE $0xc9     // mov    r9, rcx
 12689  	LONG $0x03e9c149             // shr    r9, 3
 12690  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 8 means a single chunk: skip the 2x-unrolled loop (target is outside this view).
 12691  	WORD $0x8548; BYTE $0xc9     // test    rcx, rcx
 12692  	JE   LBB1_1061
        	// rcx = -(r9 & ~1): negative chunk counter that the loop steps up to zero by 2.
 12693  	WORD $0x894c; BYTE $0xc9     // mov    rcx, r9
 12694  	LONG $0xfee18348             // and    rcx, -2
 12695  	WORD $0xf748; BYTE $0xd9     // neg    rcx
        	// rdi = running element index (addresses scale it by 4).
 12696  	WORD $0xff31                 // xor    edi, edi
 12697  
 12698  LBB1_620:
        	// One iteration handles 16 dwords: load [rdx + 4*rdi], psubd xmm0, store [r8 + 4*rdi].
 12699  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12700  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12701  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12702  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12703  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12704  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12705  	LONG $0x4c6f0ff3; WORD $0x20ba             // movdqu    xmm1, oword [rdx + 4*rdi + 32]
 12706  	LONG $0x546f0ff3; WORD $0x30ba             // movdqu    xmm2, oword [rdx + 4*rdi + 48]
 12707  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12708  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12709  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 12710  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 12711  	LONG $0x10c78348                           // add    rdi, 16
        	// The add sets ZF once the negative counter reaches zero, ending the loop.
 12712  	LONG $0x02c18348                           // add    rcx, 2
 12713  	JNE  LBB1_620
        	// Continue outside this view (presumably the odd-chunk / remainder handling).
 12714  	JMP  LBB1_1062
 12715  
 12716  LBB1_621:
        	// Dword-multiply tail. This entry zeroes the element index in edi and falls
        	// through; LBB1_622 is a second entry that keeps the caller's rdi.
 12717  	WORD $0xff31 // xor    edi, edi
 12718  
 12719  LBB1_622:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with pmulld by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12720  	LONG $0x01c1f641                           // test    r9b, 1
 12721  	JE   LBB1_624
 12722  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12723  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12724  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12725  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12726  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12727  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12728  
 12729  LBB1_624:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_625 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12730  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12731  	JE   LBB1_1069
 12732  	JMP  LBB1_625
 12733  
 12734  LBB1_629:
        	// Dword-multiply tail (same instruction sequence as LBB1_621, different targets).
        	// This entry zeroes the element index in edi; LBB1_630 keeps the caller's rdi.
 12735  	WORD $0xff31 // xor    edi, edi
 12736  
 12737  LBB1_630:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with pmulld by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12738  	LONG $0x01c1f641                           // test    r9b, 1
 12739  	JE   LBB1_632
 12740  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12741  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12742  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 12743  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 12744  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12745  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12746  
 12747  LBB1_632:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_633 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12748  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12749  	JE   LBB1_1069
 12750  	JMP  LBB1_633
 12751  
 12752  LBB1_637:
        	// Dword-add tail. This entry zeroes the element index in edi and falls
        	// through; LBB1_638 is a second entry that keeps the caller's rdi.
 12753  	WORD $0xff31 // xor    edi, edi
 12754  
 12755  LBB1_638:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with paddd by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12756  	LONG $0x01c1f641                           // test    r9b, 1
 12757  	JE   LBB1_640
 12758  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12759  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12760  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12761  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12762  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12763  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12764  
 12765  LBB1_640:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_641 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12766  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12767  	JE   LBB1_1069
 12768  	JMP  LBB1_641
 12769  
 12770  LBB1_645:
        	// Dword-subtract tail. This entry zeroes the element index in edi and falls
        	// through; LBB1_646 is a second entry that keeps the caller's rdi.
 12771  	WORD $0xff31 // xor    edi, edi
 12772  
 12773  LBB1_646:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with psubd by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12774  	LONG $0x01c1f641                           // test    r9b, 1
 12775  	JE   LBB1_648
 12776  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12777  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12778  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12779  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12780  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12781  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12782  
 12783  LBB1_648:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_649 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12784  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12785  	JE   LBB1_1069
 12786  	JMP  LBB1_649
 12787  
 12788  LBB1_653:
        	// Dword-add tail (same instruction sequence as LBB1_637, different targets).
        	// This entry zeroes the element index in edi; LBB1_654 keeps the caller's rdi.
 12789  	WORD $0xff31 // xor    edi, edi
 12790  
 12791  LBB1_654:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with paddd by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12792  	LONG $0x01c1f641                           // test    r9b, 1
 12793  	JE   LBB1_656
 12794  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12795  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12796  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 12797  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 12798  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12799  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12800  
 12801  LBB1_656:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_657 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12802  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12803  	JE   LBB1_1069
 12804  	JMP  LBB1_657
 12805  
 12806  LBB1_661:
        	// Dword-subtract tail (same instruction sequence as LBB1_645, different targets).
        	// This entry zeroes the element index in edi; LBB1_662 keeps the caller's rdi.
 12807  	WORD $0xff31 // xor    edi, edi
 12808  
 12809  LBB1_662:
        	// If bit 0 of r9 is set, process one more 8-dword chunk with psubd by xmm0.
        	// NOTE(review): r9 is presumably the chunk count and xmm0 the broadcast scalar,
        	// both prepared by the setup code that branches here -- confirm.
 12810  	LONG $0x01c1f641                           // test    r9b, 1
 12811  	JE   LBB1_664
 12812  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 12813  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 12814  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 12815  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 12816  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 12817  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 12818  
 12819  LBB1_664:
        	// rsi == r10 branches to LBB1_1069, otherwise to LBB1_665 (both outside this view;
        	// presumably "all elements vectorised, done" vs. the scalar remainder loop).
 12820  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 12821  	JE   LBB1_1069
 12822  	JMP  LBB1_665
 12823  
 12824  LBB1_669:
        	// Packed-double multiply tail. This entry zeroes the element index in edi and
        	// falls through; LBB1_670 is a second entry that keeps the caller's rdi.
 12825  	WORD $0xff31 // xor    edi, edi
 12826  
 12827  LBB1_670:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], mulpd by xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12828  	LONG $0x01c1f641                           // test    r9b, 1
 12829  	JE   LBB1_672
 12830  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12831  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12832  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 12833  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 12834  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12835  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12836  
 12837  LBB1_672:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_673 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12838  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12839  	JE   LBB1_1069
 12840  	JMP  LBB1_673
 12841  
 12842  LBB1_677:
        	// Packed-double multiply tail (same instruction sequence as LBB1_669, different
        	// targets). This entry zeroes edi; LBB1_678 keeps the caller's rdi.
 12843  	WORD $0xff31 // xor    edi, edi
 12844  
 12845  LBB1_678:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], mulpd by xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12846  	LONG $0x01c1f641                           // test    r9b, 1
 12847  	JE   LBB1_680
 12848  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12849  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12850  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 12851  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 12852  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12853  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12854  
 12855  LBB1_680:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_681 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12856  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12857  	JE   LBB1_1069
 12858  	JMP  LBB1_681
 12859  
 12860  LBB1_685:
        	// Packed-double add tail. This entry zeroes the element index in edi and
        	// falls through; LBB1_686 is a second entry that keeps the caller's rdi.
 12861  	WORD $0xff31 // xor    edi, edi
 12862  
 12863  LBB1_686:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], addpd xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12864  	LONG $0x01c1f641                           // test    r9b, 1
 12865  	JE   LBB1_688
 12866  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12867  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12868  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 12869  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 12870  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12871  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12872  
 12873  LBB1_688:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_689 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12874  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12875  	JE   LBB1_1069
 12876  	JMP  LBB1_689
 12877  
 12878  LBB1_693:
        	// Packed-double subtract tail. This entry zeroes the element index in edi and
        	// falls through; LBB1_694 is a second entry that keeps the caller's rdi.
 12879  	WORD $0xff31 // xor    edi, edi
 12880  
 12881  LBB1_694:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], subpd xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12882  	LONG $0x01c1f641                           // test    r9b, 1
 12883  	JE   LBB1_696
 12884  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12885  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12886  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 12887  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 12888  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12889  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12890  
 12891  LBB1_696:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_697 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12892  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12893  	JE   LBB1_1069
 12894  	JMP  LBB1_697
 12895  
 12896  LBB1_701:
        	// Packed-double add tail (same instruction sequence as LBB1_685, different
        	// targets). This entry zeroes edi; LBB1_702 keeps the caller's rdi.
 12897  	WORD $0xff31 // xor    edi, edi
 12898  
 12899  LBB1_702:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], addpd xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12900  	LONG $0x01c1f641                           // test    r9b, 1
 12901  	JE   LBB1_704
 12902  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12903  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12904  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 12905  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 12906  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12907  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12908  
 12909  LBB1_704:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_705 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12910  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12911  	JE   LBB1_1069
 12912  	JMP  LBB1_705
 12913  
 12914  LBB1_709:
        	// Packed-double subtract tail (same instruction sequence as LBB1_693, different
        	// targets). This entry zeroes edi; LBB1_710 keeps the caller's rdi.
 12915  	WORD $0xff31 // xor    edi, edi
 12916  
 12917  LBB1_710:
        	// If bit 0 of r9 is set, process one more chunk of 4 doubles (2 x 16 bytes):
        	// load [rdx + 8*rdi], subpd xmm1, store [r8 + 8*rdi].
        	// NOTE(review): r9 is presumably the chunk count and xmm1 the broadcast scalar,
        	// both prepared outside this view -- confirm.
 12918  	LONG $0x01c1f641                           // test    r9b, 1
 12919  	JE   LBB1_712
 12920  	LONG $0x14100f66; BYTE $0xfa               // movupd    xmm2, oword [rdx + 8*rdi]
 12921  	LONG $0x5c100f66; WORD $0x10fa             // movupd    xmm3, oword [rdx + 8*rdi + 16]
 12922  	LONG $0xd15c0f66                           // subpd    xmm2, xmm1
 12923  	LONG $0xd95c0f66                           // subpd    xmm3, xmm1
 12924  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 12925  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 12926  
 12927  LBB1_712:
        	// rcx == rax branches to LBB1_1069, otherwise to LBB1_713 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12928  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 12929  	JE   LBB1_1069
 12930  	JMP  LBB1_713
 12931  
 12932  LBB1_717:
        	// Byte-multiply tail. SSE has no packed 8-bit multiply, so each 16-byte vector
        	// is widened to two vectors of 16-bit words, multiplied with pmullw, masked to
        	// the low byte of every word, and packed back to bytes with packuswb.
        	// This entry zeroes the byte offset in eax; LBB1_718 keeps the caller's rax.
 12933  	WORD $0xc031 // xor    eax, eax
 12934  
 12935  LBB1_718:
        	// If bit 0 of r9 is set, process one more 32-byte chunk from [rdx + rax].
        	// NOTE(review): xmm0 presumably holds the multiplier as bytes and xmm1 the same
        	// multiplier widened to words; both are prepared outside this view -- confirm.
 12936  	LONG $0x01c1f641                           // test    r9b, 1
 12937  	JE   LBB1_720
 12938  	LONG $0x146f0ff3; BYTE $0x02               // movdqu    xmm2, oword [rdx + rax]
 12939  	LONG $0x5c6f0ff3; WORD $0x1002             // movdqu    xmm3, oword [rdx + rax + 16]
        	// First 16 bytes: the high 8 bytes are widened by punpckhbw (both operands), the
        	// low 8 bytes by pmovzxbw and multiplied by xmm1.
 12940  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 12941  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 12942  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 12943  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 12944  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
        	// xmm4 = 16-byte constant at [rbp]; it is used below as the per-word mask that
        	// keeps the low byte of each product (NOTE(review): presumably 0x00ff words,
        	// as in LCDATA1 -- confirm which LCDATA symbol rbp points to here).
 12945  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI1_0] */
 12946  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 12947  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12948  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 12949  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
        	// Second 16 bytes, same scheme. xmm0 is overwritten with its widened high half here.
 12950  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 12951  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 12952  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 12953  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 12954  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 12955  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 12956  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 12957  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 12958  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 12959  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 12960  
 12961  LBB1_720:
        	// rdi == r10 branches to LBB1_1069, otherwise to LBB1_721 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12962  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 12963  	JE   LBB1_1069
 12964  	JMP  LBB1_721
 12965  
 12966  LBB1_725:
        	// Byte-multiply tail (same instruction sequence as LBB1_717, different targets).
        	// Each 16-byte vector is widened to 16-bit words, multiplied with pmullw, masked
        	// to the low byte of every word, and packed back to bytes with packuswb.
        	// This entry zeroes the byte offset in eax; LBB1_726 keeps the caller's rax.
 12967  	WORD $0xc031 // xor    eax, eax
 12968  
 12969  LBB1_726:
        	// If bit 0 of r9 is set, process one more 32-byte chunk from [rdx + rax].
        	// NOTE(review): xmm0 presumably holds the multiplier as bytes and xmm1 the same
        	// multiplier widened to words; both are prepared outside this view -- confirm.
 12970  	LONG $0x01c1f641                           // test    r9b, 1
 12971  	JE   LBB1_728
 12972  	LONG $0x146f0ff3; BYTE $0x02               // movdqu    xmm2, oword [rdx + rax]
 12973  	LONG $0x5c6f0ff3; WORD $0x1002             // movdqu    xmm3, oword [rdx + rax + 16]
        	// First 16 bytes: the high 8 bytes are widened by punpckhbw (both operands), the
        	// low 8 bytes by pmovzxbw and multiplied by xmm1.
 12974  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 12975  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 12976  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 12977  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 12978  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
        	// xmm4 = 16-byte constant at [rbp]; it is used below as the per-word mask that
        	// keeps the low byte of each product (NOTE(review): presumably 0x00ff words,
        	// as in LCDATA1 -- confirm which LCDATA symbol rbp points to here).
 12979  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI1_0] */
 12980  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 12981  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 12982  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 12983  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
        	// Second 16 bytes, same scheme. xmm0 is overwritten with its widened high half here.
 12984  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 12985  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 12986  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 12987  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 12988  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 12989  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 12990  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 12991  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 12992  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 12993  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 12994  
 12995  LBB1_728:
        	// rdi == r10 branches to LBB1_1069, otherwise to LBB1_729 (both outside this view;
        	// presumably vectorised count vs. total length, then the scalar remainder).
 12996  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 12997  	JE   LBB1_1069
 12998  	JMP  LBB1_729
 12999  
 13000  LBB1_733:
 13001  	WORD $0xff31 // xor    edi, edi
 13002  
 13003  LBB1_734:
 13004  	LONG $0x01c1f641                           // test    r9b, 1
 13005  	JE   LBB1_736
 13006  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13007  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13008  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 13009  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 13010  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13011  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13012  
 13013  LBB1_736:
 13014  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13015  	JE   LBB1_1069
 13016  	JMP  LBB1_737
 13017  
 13018  LBB1_741:
 13019  	WORD $0xff31 // xor    edi, edi
 13020  
 13021  LBB1_742:
 13022  	LONG $0x01c1f641                           // test    r9b, 1
 13023  	JE   LBB1_744
 13024  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13025  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13026  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 13027  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 13028  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13029  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13030  
 13031  LBB1_744:
 13032  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13033  	JE   LBB1_1069
 13034  	JMP  LBB1_745
 13035  
 13036  LBB1_749:
 13037  	WORD $0xff31 // xor    edi, edi
 13038  
 13039  LBB1_750:
 13040  	LONG $0x01c1f641                           // test    r9b, 1
 13041  	JE   LBB1_752
 13042  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13043  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13044  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 13045  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 13046  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13047  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13048  
 13049  LBB1_752:
 13050  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13051  	JE   LBB1_1069
 13052  	JMP  LBB1_753
 13053  
 13054  LBB1_757:
 13055  	WORD $0xff31 // xor    edi, edi
 13056  
 13057  LBB1_758:
 13058  	LONG $0x01c1f641                           // test    r9b, 1
 13059  	JE   LBB1_760
 13060  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13061  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13062  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 13063  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 13064  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13065  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13066  
 13067  LBB1_760:
 13068  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13069  	JE   LBB1_1069
 13070  	JMP  LBB1_761
 13071  
 13072  LBB1_765:
 13073  	WORD $0xff31 // xor    edi, edi
 13074  
 13075  LBB1_766:
 13076  	LONG $0x01c1f641                           // test    r9b, 1
 13077  	JE   LBB1_768
 13078  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13079  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13080  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 13081  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 13082  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13083  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13084  
 13085  LBB1_768:
 13086  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13087  	JE   LBB1_1069
 13088  	JMP  LBB1_769
 13089  
 13090  LBB1_773:
 13091  	WORD $0xff31 // xor    edi, edi
 13092  
 13093  LBB1_774:
 13094  	LONG $0x01c1f641                           // test    r9b, 1
 13095  	JE   LBB1_776
 13096  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13097  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13098  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 13099  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 13100  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13101  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13102  
 13103  LBB1_776:
 13104  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13105  	JE   LBB1_1069
 13106  	JMP  LBB1_777
 13107  
 13108  LBB1_781:
 13109  	WORD $0xff31 // xor    edi, edi
 13110  
 13111  LBB1_782:
 13112  	LONG $0x01c1f641                           // test    r9b, 1
 13113  	JE   LBB1_784
 13114  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13115  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13116  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 13117  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 13118  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13119  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13120  
 13121  LBB1_784:
 13122  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13123  	JE   LBB1_1069
 13124  	JMP  LBB1_785
 13125  
 13126  LBB1_789:
 13127  	WORD $0xff31 // xor    edi, edi
 13128  
 13129  LBB1_790:
 13130  	LONG $0x01c1f641                           // test    r9b, 1
 13131  	JE   LBB1_792
 13132  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13133  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13134  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 13135  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 13136  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13137  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13138  
 13139  LBB1_792:
 13140  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13141  	JE   LBB1_1069
 13142  	JMP  LBB1_793
 13143  
 13144  LBB1_797:
 13145  	WORD $0xff31 // xor    edi, edi
 13146  
 13147  LBB1_798:
 13148  	LONG $0x01c1f641                           // test    r9b, 1
 13149  	JE   LBB1_800
 13150  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13151  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13152  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 13153  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 13154  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13155  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13156  
 13157  LBB1_800:
 13158  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13159  	JE   LBB1_1069
 13160  	JMP  LBB1_801
 13161  
 13162  LBB1_805:
 13163  	WORD $0xff31 // xor    edi, edi
 13164  
 13165  LBB1_806:
 13166  	LONG $0x01c1f641                           // test    r9b, 1
 13167  	JE   LBB1_808
 13168  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13169  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13170  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 13171  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 13172  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13173  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13174  
 13175  LBB1_808:
 13176  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13177  	JE   LBB1_1069
 13178  	JMP  LBB1_809
 13179  
 13180  LBB1_813:
 13181  	WORD $0xff31 // xor    edi, edi
 13182  
 13183  LBB1_814:
 13184  	LONG $0x01c1f641                           // test    r9b, 1
 13185  	JE   LBB1_816
 13186  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13187  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13188  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 13189  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 13190  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13191  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13192  
 13193  LBB1_816:
 13194  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13195  	JE   LBB1_1069
 13196  	JMP  LBB1_817
 13197  
 13198  LBB1_821:
 13199  	WORD $0xff31 // xor    edi, edi
 13200  
 13201  LBB1_822:
 13202  	LONG $0x01c1f641                           // test    r9b, 1
 13203  	JE   LBB1_824
 13204  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13205  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13206  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 13207  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 13208  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13209  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13210  
 13211  LBB1_824:
 13212  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13213  	JE   LBB1_1069
 13214  	JMP  LBB1_825
 13215  
 13216  LBB1_829:
 13217  	WORD $0xff31 // xor    edi, edi
 13218  
 13219  LBB1_830:
 13220  	LONG $0x01c1f641                           // test    r9b, 1
 13221  	JE   LBB1_832
 13222  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13223  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13224  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 13225  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 13226  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13227  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13228  
 13229  LBB1_832:
 13230  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13231  	JE   LBB1_1069
 13232  	JMP  LBB1_833
 13233  
 13234  LBB1_837:
 13235  	WORD $0xff31 // xor    edi, edi
 13236  
 13237  LBB1_838:
 13238  	LONG $0x01c1f641                           // test    r9b, 1
 13239  	JE   LBB1_840
 13240  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13241  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13242  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 13243  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 13244  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13245  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13246  
 13247  LBB1_840:
 13248  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13249  	JE   LBB1_1069
 13250  	JMP  LBB1_841
 13251  
 13252  LBB1_845:
 13253  	WORD $0xff31 // xor    edi, edi
 13254  
 13255  LBB1_846:
 13256  	LONG $0x01c1f641                           // test    r9b, 1
 13257  	JE   LBB1_848
 13258  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13259  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13260  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 13261  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 13262  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13263  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13264  
 13265  LBB1_848:
 13266  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13267  	JE   LBB1_1069
 13268  	JMP  LBB1_849
 13269  
 13270  LBB1_853:
 13271  	WORD $0xff31 // xor    edi, edi
 13272  
 13273  LBB1_854:
 13274  	LONG $0x01c1f641                           // test    r9b, 1
 13275  	JE   LBB1_856
 13276  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13277  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13278  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 13279  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 13280  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13281  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13282  
 13283  LBB1_856:
 13284  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13285  	JE   LBB1_1069
 13286  	JMP  LBB1_857
 13287  
 13288  LBB1_861:
 13289  	WORD $0xff31 // xor    edi, edi
 13290  
 13291  LBB1_862:
 13292  	LONG $0x01c1f641                           // test    r9b, 1
 13293  	JE   LBB1_864
 13294  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13295  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13296  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 13297  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 13298  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13299  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13300  
 13301  LBB1_864:
 13302  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13303  	JE   LBB1_1069
 13304  	JMP  LBB1_865
 13305  
 13306  LBB1_869:
 13307  	WORD $0xff31 // xor    edi, edi
 13308  
 13309  LBB1_870:
 13310  	LONG $0x01c1f641                           // test    r9b, 1
 13311  	JE   LBB1_872
 13312  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13313  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13314  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 13315  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 13316  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13317  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13318  
 13319  LBB1_872:
 13320  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13321  	JE   LBB1_1069
 13322  	JMP  LBB1_873
 13323  
 13324  LBB1_877:
 13325  	WORD $0xff31 // xor    edi, edi
 13326  
 13327  LBB1_878:
 13328  	LONG $0x01c1f641                           // test    r9b, 1
 13329  	JE   LBB1_880
 13330  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13331  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13332  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 13333  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 13334  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13335  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13336  
 13337  LBB1_880:
 13338  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13339  	JE   LBB1_1069
 13340  	JMP  LBB1_881
 13341  
 13342  LBB1_885:
 13343  	WORD $0xff31 // xor    edi, edi
 13344  
 13345  LBB1_886:
 13346  	LONG $0x01c1f641                           // test    r9b, 1
 13347  	JE   LBB1_888
 13348  	LONG $0x0c6f0ff3; BYTE $0x7a               // movdqu    xmm1, oword [rdx + 2*rdi]
 13349  	LONG $0x546f0ff3; WORD $0x107a             // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 13350  	LONG $0xc8f90f66                           // psubw    xmm1, xmm0
 13351  	LONG $0xd0f90f66                           // psubw    xmm2, xmm0
 13352  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 13353  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 13354  
 13355  LBB1_888:
 13356  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13357  	JE   LBB1_1069
 13358  	JMP  LBB1_889
 13359  
 13360  LBB1_893:
 13361  	WORD $0xff31 // xor    edi, edi
 13362  
 13363  LBB1_894:
 13364  	LONG $0x01c1f641               // test    r9b, 1
 13365  	JE   LBB1_896
 13366  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13367  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13368  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 13369  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 13370  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13371  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13372  
 13373  LBB1_896:
 13374  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13375  	JE   LBB1_1069
 13376  	JMP  LBB1_897
 13377  
 13378  LBB1_901:
 13379  	WORD $0xff31 // xor    edi, edi
 13380  
 13381  LBB1_902:
 13382  	LONG $0x01c1f641               // test    r9b, 1
 13383  	JE   LBB1_904
 13384  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13385  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13386  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 13387  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 13388  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13389  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13390  
 13391  LBB1_904:
 13392  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13393  	JE   LBB1_1069
 13394  	JMP  LBB1_905
 13395  
 13396  LBB1_909:
 13397  	WORD $0xff31 // xor    edi, edi
 13398  
 13399  LBB1_910:
 13400  	LONG $0x01c1f641                           // test    r9b, 1
 13401  	JE   LBB1_912
 13402  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13403  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13404  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 13405  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 13406  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13407  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13408  
 13409  LBB1_912:
 13410  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13411  	JE   LBB1_1069
 13412  	JMP  LBB1_913
 13413  
 13414  LBB1_917:
 13415  	WORD $0xff31 // xor    edi, edi
 13416  
 13417  LBB1_918:
 13418  	LONG $0x01c1f641               // test    r9b, 1
 13419  	JE   LBB1_920
 13420  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13421  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13422  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 13423  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 13424  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13425  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13426  
 13427  LBB1_920:
 13428  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13429  	JE   LBB1_1069
 13430  	JMP  LBB1_921
 13431  
 13432  LBB1_925:
 13433  	WORD $0xff31 // xor    edi, edi
 13434  
 13435  LBB1_926:
 13436  	LONG $0x01c1f641                           // test    r9b, 1
 13437  	JE   LBB1_928
 13438  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13439  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13440  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 13441  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 13442  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13443  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13444  
 13445  LBB1_928:
 13446  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13447  	JE   LBB1_1069
 13448  	JMP  LBB1_929
 13449  
 13450  LBB1_933:
 13451  	WORD $0xff31 // xor    edi, edi
 13452  
 13453  LBB1_934:
 13454  	LONG $0x01c1f641               // test    r9b, 1
 13455  	JE   LBB1_936
 13456  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13457  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13458  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 13459  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 13460  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13461  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13462  
 13463  LBB1_936:
 13464  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13465  	JE   LBB1_1069
 13466  	JMP  LBB1_937
 13467  
 13468  LBB1_941:
 13469  	WORD $0xff31 // xor    edi, edi
 13470  
 13471  LBB1_942:
 13472  	LONG $0x01c1f641                           // test    r9b, 1
 13473  	JE   LBB1_944
 13474  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13475  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13476  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 13477  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 13478  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13479  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13480  
 13481  LBB1_944:
 13482  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13483  	JE   LBB1_1069
 13484  	JMP  LBB1_945
 13485  
 13486  LBB1_949:
 13487  	WORD $0xff31 // xor    edi, edi
 13488  
 13489  LBB1_950:
 13490  	LONG $0x01c1f641               // test    r9b, 1
 13491  	JE   LBB1_952
 13492  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13493  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13494  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 13495  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 13496  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13497  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13498  
 13499  LBB1_952:
 13500  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13501  	JE   LBB1_1069
 13502  	JMP  LBB1_953
 13503  
 13504  LBB1_957:
 13505  	WORD $0xff31 // xor    edi, edi
 13506  
 13507  LBB1_958:
 13508  	LONG $0x01c1f641                           // test    r9b, 1
 13509  	JE   LBB1_960
 13510  	LONG $0x0c6f0ff3; BYTE $0xfa               // movdqu    xmm1, oword [rdx + 8*rdi]
 13511  	LONG $0x546f0ff3; WORD $0x10fa             // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 13512  	LONG $0xc8fb0f66                           // psubq    xmm1, xmm0
 13513  	LONG $0xd0fb0f66                           // psubq    xmm2, xmm0
 13514  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 13515  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 13516  
 13517  LBB1_960:
 13518  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13519  	JE   LBB1_1069
 13520  	JMP  LBB1_961
 13521  
 13522  LBB1_965:
 13523  	WORD $0xff31 // xor    edi, edi
 13524  
 13525  LBB1_966:
 13526  	LONG $0x01c1f641               // test    r9b, 1
 13527  	JE   LBB1_968
 13528  	LONG $0xba14100f               // movups    xmm2, oword [rdx + 4*rdi]
 13529  	LONG $0xba5c100f; BYTE $0x10   // movups    xmm3, oword [rdx + 4*rdi + 16]
 13530  	WORD $0x5c0f; BYTE $0xd1       // subps    xmm2, xmm1
 13531  	WORD $0x5c0f; BYTE $0xd9       // subps    xmm3, xmm1
 13532  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 13533  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 13534  
 13535  LBB1_968:
 13536  	WORD $0x3948; BYTE $0xc1 // cmp    rcx, rax
 13537  	JE   LBB1_1069
 13538  	JMP  LBB1_969
 13539  
 13540  LBB1_973:
 13541  	WORD $0xc031 // xor    eax, eax
 13542  
 13543  LBB1_974:
 13544  	LONG $0x01c1f641                           // test    r9b, 1
 13545  	JE   LBB1_976
 13546  	LONG $0x146f0ff3; BYTE $0x02               // movdqu    xmm2, oword [rdx + rax]
 13547  	LONG $0x5c6f0ff3; WORD $0x1002             // movdqu    xmm3, oword [rdx + rax + 16]
 13548  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 13549  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 13550  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 13551  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 13552  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 13553  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI1_0] */
 13554  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 13555  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 13556  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 13557  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 13558  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 13559  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 13560  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 13561  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 13562  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 13563  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 13564  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 13565  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 13566  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 13567  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 13568  
 13569  LBB1_976:
 13570  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 13571  	JE   LBB1_1069
 13572  	JMP  LBB1_977
 13573  
 13574  LBB1_981:
 13575  	WORD $0xc031 // xor    eax, eax
 13576  
 13577  LBB1_982:
 13578  	LONG $0x01c1f641                           // test    r9b, 1
 13579  	JE   LBB1_984
 13580  	LONG $0x146f0ff3; BYTE $0x02               // movdqu    xmm2, oword [rdx + rax]
 13581  	LONG $0x5c6f0ff3; WORD $0x1002             // movdqu    xmm3, oword [rdx + rax + 16]
 13582  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 13583  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 13584  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 13585  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 13586  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 13587  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI1_0] */
 13588  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 13589  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 13590  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 13591  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 13592  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 13593  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 13594  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 13595  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 13596  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 13597  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 13598  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 13599  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 13600  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 13601  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 13602  
 13603  LBB1_984:
 13604  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 13605  	JE   LBB1_1069
 13606  	JMP  LBB1_985
 13607  
 13608  LBB1_989:
 13609  	WORD $0xff31 // xor    edi, edi
 13610  
 13611  LBB1_990:
 13612  	LONG $0x01c1f641                           // test    r9b, 1
 13613  	JE   LBB1_992
 13614  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13615  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13616  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 13617  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 13618  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13619  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13620  
 13621  LBB1_992:
 13622  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13623  	JE   LBB1_1069
 13624  	JMP  LBB1_993
 13625  
 13626  LBB1_997:
 13627  	WORD $0xff31 // xor    edi, edi
 13628  
 13629  LBB1_998:
 13630  	LONG $0x01c1f641                           // test    r9b, 1
 13631  	JE   LBB1_1000
 13632  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13633  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13634  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 13635  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 13636  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13637  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13638  
 13639  LBB1_1000:
 13640  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13641  	JE   LBB1_1069
 13642  	JMP  LBB1_1001
 13643  
 13644  LBB1_1005:
 13645  	WORD $0xff31 // xor    edi, edi
 13646  
 13647  LBB1_1006:
 13648  	LONG $0x01c1f641                           // test    r9b, 1
 13649  	JE   LBB1_1008
 13650  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13651  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13652  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 13653  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 13654  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13655  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13656  
 13657  LBB1_1008:
 13658  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13659  	JE   LBB1_1069
 13660  	JMP  LBB1_1009
 13661  
 13662  LBB1_1013:
 13663  	WORD $0xff31 // xor    edi, edi
 13664  
 13665  LBB1_1014:
 13666  	LONG $0x01c1f641                           // test    r9b, 1
 13667  	JE   LBB1_1016
 13668  	LONG $0x0c6f0ff3; BYTE $0x3a               // movdqu    xmm1, oword [rdx + rdi]
 13669  	LONG $0x546f0ff3; WORD $0x103a             // movdqu    xmm2, oword [rdx + rdi + 16]
 13670  	LONG $0xc8f80f66                           // psubb    xmm1, xmm0
 13671  	LONG $0xd0f80f66                           // psubb    xmm2, xmm0
 13672  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 13673  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 13674  
 13675  LBB1_1016:
 13676  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13677  	JE   LBB1_1069
 13678  	JMP  LBB1_1017
 13679  
 13680  LBB1_1021:
 13681  	WORD $0xff31 // xor    edi, edi
 13682  
 13683  LBB1_1022:
 13684  	LONG $0x01c1f641                           // test    r9b, 1
 13685  	JE   LBB1_1024
 13686  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13687  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13688  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 13689  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 13690  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13691  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13692  
 13693  LBB1_1024:
 13694  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13695  	JE   LBB1_1069
 13696  	JMP  LBB1_1025
 13697  
 13698  LBB1_1029:
 13699  	WORD $0xff31 // xor    edi, edi
 13700  
 13701  LBB1_1030:
 13702  	LONG $0x01c1f641                           // test    r9b, 1
 13703  	JE   LBB1_1032
 13704  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13705  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13706  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 13707  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 13708  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13709  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13710  
 13711  LBB1_1032:
 13712  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13713  	JE   LBB1_1069
 13714  	JMP  LBB1_1033
 13715  
 13716  LBB1_1037:
 13717  	WORD $0xff31 // xor    edi, edi
 13718  
 13719  LBB1_1038:
 13720  	LONG $0x01c1f641                           // test    r9b, 1
 13721  	JE   LBB1_1040
 13722  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13723  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13724  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 13725  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 13726  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13727  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13728  
 13729  LBB1_1040:
 13730  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13731  	JE   LBB1_1069
 13732  	JMP  LBB1_1041
 13733  
 13734  LBB1_1045:
 13735  	WORD $0xff31 // xor    edi, edi
 13736  
 13737  LBB1_1046:
 13738  	LONG $0x01c1f641                           // test    r9b, 1
 13739  	JE   LBB1_1048
 13740  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13741  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13742  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 13743  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 13744  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13745  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13746  
 13747  LBB1_1048:
 13748  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13749  	JE   LBB1_1069
 13750  	JMP  LBB1_1049
 13751  
 13752  LBB1_1053:
 13753  	WORD $0xff31 // xor    edi, edi
 13754  
 13755  LBB1_1054:
 13756  	LONG $0x01c1f641                           // test    r9b, 1
 13757  	JE   LBB1_1056
 13758  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13759  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13760  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 13761  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 13762  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13763  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13764  
 13765  LBB1_1056:
 13766  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13767  	JE   LBB1_1069
 13768  	JMP  LBB1_1057
 13769  
 13770  LBB1_1061:
 13771  	WORD $0xff31 // xor    edi, edi
 13772  
 13773  LBB1_1062:
 13774  	LONG $0x01c1f641                           // test    r9b, 1
 13775  	JE   LBB1_1064
 13776  	LONG $0x0c6f0ff3; BYTE $0xba               // movdqu    xmm1, oword [rdx + 4*rdi]
 13777  	LONG $0x546f0ff3; WORD $0x10ba             // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 13778  	LONG $0xc8fa0f66                           // psubd    xmm1, xmm0
 13779  	LONG $0xd0fa0f66                           // psubd    xmm2, xmm0
 13780  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 13781  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 13782  
 13783  LBB1_1064:
 13784  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 13785  	JE   LBB1_1069
 13786  	JMP  LBB1_1065
 13787  
 // LCDATA3: 16-byte constant whose eight 16-bit lanes each hold 0x00ff (low byte set, high byte clear).
 // NOTE(review): presumably the low-byte mask for the packed 8-bit paths of the routine below; that use is not visible next to this definition -- confirm.
 13788  DATA LCDATA3<>+0x000(SB)/8, $0x00ff00ff00ff00ff // bytes 0-7 of the constant
 13789  DATA LCDATA3<>+0x008(SB)/8, $0x00ff00ff00ff00ff // bytes 8-15, same pattern
 13790  GLOBL LCDATA3<>(SB), 8, $16 // file-local (<>) symbol, 16 bytes total; flag 8 is RODATA in Go's textflag.h; its address is loaded into BP by the LEAQ at the top of the following TEXT block
 13791  
 13792  TEXT ·_arithmetic_scalar_arr_sse4(SB), $0-48
 13793  
 13794  	MOVQ typ+0(FP), DI
 13795  	MOVQ op+8(FP), SI
 13796  	MOVQ inLeft+16(FP), DX
 13797  	MOVQ inRight+24(FP), CX
 13798  	MOVQ out+32(FP), R8
 13799  	MOVQ len+40(FP), R9
 13800  	LEAQ LCDATA3<>(SB), BP
 13801  
 13802  	LONG $0x14fe8040         // cmp    sil, 20
 13803  	JG   LBB2_12
 13804  	WORD $0x8440; BYTE $0xf6 // test    sil, sil
 13805  	JE   LBB2_23
 13806  	LONG $0x01fe8040         // cmp    sil, 1
 13807  	JE   LBB2_31
 13808  	LONG $0x02fe8040         // cmp    sil, 2
 13809  	JNE  LBB2_1069
 13810  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13811  	JG   LBB2_55
 13812  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 13813  	JLE  LBB2_97
 13814  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 13815  	JE   LBB2_157
 13816  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 13817  	JE   LBB2_160
 13818  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13819  	JNE  LBB2_1069
 13820  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 13821  	JLE  LBB2_1069
 13822  	WORD $0x028b             // mov    eax, dword [rdx]
 13823  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 13824  	LONG $0x08f98341         // cmp    r9d, 8
 13825  	JB   LBB2_11
 13826  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 13827  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 13828  	JBE  LBB2_453
 13829  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 13830  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 13831  	JBE  LBB2_453
 13832  
 13833  LBB2_11:
 13834  	WORD $0xf631 // xor    esi, esi
 13835  
 13836  LBB2_625:
 13837  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 13838  	WORD $0xf749; BYTE $0xd1 // not    r9
 13839  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 13840  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 13841  	LONG $0x03e78348         // and    rdi, 3
 13842  	JE   LBB2_627
 13843  
 13844  LBB2_626:
 13845  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 13846  	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
 13847  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 13848  	LONG $0x01c68348         // add    rsi, 1
 13849  	LONG $0xffc78348         // add    rdi, -1
 13850  	JNE  LBB2_626
 13851  
 13852  LBB2_627:
 13853  	LONG $0x03f98349 // cmp    r9, 3
 13854  	JB   LBB2_1069
 13855  
 13856  LBB2_628:
 13857  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 13858  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13859  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 13860  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 13861  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13862  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 13863  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 13864  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13865  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 13866  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 13867  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13868  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 13869  	LONG $0x04c68348             // add    rsi, 4
 13870  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 13871  	JNE  LBB2_628
 13872  	JMP  LBB2_1069
 13873  
 13874  LBB2_12:
 13875  	LONG $0x15fe8040         // cmp    sil, 21
 13876  	JE   LBB2_39
 13877  	LONG $0x16fe8040         // cmp    sil, 22
 13878  	JE   LBB2_47
 13879  	LONG $0x17fe8040         // cmp    sil, 23
 13880  	JNE  LBB2_1069
 13881  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13882  	JG   LBB2_62
 13883  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 13884  	JLE  LBB2_102
 13885  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 13886  	JE   LBB2_163
 13887  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 13888  	JE   LBB2_166
 13889  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13890  	JNE  LBB2_1069
 13891  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 13892  	JLE  LBB2_1069
 13893  	WORD $0x028b             // mov    eax, dword [rdx]
 13894  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 13895  	LONG $0x08f98341         // cmp    r9d, 8
 13896  	JB   LBB2_22
 13897  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 13898  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 13899  	JBE  LBB2_456
 13900  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 13901  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 13902  	JBE  LBB2_456
 13903  
 13904  LBB2_22:
 13905  	WORD $0xf631 // xor    esi, esi
 13906  
 13907  LBB2_633:
 13908  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 13909  	WORD $0xf749; BYTE $0xd1 // not    r9
 13910  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 13911  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 13912  	LONG $0x03e78348         // and    rdi, 3
 13913  	JE   LBB2_635
 13914  
 13915  LBB2_634:
 13916  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 13917  	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
 13918  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 13919  	LONG $0x01c68348         // add    rsi, 1
 13920  	LONG $0xffc78348         // add    rdi, -1
 13921  	JNE  LBB2_634
 13922  
 13923  LBB2_635:
 13924  	LONG $0x03f98349 // cmp    r9, 3
 13925  	JB   LBB2_1069
 13926  
 13927  LBB2_636:
 13928  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 13929  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13930  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 13931  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 13932  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13933  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 13934  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 13935  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13936  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 13937  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 13938  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 13939  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 13940  	LONG $0x04c68348             // add    rsi, 4
 13941  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 13942  	JNE  LBB2_636
 13943  	JMP  LBB2_1069
 13944  
 13945  LBB2_23:
 13946  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13947  	JG   LBB2_69
 13948  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 13949  	JLE  LBB2_107
 13950  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 13951  	JE   LBB2_169
 13952  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 13953  	JE   LBB2_172
 13954  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 13955  	JNE  LBB2_1069
 13956  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 13957  	JLE  LBB2_1069
 13958  	WORD $0x028b             // mov    eax, dword [rdx]
 13959  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 13960  	LONG $0x08f98341         // cmp    r9d, 8
 13961  	JB   LBB2_30
 13962  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 13963  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 13964  	JBE  LBB2_459
 13965  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 13966  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 13967  	JBE  LBB2_459
 13968  
 13969  LBB2_30:
 13970  	WORD $0xf631 // xor    esi, esi
 13971  
 13972  LBB2_641:
 13973  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 13974  	WORD $0xf749; BYTE $0xd1 // not    r9
 13975  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 13976  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 13977  	LONG $0x03e78348         // and    rdi, 3
 13978  	JE   LBB2_643
 13979  
 13980  LBB2_642:
 13981  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 13982  	WORD $0xc201             // add    edx, eax
 13983  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 13984  	LONG $0x01c68348         // add    rsi, 1
 13985  	LONG $0xffc78348         // add    rdi, -1
 13986  	JNE  LBB2_642
 13987  
 13988  LBB2_643:
 13989  	LONG $0x03f98349 // cmp    r9, 3
 13990  	JB   LBB2_1069
 13991  
 13992  LBB2_644:
 13993  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 13994  	WORD $0xc201                 // add    edx, eax
 13995  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 13996  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 13997  	WORD $0xc201                 // add    edx, eax
 13998  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 13999  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 14000  	WORD $0xc201                 // add    edx, eax
 14001  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 14002  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 14003  	WORD $0xc201                 // add    edx, eax
 14004  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 14005  	LONG $0x04c68348             // add    rsi, 4
 14006  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14007  	JNE  LBB2_644
 14008  	JMP  LBB2_1069
 14009  
 14010  LBB2_31:
 14011  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14012  	JG   LBB2_76
 14013  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14014  	JLE  LBB2_112
 14015  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 14016  	JE   LBB2_175
 14017  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 14018  	JE   LBB2_178
 14019  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14020  	JNE  LBB2_1069
 14021  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14022  	JLE  LBB2_1069
 14023  	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
 14024  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14025  	LONG $0x08f98341         // cmp    r9d, 8
 14026  	JB   LBB2_38
 14027  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 14028  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14029  	JBE  LBB2_462
 14030  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 14031  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14032  	JBE  LBB2_462
 14033  
 14034  LBB2_38:
 14035  	WORD $0xf631 // xor    esi, esi
 14036  
 14037  LBB2_649:
 14038  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 14039  	WORD $0xf748; BYTE $0xd2 // not    rdx
 14040  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 14041  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14042  	LONG $0x03e78348         // and    rdi, 3
 14043  	JE   LBB2_651
 14044  
 14045  LBB2_650:
 14046  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 14047  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
 14048  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
 14049  	LONG $0x01c68348         // add    rsi, 1
 14050  	LONG $0xffc78348         // add    rdi, -1
 14051  	JNE  LBB2_650
 14052  
 14053  LBB2_651:
 14054  	LONG $0x03fa8348 // cmp    rdx, 3
 14055  	JB   LBB2_1069
 14056  
 14057  LBB2_652:
 14058  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14059  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
 14060  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
 14061  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14062  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
 14063  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
 14064  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14065  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
 14066  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
 14067  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14068  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
 14069  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
 14070  	LONG $0x04c68348             // add    rsi, 4
 14071  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14072  	JNE  LBB2_652
 14073  	JMP  LBB2_1069
 14074  
 14075  LBB2_39:
 14076  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14077  	JG   LBB2_83
 14078  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14079  	JLE  LBB2_117
 14080  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 14081  	JE   LBB2_181
 14082  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 14083  	JE   LBB2_184
 14084  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14085  	JNE  LBB2_1069
 14086  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14087  	JLE  LBB2_1069
 14088  	WORD $0x028b             // mov    eax, dword [rdx]
 14089  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14090  	LONG $0x08f98341         // cmp    r9d, 8
 14091  	JB   LBB2_46
 14092  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 14093  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14094  	JBE  LBB2_465
 14095  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 14096  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14097  	JBE  LBB2_465
 14098  
 14099  LBB2_46:
 14100  	WORD $0xf631 // xor    esi, esi
 14101  
 14102  LBB2_657:
 14103  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 14104  	WORD $0xf749; BYTE $0xd1 // not    r9
 14105  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14106  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14107  	LONG $0x03e78348         // and    rdi, 3
 14108  	JE   LBB2_659
 14109  
 14110  LBB2_658:
 14111  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 14112  	WORD $0xc201             // add    edx, eax
 14113  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 14114  	LONG $0x01c68348         // add    rsi, 1
 14115  	LONG $0xffc78348         // add    rdi, -1
 14116  	JNE  LBB2_658
 14117  
 14118  LBB2_659:
 14119  	LONG $0x03f98349 // cmp    r9, 3
 14120  	JB   LBB2_1069
 14121  
 14122  LBB2_660:
 14123  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 14124  	WORD $0xc201                 // add    edx, eax
 14125  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 14126  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 14127  	WORD $0xc201                 // add    edx, eax
 14128  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 14129  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 14130  	WORD $0xc201                 // add    edx, eax
 14131  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 14132  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 14133  	WORD $0xc201                 // add    edx, eax
 14134  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 14135  	LONG $0x04c68348             // add    rsi, 4
 14136  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14137  	JNE  LBB2_660
 14138  	JMP  LBB2_1069
 14139  
 14140  LBB2_47:
 14141  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14142  	JG   LBB2_90
 14143  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14144  	JLE  LBB2_122
 14145  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 14146  	JE   LBB2_187
 14147  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 14148  	JE   LBB2_190
 14149  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 14150  	JNE  LBB2_1069
 14151  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14152  	JLE  LBB2_1069
 14153  	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
 14154  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14155  	LONG $0x08f98341         // cmp    r9d, 8
 14156  	JB   LBB2_54
 14157  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 14158  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14159  	JBE  LBB2_468
 14160  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 14161  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14162  	JBE  LBB2_468
 14163  
 14164  LBB2_54:
 14165  	WORD $0xf631 // xor    esi, esi
 14166  
 14167  LBB2_665:
 14168  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 14169  	WORD $0xf748; BYTE $0xd2 // not    rdx
 14170  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 14171  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14172  	LONG $0x03e78348         // and    rdi, 3
 14173  	JE   LBB2_667
 14174  
 14175  LBB2_666:
 14176  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 14177  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
 14178  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
 14179  	LONG $0x01c68348         // add    rsi, 1
 14180  	LONG $0xffc78348         // add    rdi, -1
 14181  	JNE  LBB2_666
 14182  
 14183  LBB2_667:
 14184  	LONG $0x03fa8348 // cmp    rdx, 3
 14185  	JB   LBB2_1069
 14186  
 14187  LBB2_668:
 14188  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14189  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
 14190  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
 14191  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14192  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
 14193  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
 14194  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14195  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
 14196  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
 14197  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14198  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
 14199  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
 14200  	LONG $0x04c68348             // add    rsi, 4
 14201  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14202  	JNE  LBB2_668
 14203  	JMP  LBB2_1069
 14204  
 14205  LBB2_55:
 14206  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14207  	JLE  LBB2_127
 14208  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14209  	JE   LBB2_193
 14210  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14211  	JE   LBB2_196
 14212  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14213  	JNE  LBB2_1069
 14214  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14215  	JLE  LBB2_1069
 14216  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14217  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14218  	LONG $0x04f98341         // cmp    r9d, 4
 14219  	JB   LBB2_61
 14220  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14221  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14222  	JBE  LBB2_471
 14223  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14224  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14225  	JBE  LBB2_471
 14226  
 14227  LBB2_61:
 14228  	WORD $0xd231 // xor    edx, edx
 14229  
 14230  LBB2_673:
 14231  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14232  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14233  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14234  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14235  	LONG $0x03e78348         // and    rdi, 3
 14236  	JE   LBB2_675
 14237  
 14238  LBB2_674:
 14239  	LONG $0x0c100ff2; BYTE $0xd1   // movsd    xmm1, qword [rcx + 8*rdx]
 14240  	LONG $0xc8590ff2               // mulsd    xmm1, xmm0
 14241  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14242  	LONG $0x01c28348               // add    rdx, 1
 14243  	LONG $0xffc78348               // add    rdi, -1
 14244  	JNE  LBB2_674
 14245  
 14246  LBB2_675:
 14247  	LONG $0x03fe8348 // cmp    rsi, 3
 14248  	JB   LBB2_1069
 14249  
 14250  LBB2_676:
 14251  	LONG $0x0c100ff2; BYTE $0xd1               // movsd    xmm1, qword [rcx + 8*rdx]
 14252  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14253  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14254  	LONG $0x4c100ff2; WORD $0x08d1             // movsd    xmm1, qword [rcx + 8*rdx + 8]
 14255  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14256  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14257  	LONG $0x4c100ff2; WORD $0x10d1             // movsd    xmm1, qword [rcx + 8*rdx + 16]
 14258  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14259  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14260  	LONG $0x4c100ff2; WORD $0x18d1             // movsd    xmm1, qword [rcx + 8*rdx + 24]
 14261  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14262  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14263  	LONG $0x04c28348                           // add    rdx, 4
 14264  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14265  	JNE  LBB2_676
 14266  	JMP  LBB2_1069
 14267  
 14268  LBB2_62:
 14269  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14270  	JLE  LBB2_132
 14271  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14272  	JE   LBB2_199
 14273  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14274  	JE   LBB2_202
 14275  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14276  	JNE  LBB2_1069
 14277  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14278  	JLE  LBB2_1069
 14279  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14280  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14281  	LONG $0x04f98341         // cmp    r9d, 4
 14282  	JB   LBB2_68
 14283  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14284  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14285  	JBE  LBB2_474
 14286  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14287  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14288  	JBE  LBB2_474
 14289  
 14290  LBB2_68:
 14291  	WORD $0xd231 // xor    edx, edx
 14292  
 14293  LBB2_681:
 14294  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14295  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14296  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14297  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14298  	LONG $0x03e78348         // and    rdi, 3
 14299  	JE   LBB2_683
 14300  
 14301  LBB2_682:
 14302  	LONG $0x0c100ff2; BYTE $0xd1   // movsd    xmm1, qword [rcx + 8*rdx]
 14303  	LONG $0xc8590ff2               // mulsd    xmm1, xmm0
 14304  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14305  	LONG $0x01c28348               // add    rdx, 1
 14306  	LONG $0xffc78348               // add    rdi, -1
 14307  	JNE  LBB2_682
 14308  
 14309  LBB2_683:
 14310  	LONG $0x03fe8348 // cmp    rsi, 3
 14311  	JB   LBB2_1069
 14312  
 14313  LBB2_684:
 14314  	LONG $0x0c100ff2; BYTE $0xd1               // movsd    xmm1, qword [rcx + 8*rdx]
 14315  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14316  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14317  	LONG $0x4c100ff2; WORD $0x08d1             // movsd    xmm1, qword [rcx + 8*rdx + 8]
 14318  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14319  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14320  	LONG $0x4c100ff2; WORD $0x10d1             // movsd    xmm1, qword [rcx + 8*rdx + 16]
 14321  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14322  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14323  	LONG $0x4c100ff2; WORD $0x18d1             // movsd    xmm1, qword [rcx + 8*rdx + 24]
 14324  	LONG $0xc8590ff2                           // mulsd    xmm1, xmm0
 14325  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14326  	LONG $0x04c28348                           // add    rdx, 4
 14327  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14328  	JNE  LBB2_684
 14329  	JMP  LBB2_1069
 14330  
 14331  LBB2_69:
        // Dispatch on the type selector in edi: values <= 8, 9 and 11 branch to
        // other labels, 12 is handled here, anything else goes to LBB2_1069
        // (outside this excerpt; presumably the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 12 case: out[i] = in[i] + scalar on 8-byte floats (movsd/addsd).
 14332  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14333  	JLE  LBB2_137
 14334  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14335  	JE   LBB2_205
 14336  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14337  	JE   LBB2_208
 14338  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14339  	JNE  LBB2_1069
 14340  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14341  	JLE  LBB2_1069
 14342  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14343  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14344  	LONG $0x04f98341         // cmp    r9d, 4
 14345  	JB   LBB2_75
        	// With >= 4 elements, LBB2_477 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14346  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14347  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14348  	JBE  LBB2_477
 14349  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14350  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14351  	JBE  LBB2_477
 14352  
 14353  LBB2_75:
        	// Start the element-wise loops at index 0 (rdx is reused as the index).
 14354  	WORD $0xd231 // xor    edx, edx
 14355  
 14356  LBB2_689:
        	// rsi = count - 1 - rdx; rdi = count & 3 = iterations of the peel loop.
 14357  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14358  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14359  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14360  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14361  	LONG $0x03e78348         // and    rdi, 3
 14362  	JE   LBB2_691
 14363  
 14364  LBB2_690:
        	// Peel loop: one element per iteration until rdi reaches zero.
 14365  	LONG $0x0c100ff2; BYTE $0xd1   // movsd    xmm1, qword [rcx + 8*rdx]
 14366  	LONG $0xc8580ff2               // addsd    xmm1, xmm0
 14367  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14368  	LONG $0x01c28348               // add    rdx, 1
 14369  	LONG $0xffc78348               // add    rdi, -1
 14370  	JNE  LBB2_690
 14371  
 14372  LBB2_691:
        	// Skip the unrolled loop when rsi (count - 1 - start index) is below 3.
 14373  	LONG $0x03fe8348 // cmp    rsi, 3
 14374  	JB   LBB2_1069
 14375  
 14376  LBB2_692:
        	// Main loop, unrolled 4x; runs until the index rdx equals the count in rax.
 14377  	LONG $0x0c100ff2; BYTE $0xd1               // movsd    xmm1, qword [rcx + 8*rdx]
 14378  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14379  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14380  	LONG $0x4c100ff2; WORD $0x08d1             // movsd    xmm1, qword [rcx + 8*rdx + 8]
 14381  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14382  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14383  	LONG $0x4c100ff2; WORD $0x10d1             // movsd    xmm1, qword [rcx + 8*rdx + 16]
 14384  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14385  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14386  	LONG $0x4c100ff2; WORD $0x18d1             // movsd    xmm1, qword [rcx + 8*rdx + 24]
 14387  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14388  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14389  	LONG $0x04c28348                           // add    rdx, 4
 14390  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14391  	JNE  LBB2_692
 14392  	JMP  LBB2_1069
 14393  
 14394  LBB2_76:
        // Dispatch on the type selector in edi: values <= 8, 9 and 11 branch to
        // other labels, 12 is handled here, anything else goes to LBB2_1069
        // (outside this excerpt; presumably the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 12 case: out[i] = scalar - in[i] on 8-byte floats (movapd/subsd).
 14395  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14396  	JLE  LBB2_142
 14397  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14398  	JE   LBB2_211
 14399  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14400  	JE   LBB2_214
 14401  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14402  	JNE  LBB2_1069
 14403  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14404  	JLE  LBB2_1069
 14405  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14406  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14407  	LONG $0x04f98341         // cmp    r9d, 4
 14408  	JB   LBB2_82
        	// With >= 4 elements, LBB2_480 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14409  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14410  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14411  	JBE  LBB2_480
 14412  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14413  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14414  	JBE  LBB2_480
 14415  
 14416  LBB2_82:
        	// Start the element-wise loops at index 0 (rdx is reused as the index).
 14417  	WORD $0xd231 // xor    edx, edx
 14418  
 14419  LBB2_697:
        	// rsi = count - 1 - rdx; rdi = count & 3 = iterations of the peel loop.
 14420  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14421  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14422  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14423  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14424  	LONG $0x03e78348         // and    rdi, 3
 14425  	JE   LBB2_699
 14426  
 14427  LBB2_698:
        	// Peel loop: xmm1 is reloaded with the scalar each time because subsd
        	// overwrites its destination operand.
 14428  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 14429  	LONG $0x0c5c0ff2; BYTE $0xd1   // subsd    xmm1, qword [rcx + 8*rdx]
 14430  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14431  	LONG $0x01c28348               // add    rdx, 1
 14432  	LONG $0xffc78348               // add    rdi, -1
 14433  	JNE  LBB2_698
 14434  
 14435  LBB2_699:
        	// Skip the unrolled loop when rsi (count - 1 - start index) is below 3.
 14436  	LONG $0x03fe8348 // cmp    rsi, 3
 14437  	JB   LBB2_1069
 14438  
 14439  LBB2_700:
        	// Main loop, unrolled 4x; runs until the index rdx equals the count in rax.
 14440  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14441  	LONG $0x0c5c0ff2; BYTE $0xd1               // subsd    xmm1, qword [rcx + 8*rdx]
 14442  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14443  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14444  	LONG $0x4c5c0ff2; WORD $0x08d1             // subsd    xmm1, qword [rcx + 8*rdx + 8]
 14445  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14446  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14447  	LONG $0x4c5c0ff2; WORD $0x10d1             // subsd    xmm1, qword [rcx + 8*rdx + 16]
 14448  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14449  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14450  	LONG $0x4c5c0ff2; WORD $0x18d1             // subsd    xmm1, qword [rcx + 8*rdx + 24]
 14451  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14452  	LONG $0x04c28348                           // add    rdx, 4
 14453  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14454  	JNE  LBB2_700
 14455  	JMP  LBB2_1069
 14456  
 14457  LBB2_83:
        // Dispatch on the type selector in edi: values <= 8, 9 and 11 branch to
        // other labels, 12 is handled here, anything else goes to LBB2_1069
        // (outside this excerpt; presumably the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 12 case: out[i] = in[i] + scalar on 8-byte floats (movsd/addsd).
 14458  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14459  	JLE  LBB2_147
 14460  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14461  	JE   LBB2_217
 14462  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14463  	JE   LBB2_220
 14464  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14465  	JNE  LBB2_1069
 14466  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14467  	JLE  LBB2_1069
 14468  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14469  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14470  	LONG $0x04f98341         // cmp    r9d, 4
 14471  	JB   LBB2_89
        	// With >= 4 elements, LBB2_483 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14472  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14473  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14474  	JBE  LBB2_483
 14475  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14476  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14477  	JBE  LBB2_483
 14478  
 14479  LBB2_89:
        	// Start the element-wise loops at index 0 (rdx is reused as the index).
 14480  	WORD $0xd231 // xor    edx, edx
 14481  
 14482  LBB2_705:
        	// rsi = count - 1 - rdx; rdi = count & 3 = iterations of the peel loop.
 14483  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14484  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14485  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14486  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14487  	LONG $0x03e78348         // and    rdi, 3
 14488  	JE   LBB2_707
 14489  
 14490  LBB2_706:
        	// Peel loop: one element per iteration until rdi reaches zero.
 14491  	LONG $0x0c100ff2; BYTE $0xd1   // movsd    xmm1, qword [rcx + 8*rdx]
 14492  	LONG $0xc8580ff2               // addsd    xmm1, xmm0
 14493  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14494  	LONG $0x01c28348               // add    rdx, 1
 14495  	LONG $0xffc78348               // add    rdi, -1
 14496  	JNE  LBB2_706
 14497  
 14498  LBB2_707:
        	// Skip the unrolled loop when rsi (count - 1 - start index) is below 3.
 14499  	LONG $0x03fe8348 // cmp    rsi, 3
 14500  	JB   LBB2_1069
 14501  
 14502  LBB2_708:
        	// Main loop, unrolled 4x; runs until the index rdx equals the count in rax.
 14503  	LONG $0x0c100ff2; BYTE $0xd1               // movsd    xmm1, qword [rcx + 8*rdx]
 14504  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14505  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14506  	LONG $0x4c100ff2; WORD $0x08d1             // movsd    xmm1, qword [rcx + 8*rdx + 8]
 14507  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14508  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14509  	LONG $0x4c100ff2; WORD $0x10d1             // movsd    xmm1, qword [rcx + 8*rdx + 16]
 14510  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14511  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14512  	LONG $0x4c100ff2; WORD $0x18d1             // movsd    xmm1, qword [rcx + 8*rdx + 24]
 14513  	LONG $0xc8580ff2                           // addsd    xmm1, xmm0
 14514  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14515  	LONG $0x04c28348                           // add    rdx, 4
 14516  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14517  	JNE  LBB2_708
 14518  	JMP  LBB2_1069
 14519  
 14520  LBB2_90:
        // Dispatch on the type selector in edi: values <= 8, 9 and 11 branch to
        // other labels, 12 is handled here, anything else goes to LBB2_1069
        // (outside this excerpt; presumably the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 12 case: out[i] = scalar - in[i] on 8-byte floats (movapd/subsd).
 14521  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14522  	JLE  LBB2_152
 14523  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 14524  	JE   LBB2_223
 14525  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 14526  	JE   LBB2_226
 14527  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 14528  	JNE  LBB2_1069
 14529  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14530  	JLE  LBB2_1069
 14531  	LONG $0x02100ff2         // movsd    xmm0, qword [rdx]
 14532  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 14533  	LONG $0x04f98341         // cmp    r9d, 4
 14534  	JB   LBB2_96
        	// With >= 4 elements, LBB2_486 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14535  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 14536  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14537  	JBE  LBB2_486
 14538  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 14539  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14540  	JBE  LBB2_486
 14541  
 14542  LBB2_96:
        	// Start the element-wise loops at index 0 (rdx is reused as the index).
 14543  	WORD $0xd231 // xor    edx, edx
 14544  
 14545  LBB2_713:
        	// rsi = count - 1 - rdx; rdi = count & 3 = iterations of the peel loop.
 14546  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 14547  	WORD $0xf748; BYTE $0xd6 // not    rsi
 14548  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 14549  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 14550  	LONG $0x03e78348         // and    rdi, 3
 14551  	JE   LBB2_715
 14552  
 14553  LBB2_714:
        	// Peel loop: xmm1 is reloaded with the scalar each time because subsd
        	// overwrites its destination operand.
 14554  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 14555  	LONG $0x0c5c0ff2; BYTE $0xd1   // subsd    xmm1, qword [rcx + 8*rdx]
 14556  	LONG $0x110f41f2; WORD $0xd00c // movsd    qword [r8 + 8*rdx], xmm1
 14557  	LONG $0x01c28348               // add    rdx, 1
 14558  	LONG $0xffc78348               // add    rdi, -1
 14559  	JNE  LBB2_714
 14560  
 14561  LBB2_715:
        	// Skip the unrolled loop when rsi (count - 1 - start index) is below 3.
 14562  	LONG $0x03fe8348 // cmp    rsi, 3
 14563  	JB   LBB2_1069
 14564  
 14565  LBB2_716:
        	// Main loop, unrolled 4x; runs until the index rdx equals the count in rax.
 14566  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14567  	LONG $0x0c5c0ff2; BYTE $0xd1               // subsd    xmm1, qword [rcx + 8*rdx]
 14568  	LONG $0x110f41f2; WORD $0xd00c             // movsd    qword [r8 + 8*rdx], xmm1
 14569  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14570  	LONG $0x4c5c0ff2; WORD $0x08d1             // subsd    xmm1, qword [rcx + 8*rdx + 8]
 14571  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x08 // movsd    qword [r8 + 8*rdx + 8], xmm1
 14572  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14573  	LONG $0x4c5c0ff2; WORD $0x10d1             // subsd    xmm1, qword [rcx + 8*rdx + 16]
 14574  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x10 // movsd    qword [r8 + 8*rdx + 16], xmm1
 14575  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 14576  	LONG $0x4c5c0ff2; WORD $0x18d1             // subsd    xmm1, qword [rcx + 8*rdx + 24]
 14577  	LONG $0x110f41f2; WORD $0xd04c; BYTE $0x18 // movsd    qword [r8 + 8*rdx + 24], xmm1
 14578  	LONG $0x04c28348                           // add    rdx, 4
 14579  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 14580  	JNE  LBB2_716
 14581  	JMP  LBB2_1069
 14582  
 14583  LBB2_97:
        // Dispatch on the type selector in edi: 2 branches to LBB2_229, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 3 case: out[i] = low byte of in[i] * scalar, one byte per element.
 14584  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14585  	JE   LBB2_229
 14586  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14587  	JNE  LBB2_1069
 14588  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14589  	JLE  LBB2_1069
 14590  	WORD $0x128a             // mov    dl, byte [rdx]
 14591  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14592  	LONG $0x20f98341         // cmp    r9d, 32
 14593  	JB   LBB2_101
        	// With >= 32 elements, LBB2_489 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14594  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
 14595  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
 14596  	JBE  LBB2_489
 14597  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
 14598  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 14599  	JBE  LBB2_489
 14600  
 14601  LBB2_101:
        	// Start the element-wise loops at index 0 (rdi is reused as the index).
 14602  	WORD $0xff31 // xor    edi, edi
 14603  
 14604  LBB2_721:
        	// r9 = count - 1 - rdi; rsi = count & 3 = iterations of the peel loop.
 14605  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
 14606  	WORD $0xf749; BYTE $0xd1 // not    r9
 14607  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14608  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
 14609  	LONG $0x03e68348         // and    rsi, 3
 14610  	JE   LBB2_723
 14611  
 14612  LBB2_722:
        	// Peel loop: `mul dl` leaves al * dl in ax; only al is stored.
 14613  	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
 14614  	WORD $0xe2f6     // mul    dl
 14615  	LONG $0x38048841 // mov    byte [r8 + rdi], al
 14616  	LONG $0x01c78348 // add    rdi, 1
 14617  	LONG $0xffc68348 // add    rsi, -1
 14618  	JNE  LBB2_722
 14619  
 14620  LBB2_723:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 14621  	LONG $0x03f98349 // cmp    r9, 3
 14622  	JB   LBB2_1069
 14623  
 14624  LBB2_724:
        	// Main loop, unrolled 4x; runs until the index rdi equals the count in r10.
 14625  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
 14626  	WORD $0xe2f6                 // mul    dl
 14627  	LONG $0x38048841             // mov    byte [r8 + rdi], al
 14628  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
 14629  	WORD $0xe2f6                 // mul    dl
 14630  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
 14631  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
 14632  	WORD $0xe2f6                 // mul    dl
 14633  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
 14634  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
 14635  	WORD $0xe2f6                 // mul    dl
 14636  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
 14637  	LONG $0x04c78348             // add    rdi, 4
 14638  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
 14639  	JNE  LBB2_724
 14640  	JMP  LBB2_1069
 14641  
 14642  LBB2_102:
        // Dispatch on the type selector in edi: 2 branches to LBB2_232, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand, rcx -> input array,
        // r8 -> output array, r9d = element count (nothing is done when <= 0).
        // Type 3 case: out[i] = low byte of in[i] * scalar, one byte per element.
 14643  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14644  	JE   LBB2_232
 14645  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14646  	JNE  LBB2_1069
 14647  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14648  	JLE  LBB2_1069
 14649  	WORD $0x128a             // mov    dl, byte [rdx]
 14650  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14651  	LONG $0x20f98341         // cmp    r9d, 32
 14652  	JB   LBB2_106
        	// With >= 32 elements, LBB2_492 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14653  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
 14654  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
 14655  	JBE  LBB2_492
 14656  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
 14657  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 14658  	JBE  LBB2_492
 14659  
 14660  LBB2_106:
        	// Start the element-wise loops at index 0 (rdi is reused as the index).
 14661  	WORD $0xff31 // xor    edi, edi
 14662  
 14663  LBB2_729:
        	// r9 = count - 1 - rdi; rsi = count & 3 = iterations of the peel loop.
 14664  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
 14665  	WORD $0xf749; BYTE $0xd1 // not    r9
 14666  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14667  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
 14668  	LONG $0x03e68348         // and    rsi, 3
 14669  	JE   LBB2_731
 14670  
 14671  LBB2_730:
        	// Peel loop: `mul dl` leaves al * dl in ax; only al is stored.
 14672  	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
 14673  	WORD $0xe2f6     // mul    dl
 14674  	LONG $0x38048841 // mov    byte [r8 + rdi], al
 14675  	LONG $0x01c78348 // add    rdi, 1
 14676  	LONG $0xffc68348 // add    rsi, -1
 14677  	JNE  LBB2_730
 14678  
 14679  LBB2_731:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 14680  	LONG $0x03f98349 // cmp    r9, 3
 14681  	JB   LBB2_1069
 14682  
 14683  LBB2_732:
        	// Main loop, unrolled 4x; runs until the index rdi equals the count in r10.
 14684  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
 14685  	WORD $0xe2f6                 // mul    dl
 14686  	LONG $0x38048841             // mov    byte [r8 + rdi], al
 14687  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
 14688  	WORD $0xe2f6                 // mul    dl
 14689  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
 14690  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
 14691  	WORD $0xe2f6                 // mul    dl
 14692  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
 14693  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
 14694  	WORD $0xe2f6                 // mul    dl
 14695  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
 14696  	LONG $0x04c78348             // add    rdi, 4
 14697  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
 14698  	JNE  LBB2_732
 14699  	JMP  LBB2_1069
 14700  
 14701  LBB2_107:
        // Dispatch on the type selector in edi: 2 branches to LBB2_235, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in al after the
        // load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 3 case: out[i] = in[i] + scalar, one byte per element, 8-bit add.
 14702  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14703  	JE   LBB2_235
 14704  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14705  	JNE  LBB2_1069
 14706  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14707  	JLE  LBB2_1069
 14708  	WORD $0x028a             // mov    al, byte [rdx]
 14709  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14710  	LONG $0x20f98341         // cmp    r9d, 32
 14711  	JB   LBB2_111
        	// With >= 32 elements, LBB2_495 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14712  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 14713  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14714  	JBE  LBB2_495
 14715  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 14716  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14717  	JBE  LBB2_495
 14718  
 14719  LBB2_111:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 14720  	WORD $0xf631 // xor    esi, esi
 14721  
 14722  LBB2_737:
        	// r9 = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 14723  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 14724  	WORD $0xf749; BYTE $0xd1 // not    r9
 14725  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14726  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14727  	LONG $0x03e78348         // and    rdi, 3
 14728  	JE   LBB2_739
 14729  
 14730  LBB2_738:
        	// Peel loop: one element per iteration until rdi reaches zero.
 14731  	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
 14732  	WORD $0xc200     // add    dl, al
 14733  	LONG $0x30148841 // mov    byte [r8 + rsi], dl
 14734  	LONG $0x01c68348 // add    rsi, 1
 14735  	LONG $0xffc78348 // add    rdi, -1
 14736  	JNE  LBB2_738
 14737  
 14738  LBB2_739:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 14739  	LONG $0x03f98349 // cmp    r9, 3
 14740  	JB   LBB2_1069
 14741  
 14742  LBB2_740:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 14743  	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
 14744  	WORD $0xc200                 // add    dl, al
 14745  	LONG $0x30148841             // mov    byte [r8 + rsi], dl
 14746  	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
 14747  	WORD $0xc200                 // add    dl, al
 14748  	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
 14749  	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
 14750  	WORD $0xc200                 // add    dl, al
 14751  	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
 14752  	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
 14753  	WORD $0xc200                 // add    dl, al
 14754  	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
 14755  	LONG $0x04c68348             // add    rsi, 4
 14756  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14757  	JNE  LBB2_740
 14758  	JMP  LBB2_1069
 14759  
 14760  LBB2_112:
        // Dispatch on the type selector in edi: 2 branches to LBB2_238, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in r11b after
        // the load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 3 case: out[i] = scalar - in[i], one byte per element, 8-bit subtract.
 14761  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14762  	JE   LBB2_238
 14763  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14764  	JNE  LBB2_1069
 14765  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14766  	JLE  LBB2_1069
 14767  	WORD $0x8a44; BYTE $0x1a // mov    r11b, byte [rdx]
 14768  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14769  	LONG $0x20f98341         // cmp    r9d, 32
 14770  	JB   LBB2_116
        	// With >= 32 elements, LBB2_498 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14771  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 14772  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14773  	JBE  LBB2_498
 14774  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 14775  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14776  	JBE  LBB2_498
 14777  
 14778  LBB2_116:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 14779  	WORD $0xf631 // xor    esi, esi
 14780  
 14781  LBB2_745:
        	// rdx = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 14782  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 14783  	WORD $0xf748; BYTE $0xd2 // not    rdx
 14784  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 14785  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14786  	LONG $0x03e78348         // and    rdi, 3
 14787  	JE   LBB2_747
 14788  
 14789  LBB2_746:
        	// Peel loop: eax is reloaded from r11d each time because sub overwrites al.
 14790  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 14791  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
 14792  	LONG $0x30048841         // mov    byte [r8 + rsi], al
 14793  	LONG $0x01c68348         // add    rsi, 1
 14794  	LONG $0xffc78348         // add    rdi, -1
 14795  	JNE  LBB2_746
 14796  
 14797  LBB2_747:
        	// Skip the unrolled loop when rdx (count - 1 - start index) is below 3.
 14798  	LONG $0x03fa8348 // cmp    rdx, 3
 14799  	JB   LBB2_1069
 14800  
 14801  LBB2_748:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 14802  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14803  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
 14804  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 14805  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14806  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
 14807  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 14808  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14809  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
 14810  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
 14811  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14812  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
 14813  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
 14814  	LONG $0x04c68348             // add    rsi, 4
 14815  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14816  	JNE  LBB2_748
 14817  	JMP  LBB2_1069
 14818  
 14819  LBB2_117:
        // Dispatch on the type selector in edi: 2 branches to LBB2_241, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in al after the
        // load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 3 case: out[i] = in[i] + scalar, one byte per element, 8-bit add.
 14820  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14821  	JE   LBB2_241
 14822  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14823  	JNE  LBB2_1069
 14824  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14825  	JLE  LBB2_1069
 14826  	WORD $0x028a             // mov    al, byte [rdx]
 14827  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14828  	LONG $0x20f98341         // cmp    r9d, 32
 14829  	JB   LBB2_121
        	// With >= 32 elements, LBB2_501 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14830  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 14831  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14832  	JBE  LBB2_501
 14833  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 14834  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14835  	JBE  LBB2_501
 14836  
 14837  LBB2_121:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 14838  	WORD $0xf631 // xor    esi, esi
 14839  
 14840  LBB2_753:
        	// r9 = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 14841  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 14842  	WORD $0xf749; BYTE $0xd1 // not    r9
 14843  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14844  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14845  	LONG $0x03e78348         // and    rdi, 3
 14846  	JE   LBB2_755
 14847  
 14848  LBB2_754:
        	// Peel loop: one element per iteration until rdi reaches zero.
 14849  	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
 14850  	WORD $0xc200     // add    dl, al
 14851  	LONG $0x30148841 // mov    byte [r8 + rsi], dl
 14852  	LONG $0x01c68348 // add    rsi, 1
 14853  	LONG $0xffc78348 // add    rdi, -1
 14854  	JNE  LBB2_754
 14855  
 14856  LBB2_755:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 14857  	LONG $0x03f98349 // cmp    r9, 3
 14858  	JB   LBB2_1069
 14859  
 14860  LBB2_756:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 14861  	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
 14862  	WORD $0xc200                 // add    dl, al
 14863  	LONG $0x30148841             // mov    byte [r8 + rsi], dl
 14864  	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
 14865  	WORD $0xc200                 // add    dl, al
 14866  	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
 14867  	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
 14868  	WORD $0xc200                 // add    dl, al
 14869  	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
 14870  	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
 14871  	WORD $0xc200                 // add    dl, al
 14872  	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
 14873  	LONG $0x04c68348             // add    rsi, 4
 14874  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14875  	JNE  LBB2_756
 14876  	JMP  LBB2_1069
 14877  
 14878  LBB2_122:
        // Dispatch on the type selector in edi: 2 branches to LBB2_244, 3 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in r11b after
        // the load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 3 case: out[i] = scalar - in[i], one byte per element, 8-bit subtract.
 14879  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 14880  	JE   LBB2_244
 14881  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 14882  	JNE  LBB2_1069
 14883  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14884  	JLE  LBB2_1069
 14885  	WORD $0x8a44; BYTE $0x1a // mov    r11b, byte [rdx]
 14886  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14887  	LONG $0x20f98341         // cmp    r9d, 32
 14888  	JB   LBB2_126
        	// With >= 32 elements, LBB2_504 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+n) and [r8, r8+n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14889  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 14890  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14891  	JBE  LBB2_504
 14892  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 14893  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14894  	JBE  LBB2_504
 14895  
 14896  LBB2_126:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 14897  	WORD $0xf631 // xor    esi, esi
 14898  
 14899  LBB2_761:
        	// rdx = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 14900  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 14901  	WORD $0xf748; BYTE $0xd2 // not    rdx
 14902  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 14903  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14904  	LONG $0x03e78348         // and    rdi, 3
 14905  	JE   LBB2_763
 14906  
 14907  LBB2_762:
        	// Peel loop: eax is reloaded from r11d each time because sub overwrites al.
 14908  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 14909  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
 14910  	LONG $0x30048841         // mov    byte [r8 + rsi], al
 14911  	LONG $0x01c68348         // add    rsi, 1
 14912  	LONG $0xffc78348         // add    rdi, -1
 14913  	JNE  LBB2_762
 14914  
 14915  LBB2_763:
        	// Skip the unrolled loop when rdx (count - 1 - start index) is below 3.
 14916  	LONG $0x03fa8348 // cmp    rdx, 3
 14917  	JB   LBB2_1069
 14918  
 14919  LBB2_764:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 14920  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14921  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
 14922  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 14923  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14924  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
 14925  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 14926  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14927  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
 14928  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
 14929  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 14930  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
 14931  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
 14932  	LONG $0x04c68348             // add    rsi, 4
 14933  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 14934  	JNE  LBB2_764
 14935  	JMP  LBB2_1069
 14936  
 14937  LBB2_127:
        // Dispatch on the type selector in edi: 7 branches to LBB2_247, 8 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Type 8 set-up: rax = 64-bit scalar loaded from [rdx], rsi = element count
        // (zero-extended r9d), r9d = count & 3. Continues at LBB2_319 when
        // count - 1 >= 3, otherwise at LBB2_321 with edi = 0. Both targets are
        // outside this excerpt, so the operation applied is not visible here.
 14938  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 14939  	JE   LBB2_247
 14940  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14941  	JNE  LBB2_1069
 14942  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14943  	JLE  LBB2_1069
 14944  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 14945  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 14946  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
 14947  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
 14948  	LONG $0x03e18341         // and    r9d, 3
 14949  	LONG $0x03ff8348         // cmp    rdi, 3
 14950  	JAE  LBB2_319
 14951  	WORD $0xff31             // xor    edi, edi
 14952  	JMP  LBB2_321
 14953  
 14954  LBB2_132:
        // Dispatch on the type selector in edi: 7 branches to LBB2_250, 8 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Type 8 set-up: rax = 64-bit scalar loaded from [rdx], rsi = element count
        // (zero-extended r9d), r9d = count & 3. Continues at LBB2_324 when
        // count - 1 >= 3, otherwise at LBB2_326 with edi = 0. Both targets are
        // outside this excerpt, so the operation applied is not visible here.
 14955  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 14956  	JE   LBB2_250
 14957  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14958  	JNE  LBB2_1069
 14959  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14960  	JLE  LBB2_1069
 14961  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 14962  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 14963  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
 14964  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
 14965  	LONG $0x03e18341         // and    r9d, 3
 14966  	LONG $0x03ff8348         // cmp    rdi, 3
 14967  	JAE  LBB2_324
 14968  	WORD $0xff31             // xor    edi, edi
 14969  	JMP  LBB2_326
 14970  
 14971  LBB2_137:
        // Dispatch on the type selector in edi: 7 branches to LBB2_253, 8 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in rax after the
        // load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 8 case: out[i] = in[i] + scalar on 8-byte integers (64-bit add).
 14972  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 14973  	JE   LBB2_253
 14974  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 14975  	JNE  LBB2_1069
 14976  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 14977  	JLE  LBB2_1069
 14978  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 14979  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 14980  	LONG $0x04f98341         // cmp    r9d, 4
 14981  	JB   LBB2_141
        	// With >= 4 elements, LBB2_507 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 14982  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 14983  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 14984  	JBE  LBB2_507
 14985  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 14986  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 14987  	JBE  LBB2_507
 14988  
 14989  LBB2_141:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 14990  	WORD $0xf631 // xor    esi, esi
 14991  
 14992  LBB2_769:
        	// r9 = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 14993  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 14994  	WORD $0xf749; BYTE $0xd1 // not    r9
 14995  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 14996  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 14997  	LONG $0x03e78348         // and    rdi, 3
 14998  	JE   LBB2_771
 14999  
 15000  LBB2_770:
        	// Peel loop: one element per iteration until rdi reaches zero.
 15001  	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
 15002  	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
 15003  	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
 15004  	LONG $0x01c68348         // add    rsi, 1
 15005  	LONG $0xffc78348         // add    rdi, -1
 15006  	JNE  LBB2_770
 15007  
 15008  LBB2_771:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 15009  	LONG $0x03f98349 // cmp    r9, 3
 15010  	JB   LBB2_1069
 15011  
 15012  LBB2_772:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 15013  	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
 15014  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15015  	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
 15016  	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
 15017  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15018  	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
 15019  	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
 15020  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15021  	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
 15022  	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
 15023  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15024  	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
 15025  	LONG $0x04c68348             // add    rsi, 4
 15026  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 15027  	JNE  LBB2_772
 15028  	JMP  LBB2_1069
 15029  
 15030  LBB2_142:
        // Dispatch on the type selector in edi: 7 branches to LBB2_256, 8 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in r11 after the
        // load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 8 case: out[i] = scalar - in[i] on 8-byte integers (64-bit subtract).
 15031  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 15032  	JE   LBB2_256
 15033  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 15034  	JNE  LBB2_1069
 15035  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15036  	JLE  LBB2_1069
 15037  	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
 15038  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 15039  	LONG $0x04f98341         // cmp    r9d, 4
 15040  	JB   LBB2_146
        	// With >= 4 elements, LBB2_510 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 15041  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 15042  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15043  	JBE  LBB2_510
 15044  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 15045  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15046  	JBE  LBB2_510
 15047  
 15048  LBB2_146:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 15049  	WORD $0xf631 // xor    esi, esi
 15050  
 15051  LBB2_777:
        	// rdx = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 15052  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 15053  	WORD $0xf748; BYTE $0xd2 // not    rdx
 15054  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 15055  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15056  	LONG $0x03e78348         // and    rdi, 3
 15057  	JE   LBB2_779
 15058  
 15059  LBB2_778:
        	// Peel loop: rax is reloaded from r11 each time because sub overwrites it.
 15060  	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
 15061  	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
 15062  	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
 15063  	LONG $0x01c68348         // add    rsi, 1
 15064  	LONG $0xffc78348         // add    rdi, -1
 15065  	JNE  LBB2_778
 15066  
 15067  LBB2_779:
        	// Skip the unrolled loop when rdx (count - 1 - start index) is below 3.
 15068  	LONG $0x03fa8348 // cmp    rdx, 3
 15069  	JB   LBB2_1069
 15070  
 15071  LBB2_780:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 15072  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15073  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
 15074  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
 15075  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15076  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
 15077  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
 15078  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15079  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
 15080  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
 15081  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15082  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
 15083  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
 15084  	LONG $0x04c68348             // add    rsi, 4
 15085  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 15086  	JNE  LBB2_780
 15087  	JMP  LBB2_1069
 15088  
 15089  LBB2_147:
        // Dispatch on the type selector in edi: 7 branches to LBB2_259, 8 is handled
        // here, anything else goes to LBB2_1069 (outside this excerpt; presumably
        // the common exit - confirm).
        // Register roles as used below: rdx -> scalar operand (held in rax after the
        // load), rcx -> input array, r8 -> output array, r9d = element count.
        // Type 8 case: out[i] = in[i] + scalar on 8-byte integers (64-bit add).
 15090  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 15091  	JE   LBB2_259
 15092  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 15093  	JNE  LBB2_1069
 15094  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15095  	JLE  LBB2_1069
 15096  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 15097  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 15098  	LONG $0x04f98341         // cmp    r9d, 4
 15099  	JB   LBB2_151
        	// With >= 4 elements, LBB2_513 (outside this excerpt; presumably the SIMD
        	// path) is taken only when [rcx, rcx+8n) and [r8, r8+8n) do not overlap.
        	// Overlapping buffers fall through to the element-wise loops below.
 15100  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 15101  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15102  	JBE  LBB2_513
 15103  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 15104  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15105  	JBE  LBB2_513
 15106  
 15107  LBB2_151:
        	// Start the element-wise loops at index 0 (rsi is reused as the index).
 15108  	WORD $0xf631 // xor    esi, esi
 15109  
 15110  LBB2_785:
        	// r9 = count - 1 - rsi; rdi = count & 3 = iterations of the peel loop.
 15111  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15112  	WORD $0xf749; BYTE $0xd1 // not    r9
 15113  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15114  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15115  	LONG $0x03e78348         // and    rdi, 3
 15116  	JE   LBB2_787
 15117  
 15118  LBB2_786:
        	// Peel loop: one element per iteration until rdi reaches zero.
 15119  	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
 15120  	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
 15121  	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
 15122  	LONG $0x01c68348         // add    rsi, 1
 15123  	LONG $0xffc78348         // add    rdi, -1
 15124  	JNE  LBB2_786
 15125  
 15126  LBB2_787:
        	// Skip the unrolled loop when r9 (count - 1 - start index) is below 3.
 15127  	LONG $0x03f98349 // cmp    r9, 3
 15128  	JB   LBB2_1069
 15129  
 15130  LBB2_788:
        	// Main loop, unrolled 4x; runs until the index rsi equals the count in r10.
 15131  	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
 15132  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15133  	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
 15134  	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
 15135  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15136  	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
 15137  	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
 15138  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15139  	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
 15140  	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
 15141  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 15142  	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
 15143  	LONG $0x04c68348             // add    rsi, 4
 15144  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 15145  	JNE  LBB2_788
 15146  	JMP  LBB2_1069
 15147  
 15148  LBB2_152:
        	// Case handler selected on edi: 7 goes to LBB2_262, 8 is handled here,
        	// anything else leaves through LBB2_1069.
        	// NOTE(review): edi presumably carries the type id loaded in this
        	// routine's prologue, which is outside this excerpt - confirm.
        	// Work done for edi == 8: out[i] = s - in[i] on 64-bit elements, where
        	// in = rcx, out = r8, s = qword at [rdx], element count = r9d.
 15149  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 15150  	JE   LBB2_262
 15151  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 15152  	JNE  LBB2_1069
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15153  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15154  	JLE  LBB2_1069
        	// r11 = loop-invariant minuend, read once; rdx is reused as scratch below.
 15155  	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
        	// r10 = count zero-extended to 64 bits.
 15156  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 4 elements: go straight to the scalar loop.
 15157  	LONG $0x04f98341         // cmp    r9d, 4
 15158  	JB   LBB2_156
        	// Overlap test: if in+8*n <= out or out+8*n <= in the two ranges are
        	// disjoint and control goes to LBB2_516 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15159  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 15160  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15161  	JBE  LBB2_516
 15162  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 15163  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15164  	JBE  LBB2_516
 15165  
 15166  LBB2_156:
        	// rsi = element index, starting at 0.
 15167  	WORD $0xf631 // xor    esi, esi
 15168  
 15169  LBB2_793:
        	// rdx = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15170  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 15171  	WORD $0xf748; BYTE $0xd2 // not    rdx
 15172  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 15173  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15174  	LONG $0x03e78348         // and    rdi, 3
 15175  	JE   LBB2_795
 15176  
 15177  LBB2_794:
        	// One element per iteration: out[rsi] = r11 - in[rsi]; runs rdi times.
        	// rax is reloaded from r11 each time because sub overwrites it.
 15178  	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
 15179  	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
 15180  	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
 15181  	LONG $0x01c68348         // add    rsi, 1
 15182  	LONG $0xffc78348         // add    rdi, -1
 15183  	JNE  LBB2_794
 15184  
 15185  LBB2_795:
        	// rdx < 3 means fewer than 4 elements were outstanding at LBB2_793, so
        	// the loop above already covered them all.
 15186  	LONG $0x03fa8348 // cmp    rdx, 3
 15187  	JB   LBB2_1069
 15188  
 15189  LBB2_796:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15190  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15191  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
 15192  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
 15193  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15194  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
 15195  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
 15196  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15197  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
 15198  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
 15199  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 15200  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
 15201  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
 15202  	LONG $0x04c68348             // add    rsi, 4
 15203  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 15204  	JNE  LBB2_796
 15205  	JMP  LBB2_1069
 15206  
 15207  LBB2_157:
        	// Case handler: out[i] = in[i] * s on 16-bit elements (low 16 bits of
        	// the product are kept), where in = rcx, out = r8, s = word at [rdx],
        	// element count = r9d. Leaves through LBB2_1069.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15208  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15209  	JLE  LBB2_1069
        	// ax = loop-invariant multiplier, read once; rdx is reused as scratch below.
 15210  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15211  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15212  	LONG $0x10f98341         // cmp    r9d, 16
 15213  	JB   LBB2_159
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_519 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15214  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15215  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15216  	JBE  LBB2_519
 15217  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15218  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15219  	JBE  LBB2_519
 15220  
 15221  LBB2_159:
        	// rsi = element index, starting at 0.
 15222  	WORD $0xf631 // xor    esi, esi
 15223  
 15224  LBB2_801:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15225  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15226  	WORD $0xf749; BYTE $0xd1 // not    r9
 15227  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15228  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15229  	LONG $0x03e78348         // and    rdi, 3
 15230  	JE   LBB2_803
 15231  
 15232  LBB2_802:
        	// One element per iteration: out[rsi] = in[rsi] * ax; runs rdi times.
 15233  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15234  	LONG $0xd0af0f66             // imul    dx, ax
 15235  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15236  	LONG $0x01c68348             // add    rsi, 1
 15237  	LONG $0xffc78348             // add    rdi, -1
 15238  	JNE  LBB2_802
 15239  
 15240  LBB2_803:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_801, so
        	// the loop above already covered them all.
 15241  	LONG $0x03f98349 // cmp    r9, 3
 15242  	JB   LBB2_1069
 15243  
 15244  LBB2_804:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15245  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15246  	LONG $0xd0af0f66               // imul    dx, ax
 15247  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15248  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15249  	LONG $0xd0af0f66               // imul    dx, ax
 15250  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15251  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15252  	LONG $0xd0af0f66               // imul    dx, ax
 15253  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15254  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15255  	LONG $0xd0af0f66               // imul    dx, ax
 15256  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15257  	LONG $0x04c68348               // add    rsi, 4
 15258  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15259  	JNE  LBB2_804
 15260  	JMP  LBB2_1069
 15261  
 15262  LBB2_160:
        	// Case handler: out[i] = in[i] * s on 16-bit elements (low 16 bits of
        	// the product are kept), where in = rcx, out = r8, s = word at [rdx],
        	// element count = r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit multiply - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15263  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15264  	JLE  LBB2_1069
        	// ax = loop-invariant multiplier, read once; rdx is reused as scratch below.
 15265  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15266  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15267  	LONG $0x10f98341         // cmp    r9d, 16
 15268  	JB   LBB2_162
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_522 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15269  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15270  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15271  	JBE  LBB2_522
 15272  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15273  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15274  	JBE  LBB2_522
 15275  
 15276  LBB2_162:
        	// rsi = element index, starting at 0.
 15277  	WORD $0xf631 // xor    esi, esi
 15278  
 15279  LBB2_809:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15280  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15281  	WORD $0xf749; BYTE $0xd1 // not    r9
 15282  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15283  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15284  	LONG $0x03e78348         // and    rdi, 3
 15285  	JE   LBB2_811
 15286  
 15287  LBB2_810:
        	// One element per iteration: out[rsi] = in[rsi] * ax; runs rdi times.
 15288  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15289  	LONG $0xd0af0f66             // imul    dx, ax
 15290  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15291  	LONG $0x01c68348             // add    rsi, 1
 15292  	LONG $0xffc78348             // add    rdi, -1
 15293  	JNE  LBB2_810
 15294  
 15295  LBB2_811:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_809, so
        	// the loop above already covered them all.
 15296  	LONG $0x03f98349 // cmp    r9, 3
 15297  	JB   LBB2_1069
 15298  
 15299  LBB2_812:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15300  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15301  	LONG $0xd0af0f66               // imul    dx, ax
 15302  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15303  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15304  	LONG $0xd0af0f66               // imul    dx, ax
 15305  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15306  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15307  	LONG $0xd0af0f66               // imul    dx, ax
 15308  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15309  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15310  	LONG $0xd0af0f66               // imul    dx, ax
 15311  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15312  	LONG $0x04c68348               // add    rsi, 4
 15313  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15314  	JNE  LBB2_812
 15315  	JMP  LBB2_1069
 15316  
 15317  LBB2_163:
        	// Case handler: out[i] = in[i] * s on 16-bit elements (low 16 bits of
        	// the product are kept), where in = rcx, out = r8, s = word at [rdx],
        	// element count = r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit multiply - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15318  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15319  	JLE  LBB2_1069
        	// ax = loop-invariant multiplier, read once; rdx is reused as scratch below.
 15320  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15321  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15322  	LONG $0x10f98341         // cmp    r9d, 16
 15323  	JB   LBB2_165
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_525 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15324  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15325  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15326  	JBE  LBB2_525
 15327  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15328  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15329  	JBE  LBB2_525
 15330  
 15331  LBB2_165:
        	// rsi = element index, starting at 0.
 15332  	WORD $0xf631 // xor    esi, esi
 15333  
 15334  LBB2_817:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15335  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15336  	WORD $0xf749; BYTE $0xd1 // not    r9
 15337  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15338  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15339  	LONG $0x03e78348         // and    rdi, 3
 15340  	JE   LBB2_819
 15341  
 15342  LBB2_818:
        	// One element per iteration: out[rsi] = in[rsi] * ax; runs rdi times.
 15343  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15344  	LONG $0xd0af0f66             // imul    dx, ax
 15345  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15346  	LONG $0x01c68348             // add    rsi, 1
 15347  	LONG $0xffc78348             // add    rdi, -1
 15348  	JNE  LBB2_818
 15349  
 15350  LBB2_819:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_817, so
        	// the loop above already covered them all.
 15351  	LONG $0x03f98349 // cmp    r9, 3
 15352  	JB   LBB2_1069
 15353  
 15354  LBB2_820:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15355  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15356  	LONG $0xd0af0f66               // imul    dx, ax
 15357  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15358  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15359  	LONG $0xd0af0f66               // imul    dx, ax
 15360  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15361  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15362  	LONG $0xd0af0f66               // imul    dx, ax
 15363  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15364  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15365  	LONG $0xd0af0f66               // imul    dx, ax
 15366  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15367  	LONG $0x04c68348               // add    rsi, 4
 15368  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15369  	JNE  LBB2_820
 15370  	JMP  LBB2_1069
 15371  
 15372  LBB2_166:
        	// Case handler: out[i] = in[i] * s on 16-bit elements (low 16 bits of
        	// the product are kept), where in = rcx, out = r8, s = word at [rdx],
        	// element count = r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit multiply - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15373  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15374  	JLE  LBB2_1069
        	// ax = loop-invariant multiplier, read once; rdx is reused as scratch below.
 15375  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15376  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15377  	LONG $0x10f98341         // cmp    r9d, 16
 15378  	JB   LBB2_168
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_528 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15379  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15380  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15381  	JBE  LBB2_528
 15382  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15383  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15384  	JBE  LBB2_528
 15385  
 15386  LBB2_168:
        	// rsi = element index, starting at 0.
 15387  	WORD $0xf631 // xor    esi, esi
 15388  
 15389  LBB2_825:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15390  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15391  	WORD $0xf749; BYTE $0xd1 // not    r9
 15392  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15393  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15394  	LONG $0x03e78348         // and    rdi, 3
 15395  	JE   LBB2_827
 15396  
 15397  LBB2_826:
        	// One element per iteration: out[rsi] = in[rsi] * ax; runs rdi times.
 15398  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15399  	LONG $0xd0af0f66             // imul    dx, ax
 15400  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15401  	LONG $0x01c68348             // add    rsi, 1
 15402  	LONG $0xffc78348             // add    rdi, -1
 15403  	JNE  LBB2_826
 15404  
 15405  LBB2_827:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_825, so
        	// the loop above already covered them all.
 15406  	LONG $0x03f98349 // cmp    r9, 3
 15407  	JB   LBB2_1069
 15408  
 15409  LBB2_828:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15410  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15411  	LONG $0xd0af0f66               // imul    dx, ax
 15412  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15413  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15414  	LONG $0xd0af0f66               // imul    dx, ax
 15415  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15416  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15417  	LONG $0xd0af0f66               // imul    dx, ax
 15418  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15419  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15420  	LONG $0xd0af0f66               // imul    dx, ax
 15421  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15422  	LONG $0x04c68348               // add    rsi, 4
 15423  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15424  	JNE  LBB2_828
 15425  	JMP  LBB2_1069
 15426  
 15427  LBB2_169:
        	// Case handler: out[i] = in[i] + s on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15428  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15429  	JLE  LBB2_1069
        	// ax = loop-invariant addend, read once; rdx is reused as scratch below.
 15430  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15431  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15432  	LONG $0x10f98341         // cmp    r9d, 16
 15433  	JB   LBB2_171
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_531 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15434  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15435  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15436  	JBE  LBB2_531
 15437  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15438  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15439  	JBE  LBB2_531
 15440  
 15441  LBB2_171:
        	// rsi = element index, starting at 0.
 15442  	WORD $0xf631 // xor    esi, esi
 15443  
 15444  LBB2_833:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15445  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15446  	WORD $0xf749; BYTE $0xd1 // not    r9
 15447  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15448  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15449  	LONG $0x03e78348         // and    rdi, 3
 15450  	JE   LBB2_835
 15451  
 15452  LBB2_834:
        	// One element per iteration: out[rsi] = in[rsi] + ax; runs rdi times.
 15453  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15454  	WORD $0x0166; BYTE $0xc2     // add    dx, ax
 15455  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15456  	LONG $0x01c68348             // add    rsi, 1
 15457  	LONG $0xffc78348             // add    rdi, -1
 15458  	JNE  LBB2_834
 15459  
 15460  LBB2_835:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_833, so
        	// the loop above already covered them all.
 15461  	LONG $0x03f98349 // cmp    r9, 3
 15462  	JB   LBB2_1069
 15463  
 15464  LBB2_836:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15465  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15466  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15467  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15468  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15469  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15470  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15471  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15472  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15473  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15474  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15475  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15476  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15477  	LONG $0x04c68348               // add    rsi, 4
 15478  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15479  	JNE  LBB2_836
 15480  	JMP  LBB2_1069
 15481  
 15482  LBB2_172:
        	// Case handler: out[i] = in[i] + s on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit add - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15483  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15484  	JLE  LBB2_1069
        	// ax = loop-invariant addend, read once; rdx is reused as scratch below.
 15485  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15486  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15487  	LONG $0x10f98341         // cmp    r9d, 16
 15488  	JB   LBB2_174
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_534 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15489  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15490  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15491  	JBE  LBB2_534
 15492  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15493  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15494  	JBE  LBB2_534
 15495  
 15496  LBB2_174:
        	// rsi = element index, starting at 0.
 15497  	WORD $0xf631 // xor    esi, esi
 15498  
 15499  LBB2_841:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15500  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15501  	WORD $0xf749; BYTE $0xd1 // not    r9
 15502  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15503  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15504  	LONG $0x03e78348         // and    rdi, 3
 15505  	JE   LBB2_843
 15506  
 15507  LBB2_842:
        	// One element per iteration: out[rsi] = in[rsi] + ax; runs rdi times.
 15508  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15509  	WORD $0x0166; BYTE $0xc2     // add    dx, ax
 15510  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15511  	LONG $0x01c68348             // add    rsi, 1
 15512  	LONG $0xffc78348             // add    rdi, -1
 15513  	JNE  LBB2_842
 15514  
 15515  LBB2_843:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_841, so
        	// the loop above already covered them all.
 15516  	LONG $0x03f98349 // cmp    r9, 3
 15517  	JB   LBB2_1069
 15518  
 15519  LBB2_844:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15520  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15521  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15522  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15523  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15524  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15525  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15526  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15527  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15528  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15529  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15530  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15531  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15532  	LONG $0x04c68348               // add    rsi, 4
 15533  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15534  	JNE  LBB2_844
 15535  	JMP  LBB2_1069
 15536  
 15537  LBB2_175:
        	// Case handler: out[i] = s - in[i] on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15538  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15539  	JLE  LBB2_1069
        	// ax = loop-invariant minuend, read once; rdx is reused as scratch below.
 15540  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15541  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15542  	LONG $0x10f98341         // cmp    r9d, 16
 15543  	JB   LBB2_177
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_537 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15544  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15545  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15546  	JBE  LBB2_537
 15547  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15548  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15549  	JBE  LBB2_537
 15550  
 15551  LBB2_177:
        	// rsi = element index, starting at 0.
 15552  	WORD $0xf631 // xor    esi, esi
 15553  
 15554  LBB2_849:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15555  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15556  	WORD $0xf749; BYTE $0xd1 // not    r9
 15557  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15558  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15559  	LONG $0x03e78348         // and    rdi, 3
 15560  	JE   LBB2_851
 15561  
 15562  LBB2_850:
        	// One element per iteration: out[rsi] = ax - in[rsi]; runs rdi times.
        	// edx is reloaded from eax each time because sub overwrites dx.
 15563  	WORD $0xc289                 // mov    edx, eax
 15564  	LONG $0x71142b66             // sub    dx, word [rcx + 2*rsi]
 15565  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15566  	LONG $0x01c68348             // add    rsi, 1
 15567  	LONG $0xffc78348             // add    rdi, -1
 15568  	JNE  LBB2_850
 15569  
 15570  LBB2_851:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_849, so
        	// the loop above already covered them all.
 15571  	LONG $0x03f98349 // cmp    r9, 3
 15572  	JB   LBB2_1069
 15573  
 15574  LBB2_852:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15575  	WORD $0xc289                   // mov    edx, eax
 15576  	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
 15577  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15578  	WORD $0xc289                   // mov    edx, eax
 15579  	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
 15580  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15581  	WORD $0xc289                   // mov    edx, eax
 15582  	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
 15583  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15584  	WORD $0xc289                   // mov    edx, eax
 15585  	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
 15586  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15587  	LONG $0x04c68348               // add    rsi, 4
 15588  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15589  	JNE  LBB2_852
 15590  	JMP  LBB2_1069
 15591  
 15592  LBB2_178:
        	// Case handler: out[i] = s - in[i] on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit subtract - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15593  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15594  	JLE  LBB2_1069
        	// ax = loop-invariant minuend, read once; rdx is reused as scratch below.
 15595  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15596  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15597  	LONG $0x10f98341         // cmp    r9d, 16
 15598  	JB   LBB2_180
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_540 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15599  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15600  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15601  	JBE  LBB2_540
 15602  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15603  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15604  	JBE  LBB2_540
 15605  
 15606  LBB2_180:
        	// rsi = element index, starting at 0.
 15607  	WORD $0xf631 // xor    esi, esi
 15608  
 15609  LBB2_857:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15610  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15611  	WORD $0xf749; BYTE $0xd1 // not    r9
 15612  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15613  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15614  	LONG $0x03e78348         // and    rdi, 3
 15615  	JE   LBB2_859
 15616  
 15617  LBB2_858:
        	// One element per iteration: out[rsi] = ax - in[rsi]; runs rdi times.
        	// edx is reloaded from eax each time because sub overwrites dx.
 15618  	WORD $0xc289                 // mov    edx, eax
 15619  	LONG $0x71142b66             // sub    dx, word [rcx + 2*rsi]
 15620  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15621  	LONG $0x01c68348             // add    rsi, 1
 15622  	LONG $0xffc78348             // add    rdi, -1
 15623  	JNE  LBB2_858
 15624  
 15625  LBB2_859:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_857, so
        	// the loop above already covered them all.
 15626  	LONG $0x03f98349 // cmp    r9, 3
 15627  	JB   LBB2_1069
 15628  
 15629  LBB2_860:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15630  	WORD $0xc289                   // mov    edx, eax
 15631  	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
 15632  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15633  	WORD $0xc289                   // mov    edx, eax
 15634  	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
 15635  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15636  	WORD $0xc289                   // mov    edx, eax
 15637  	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
 15638  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15639  	WORD $0xc289                   // mov    edx, eax
 15640  	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
 15641  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15642  	LONG $0x04c68348               // add    rsi, 4
 15643  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15644  	JNE  LBB2_860
 15645  	JMP  LBB2_1069
 15646  
 15647  LBB2_181:
        	// Case handler: out[i] = in[i] + s on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit add - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15648  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15649  	JLE  LBB2_1069
        	// ax = loop-invariant addend, read once; rdx is reused as scratch below.
 15650  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15651  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15652  	LONG $0x10f98341         // cmp    r9d, 16
 15653  	JB   LBB2_183
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_543 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15654  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15655  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15656  	JBE  LBB2_543
 15657  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15658  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15659  	JBE  LBB2_543
 15660  
 15661  LBB2_183:
        	// rsi = element index, starting at 0.
 15662  	WORD $0xf631 // xor    esi, esi
 15663  
 15664  LBB2_865:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15665  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15666  	WORD $0xf749; BYTE $0xd1 // not    r9
 15667  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15668  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15669  	LONG $0x03e78348         // and    rdi, 3
 15670  	JE   LBB2_867
 15671  
 15672  LBB2_866:
        	// One element per iteration: out[rsi] = in[rsi] + ax; runs rdi times.
 15673  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15674  	WORD $0x0166; BYTE $0xc2     // add    dx, ax
 15675  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15676  	LONG $0x01c68348             // add    rsi, 1
 15677  	LONG $0xffc78348             // add    rdi, -1
 15678  	JNE  LBB2_866
 15679  
 15680  LBB2_867:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_865, so
        	// the loop above already covered them all.
 15681  	LONG $0x03f98349 // cmp    r9, 3
 15682  	JB   LBB2_1069
 15683  
 15684  LBB2_868:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15685  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15686  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15687  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15688  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15689  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15690  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15691  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15692  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15693  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15694  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15695  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15696  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15697  	LONG $0x04c68348               // add    rsi, 4
 15698  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15699  	JNE  LBB2_868
 15700  	JMP  LBB2_1069
 15701  
 15702  LBB2_184:
        	// Case handler: out[i] = in[i] + s on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit add - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15703  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15704  	JLE  LBB2_1069
        	// ax = loop-invariant addend, read once; rdx is reused as scratch below.
 15705  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15706  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15707  	LONG $0x10f98341         // cmp    r9d, 16
 15708  	JB   LBB2_186
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_546 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15709  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15710  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15711  	JBE  LBB2_546
 15712  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15713  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15714  	JBE  LBB2_546
 15715  
 15716  LBB2_186:
        	// rsi = element index, starting at 0.
 15717  	WORD $0xf631 // xor    esi, esi
 15718  
 15719  LBB2_873:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15720  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15721  	WORD $0xf749; BYTE $0xd1 // not    r9
 15722  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15723  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15724  	LONG $0x03e78348         // and    rdi, 3
 15725  	JE   LBB2_875
 15726  
 15727  LBB2_874:
        	// One element per iteration: out[rsi] = in[rsi] + ax; runs rdi times.
 15728  	LONG $0x7114b70f             // movzx    edx, word [rcx + 2*rsi]
 15729  	WORD $0x0166; BYTE $0xc2     // add    dx, ax
 15730  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15731  	LONG $0x01c68348             // add    rsi, 1
 15732  	LONG $0xffc78348             // add    rdi, -1
 15733  	JNE  LBB2_874
 15734  
 15735  LBB2_875:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_873, so
        	// the loop above already covered them all.
 15736  	LONG $0x03f98349 // cmp    r9, 3
 15737  	JB   LBB2_1069
 15738  
 15739  LBB2_876:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15740  	LONG $0x7114b70f               // movzx    edx, word [rcx + 2*rsi]
 15741  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15742  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15743  	LONG $0x7154b70f; BYTE $0x02   // movzx    edx, word [rcx + 2*rsi + 2]
 15744  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15745  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15746  	LONG $0x7154b70f; BYTE $0x04   // movzx    edx, word [rcx + 2*rsi + 4]
 15747  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15748  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15749  	LONG $0x7154b70f; BYTE $0x06   // movzx    edx, word [rcx + 2*rsi + 6]
 15750  	WORD $0x0166; BYTE $0xc2       // add    dx, ax
 15751  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15752  	LONG $0x04c68348               // add    rsi, 4
 15753  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15754  	JNE  LBB2_876
 15755  	JMP  LBB2_1069
 15756  
 15757  LBB2_187:
        	// Case handler: out[i] = s - in[i] on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit subtract - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15758  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15759  	JLE  LBB2_1069
        	// ax = loop-invariant minuend, read once; rdx is reused as scratch below.
 15760  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15761  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15762  	LONG $0x10f98341         // cmp    r9d, 16
 15763  	JB   LBB2_189
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_549 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15764  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15765  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15766  	JBE  LBB2_549
 15767  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15768  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15769  	JBE  LBB2_549
 15770  
 15771  LBB2_189:
        	// rsi = element index, starting at 0.
 15772  	WORD $0xf631 // xor    esi, esi
 15773  
 15774  LBB2_881:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15775  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15776  	WORD $0xf749; BYTE $0xd1 // not    r9
 15777  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15778  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15779  	LONG $0x03e78348         // and    rdi, 3
 15780  	JE   LBB2_883
 15781  
 15782  LBB2_882:
        	// One element per iteration: out[rsi] = ax - in[rsi]; runs rdi times.
        	// edx is reloaded from eax each time because sub overwrites dx.
 15783  	WORD $0xc289                 // mov    edx, eax
 15784  	LONG $0x71142b66             // sub    dx, word [rcx + 2*rsi]
 15785  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15786  	LONG $0x01c68348             // add    rsi, 1
 15787  	LONG $0xffc78348             // add    rdi, -1
 15788  	JNE  LBB2_882
 15789  
 15790  LBB2_883:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_881, so
        	// the loop above already covered them all.
 15791  	LONG $0x03f98349 // cmp    r9, 3
 15792  	JB   LBB2_1069
 15793  
 15794  LBB2_884:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15795  	WORD $0xc289                   // mov    edx, eax
 15796  	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
 15797  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15798  	WORD $0xc289                   // mov    edx, eax
 15799  	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
 15800  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15801  	WORD $0xc289                   // mov    edx, eax
 15802  	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
 15803  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15804  	WORD $0xc289                   // mov    edx, eax
 15805  	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
 15806  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15807  	LONG $0x04c68348               // add    rsi, 4
 15808  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15809  	JNE  LBB2_884
 15810  	JMP  LBB2_1069
 15811  
 15812  LBB2_190:
        	// Case handler: out[i] = s - in[i] on 16-bit elements (wraps at 16
        	// bits), where in = rcx, out = r8, s = word at [rdx], element count =
        	// r9d. Leaves through LBB2_1069.
        	// NOTE(review): presumably one of several switch cases of the generated
        	// source that compile to the same 16-bit subtract - confirm.
        	// Nothing to do when the 32-bit count is zero or negative (signed test).
 15813  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15814  	JLE  LBB2_1069
        	// ax = loop-invariant minuend, read once; rdx is reused as scratch below.
 15815  	WORD $0xb70f; BYTE $0x02 // movzx    eax, word [rdx]
        	// r10 = count zero-extended to 64 bits.
 15816  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        	// Fewer than 16 elements: go straight to the scalar loop.
 15817  	LONG $0x10f98341         // cmp    r9d, 16
 15818  	JB   LBB2_192
        	// Overlap test: if in+2*n <= out or out+2*n <= in the two ranges are
        	// disjoint and control goes to LBB2_552 (outside this excerpt;
        	// presumably the vectorized loop - confirm). Overlapping ranges fall
        	// through to the scalar loop.
 15819  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 15820  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15821  	JBE  LBB2_552
 15822  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10]
 15823  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15824  	JBE  LBB2_552
 15825  
 15826  LBB2_192:
        	// rsi = element index, starting at 0.
 15827  	WORD $0xf631 // xor    esi, esi
 15828  
 15829  LBB2_889:
        	// r9  = r10 - rsi - 1 (computed as ~rsi + r10).
        	// rdi = r10 & 3 = number of single-element iterations to run before the
        	// 4-way unrolled loop; skip them when it is zero.
 15830  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 15831  	WORD $0xf749; BYTE $0xd1 // not    r9
 15832  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 15833  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 15834  	LONG $0x03e78348         // and    rdi, 3
 15835  	JE   LBB2_891
 15836  
 15837  LBB2_890:
        	// One element per iteration: out[rsi] = ax - in[rsi]; runs rdi times.
        	// edx is reloaded from eax each time because sub overwrites dx.
 15838  	WORD $0xc289                 // mov    edx, eax
 15839  	LONG $0x71142b66             // sub    dx, word [rcx + 2*rsi]
 15840  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 15841  	LONG $0x01c68348             // add    rsi, 1
 15842  	LONG $0xffc78348             // add    rdi, -1
 15843  	JNE  LBB2_890
 15844  
 15845  LBB2_891:
        	// r9 < 3 means fewer than 4 elements were outstanding at LBB2_889, so
        	// the loop above already covered them all.
 15846  	LONG $0x03f98349 // cmp    r9, 3
 15847  	JB   LBB2_1069
 15848  
 15849  LBB2_892:
        	// Four elements per iteration; ends when the index rsi reaches r10.
 15850  	WORD $0xc289                   // mov    edx, eax
 15851  	LONG $0x71142b66               // sub    dx, word [rcx + 2*rsi]
 15852  	LONG $0x14894166; BYTE $0x70   // mov    word [r8 + 2*rsi], dx
 15853  	WORD $0xc289                   // mov    edx, eax
 15854  	LONG $0x71542b66; BYTE $0x02   // sub    dx, word [rcx + 2*rsi + 2]
 15855  	LONG $0x54894166; WORD $0x0270 // mov    word [r8 + 2*rsi + 2], dx
 15856  	WORD $0xc289                   // mov    edx, eax
 15857  	LONG $0x71542b66; BYTE $0x04   // sub    dx, word [rcx + 2*rsi + 4]
 15858  	LONG $0x54894166; WORD $0x0470 // mov    word [r8 + 2*rsi + 4], dx
 15859  	WORD $0xc289                   // mov    edx, eax
 15860  	LONG $0x71542b66; BYTE $0x06   // sub    dx, word [rcx + 2*rsi + 6]
 15861  	LONG $0x54894166; WORD $0x0670 // mov    word [r8 + 2*rsi + 6], dx
 15862  	LONG $0x04c68348               // add    rsi, 4
 15863  	WORD $0x3949; BYTE $0xf2       // cmp    r10, rsi
 15864  	JNE  LBB2_892
 15865  	JMP  LBB2_1069
 15866  
 15867  LBB2_193: // loads the 64-bit scalar at [rdx] into rax and prepares counters; the code that consumes them (LBB2_377 / LBB2_379) is outside this excerpt
 15868  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15869  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt
 15870  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 15871  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 15872  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
 15873  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
 15874  	LONG $0x03e18341         // and    r9d, 3
 15875  	LONG $0x03ff8348         // cmp    rdi, 3
 15876  	JAE  LBB2_377 // count - 1 >= 3, i.e. at least 4 elements
 15877  	WORD $0xff31             // xor    edi, edi
 15878  	JMP  LBB2_379 // fewer than 4 elements: enter with rdi = 0 and r9d = count & 3
 15879  
 15880  LBB2_196: // single-precision floats: dword[r8 + 4*i] = dword[rcx + 4*i] * dword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 15881  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15882  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 15883  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 15884  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 15885  	LONG $0x08f98341         // cmp    r9d, 8
 15886  	JB   LBB2_198 // fewer than 8 elements: take the scalar loop
 15887  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 15888  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15889  	JBE  LBB2_555 // input ends at or before output start: no overlap; LBB2_555 is outside this excerpt (presumably the vector path - TODO confirm)
 15890  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 15891  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15892  	JBE  LBB2_555 // output ends at or before input start: no overlap
 15893  
 15894  LBB2_198: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 15895  	WORD $0xd231 // xor    edx, edx
 15896  
 15897  LBB2_897: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 15898  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 15899  	WORD $0xf748; BYTE $0xd6 // not    rsi
 15900  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 15901  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 15902  	LONG $0x03e78348         // and    rdi, 3
 15903  	JE   LBB2_899
 15904  
 15905  LBB2_898: // one element per iteration, rdi times
 15906  	LONG $0x0c100ff3; BYTE $0x91   // movss    xmm1, dword [rcx + 4*rdx]
 15907  	LONG $0xc8590ff3               // mulss    xmm1, xmm0
 15908  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 15909  	LONG $0x01c28348               // add    rdx, 1
 15910  	LONG $0xffc78348               // add    rdi, -1
 15911  	JNE  LBB2_898
 15912  
 15913  LBB2_899: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 15914  	LONG $0x03fe8348 // cmp    rsi, 3
 15915  	JB   LBB2_1069
 15916  
 15917  LBB2_900: // four elements per iteration until rdx == rax
 15918  	LONG $0x0c100ff3; BYTE $0x91               // movss    xmm1, dword [rcx + 4*rdx]
 15919  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15920  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 15921  	LONG $0x4c100ff3; WORD $0x0491             // movss    xmm1, dword [rcx + 4*rdx + 4]
 15922  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15923  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 15924  	LONG $0x4c100ff3; WORD $0x0891             // movss    xmm1, dword [rcx + 4*rdx + 8]
 15925  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15926  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 15927  	LONG $0x4c100ff3; WORD $0x0c91             // movss    xmm1, dword [rcx + 4*rdx + 12]
 15928  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15929  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 15930  	LONG $0x04c28348                           // add    rdx, 4
 15931  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 15932  	JNE  LBB2_900
 15933  	JMP  LBB2_1069
 15934  
 15935  LBB2_199: // loads the 64-bit scalar at [rdx] into rax and prepares counters; the code that consumes them (LBB2_385 / LBB2_387) is outside this excerpt
 15936  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15937  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt
 15938  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 15939  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 15940  	LONG $0xff7e8d48         // lea    rdi, [rsi - 1]
 15941  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
 15942  	LONG $0x03e18341         // and    r9d, 3
 15943  	LONG $0x03ff8348         // cmp    rdi, 3
 15944  	JAE  LBB2_385 // count - 1 >= 3, i.e. at least 4 elements
 15945  	WORD $0xff31             // xor    edi, edi
 15946  	JMP  LBB2_387 // fewer than 4 elements: enter with rdi = 0 and r9d = count & 3
 15947  
 15948  LBB2_202: // single-precision floats: dword[r8 + 4*i] = dword[rcx + 4*i] * dword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 15949  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 15950  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 15951  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 15952  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 15953  	LONG $0x08f98341         // cmp    r9d, 8
 15954  	JB   LBB2_204 // fewer than 8 elements: take the scalar loop
 15955  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 15956  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 15957  	JBE  LBB2_558 // input ends at or before output start: no overlap; LBB2_558 is outside this excerpt (presumably the vector path - TODO confirm)
 15958  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 15959  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 15960  	JBE  LBB2_558 // output ends at or before input start: no overlap
 15961  
 15962  LBB2_204: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 15963  	WORD $0xd231 // xor    edx, edx
 15964  
 15965  LBB2_905: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 15966  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 15967  	WORD $0xf748; BYTE $0xd6 // not    rsi
 15968  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 15969  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 15970  	LONG $0x03e78348         // and    rdi, 3
 15971  	JE   LBB2_907
 15972  
 15973  LBB2_906: // one element per iteration, rdi times
 15974  	LONG $0x0c100ff3; BYTE $0x91   // movss    xmm1, dword [rcx + 4*rdx]
 15975  	LONG $0xc8590ff3               // mulss    xmm1, xmm0
 15976  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 15977  	LONG $0x01c28348               // add    rdx, 1
 15978  	LONG $0xffc78348               // add    rdi, -1
 15979  	JNE  LBB2_906
 15980  
 15981  LBB2_907: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 15982  	LONG $0x03fe8348 // cmp    rsi, 3
 15983  	JB   LBB2_1069
 15984  
 15985  LBB2_908: // four elements per iteration until rdx == rax
 15986  	LONG $0x0c100ff3; BYTE $0x91               // movss    xmm1, dword [rcx + 4*rdx]
 15987  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15988  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 15989  	LONG $0x4c100ff3; WORD $0x0491             // movss    xmm1, dword [rcx + 4*rdx + 4]
 15990  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15991  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 15992  	LONG $0x4c100ff3; WORD $0x0891             // movss    xmm1, dword [rcx + 4*rdx + 8]
 15993  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15994  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 15995  	LONG $0x4c100ff3; WORD $0x0c91             // movss    xmm1, dword [rcx + 4*rdx + 12]
 15996  	LONG $0xc8590ff3                           // mulss    xmm1, xmm0
 15997  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 15998  	LONG $0x04c28348                           // add    rdx, 4
 15999  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 16000  	JNE  LBB2_908
 16001  	JMP  LBB2_1069
 16002  
 16003  LBB2_205: // 64-bit integers: qword[r8 + 8*i] = qword[rcx + 8*i] + qword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 16004  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16005  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16006  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 16007  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16008  	LONG $0x04f98341         // cmp    r9d, 4
 16009  	JB   LBB2_207 // fewer than 4 elements: take the scalar loop
 16010  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 16011  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16012  	JBE  LBB2_561 // input ends at or before output start: no overlap; LBB2_561 is outside this excerpt (presumably the vector path - TODO confirm)
 16013  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 16014  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16015  	JBE  LBB2_561 // output ends at or before input start: no overlap
 16016  
 16017  LBB2_207: // scalar path: fewer than 4 elements, or the input and output ranges overlap; start at index rsi = 0
 16018  	WORD $0xf631 // xor    esi, esi
 16019  
 16020  LBB2_913: // r9 = r10 - rsi - 1 (elements left minus one); rdi = r10 & 3 = iterations of the single-element loop
 16021  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16022  	WORD $0xf749; BYTE $0xd1 // not    r9
 16023  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16024  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16025  	LONG $0x03e78348         // and    rdi, 3
 16026  	JE   LBB2_915
 16027  
 16028  LBB2_914: // one element per iteration, rdi times
 16029  	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
 16030  	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
 16031  	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
 16032  	LONG $0x01c68348         // add    rsi, 1
 16033  	LONG $0xffc78348         // add    rdi, -1
 16034  	JNE  LBB2_914
 16035  
 16036  LBB2_915: // done if r9 < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16037  	LONG $0x03f98349 // cmp    r9, 3
 16038  	JB   LBB2_1069
 16039  
 16040  LBB2_916: // four elements per iteration until rsi == r10
 16041  	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
 16042  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16043  	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
 16044  	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
 16045  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16046  	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
 16047  	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
 16048  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16049  	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
 16050  	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
 16051  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16052  	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
 16053  	LONG $0x04c68348             // add    rsi, 4
 16054  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16055  	JNE  LBB2_916
 16056  	JMP  LBB2_1069
 16057  
 16058  LBB2_208: // single-precision floats: dword[r8 + 4*i] = dword[rcx + 4*i] + dword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 16059  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16060  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16061  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 16062  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 16063  	LONG $0x08f98341         // cmp    r9d, 8
 16064  	JB   LBB2_210 // fewer than 8 elements: take the scalar loop
 16065  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 16066  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16067  	JBE  LBB2_564 // input ends at or before output start: no overlap; LBB2_564 is outside this excerpt (presumably the vector path - TODO confirm)
 16068  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 16069  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16070  	JBE  LBB2_564 // output ends at or before input start: no overlap
 16071  
 16072  LBB2_210: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 16073  	WORD $0xd231 // xor    edx, edx
 16074  
 16075  LBB2_921: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 16076  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 16077  	WORD $0xf748; BYTE $0xd6 // not    rsi
 16078  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 16079  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 16080  	LONG $0x03e78348         // and    rdi, 3
 16081  	JE   LBB2_923
 16082  
 16083  LBB2_922: // one element per iteration, rdi times
 16084  	LONG $0x0c100ff3; BYTE $0x91   // movss    xmm1, dword [rcx + 4*rdx]
 16085  	LONG $0xc8580ff3               // addss    xmm1, xmm0
 16086  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 16087  	LONG $0x01c28348               // add    rdx, 1
 16088  	LONG $0xffc78348               // add    rdi, -1
 16089  	JNE  LBB2_922
 16090  
 16091  LBB2_923: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16092  	LONG $0x03fe8348 // cmp    rsi, 3
 16093  	JB   LBB2_1069
 16094  
 16095  LBB2_924: // four elements per iteration until rdx == rax
 16096  	LONG $0x0c100ff3; BYTE $0x91               // movss    xmm1, dword [rcx + 4*rdx]
 16097  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16098  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 16099  	LONG $0x4c100ff3; WORD $0x0491             // movss    xmm1, dword [rcx + 4*rdx + 4]
 16100  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16101  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 16102  	LONG $0x4c100ff3; WORD $0x0891             // movss    xmm1, dword [rcx + 4*rdx + 8]
 16103  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16104  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 16105  	LONG $0x4c100ff3; WORD $0x0c91             // movss    xmm1, dword [rcx + 4*rdx + 12]
 16106  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16107  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 16108  	LONG $0x04c28348                           // add    rdx, 4
 16109  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 16110  	JNE  LBB2_924
 16111  	JMP  LBB2_1069
 16112  
 16113  LBB2_211: // 64-bit integers: qword[r8 + 8*i] = qword[rdx] - qword[rcx + 8*i] for i in [0, r9d); [rdx] is read once as a scalar into r11
 16114  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16115  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16116  	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
 16117  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16118  	LONG $0x04f98341         // cmp    r9d, 4
 16119  	JB   LBB2_213 // fewer than 4 elements: take the scalar loop
 16120  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 16121  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16122  	JBE  LBB2_567 // input ends at or before output start: no overlap; LBB2_567 is outside this excerpt (presumably the vector path - TODO confirm)
 16123  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 16124  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16125  	JBE  LBB2_567 // output ends at or before input start: no overlap
 16126  
 16127  LBB2_213: // scalar path: fewer than 4 elements, or the input and output ranges overlap; start at index rsi = 0
 16128  	WORD $0xf631 // xor    esi, esi
 16129  
 16130  LBB2_929: // rdx = r10 - rsi - 1 (elements left minus one); rdi = r10 & 3 = iterations of the single-element loop
 16131  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 16132  	WORD $0xf748; BYTE $0xd2 // not    rdx
 16133  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 16134  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16135  	LONG $0x03e78348         // and    rdi, 3
 16136  	JE   LBB2_931
 16137  
 16138  LBB2_930: // one element per iteration, rdi times
 16139  	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
 16140  	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
 16141  	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
 16142  	LONG $0x01c68348         // add    rsi, 1
 16143  	LONG $0xffc78348         // add    rdi, -1
 16144  	JNE  LBB2_930
 16145  
 16146  LBB2_931: // done if rdx < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16147  	LONG $0x03fa8348 // cmp    rdx, 3
 16148  	JB   LBB2_1069
 16149  
 16150  LBB2_932: // four elements per iteration until rsi == r10
 16151  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16152  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
 16153  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
 16154  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16155  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
 16156  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
 16157  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16158  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
 16159  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
 16160  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16161  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
 16162  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
 16163  	LONG $0x04c68348             // add    rsi, 4
 16164  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16165  	JNE  LBB2_932
 16166  	JMP  LBB2_1069
 16167  
 16168  LBB2_214: // single-precision floats: dword[r8 + 4*i] = dword[rdx] - dword[rcx + 4*i] for i in [0, r9d); [rdx] is read once as a scalar
 16169  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16170  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16171  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 16172  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 16173  	LONG $0x08f98341         // cmp    r9d, 8
 16174  	JB   LBB2_216 // fewer than 8 elements: take the scalar loop
 16175  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 16176  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16177  	JBE  LBB2_570 // input ends at or before output start: no overlap; LBB2_570 is outside this excerpt (presumably the vector path - TODO confirm)
 16178  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 16179  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16180  	JBE  LBB2_570 // output ends at or before input start: no overlap
 16181  
 16182  LBB2_216: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 16183  	WORD $0xd231 // xor    edx, edx
 16184  
 16185  LBB2_937: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 16186  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 16187  	WORD $0xf748; BYTE $0xd6 // not    rsi
 16188  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 16189  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 16190  	LONG $0x03e78348         // and    rdi, 3
 16191  	JE   LBB2_939
 16192  
 16193  LBB2_938: // one element per iteration, rdi times; xmm1 is reloaded with the scalar because subss overwrites it
 16194  	WORD $0x280f; BYTE $0xc8       // movaps    xmm1, xmm0
 16195  	LONG $0x0c5c0ff3; BYTE $0x91   // subss    xmm1, dword [rcx + 4*rdx]
 16196  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 16197  	LONG $0x01c28348               // add    rdx, 1
 16198  	LONG $0xffc78348               // add    rdi, -1
 16199  	JNE  LBB2_938
 16200  
 16201  LBB2_939: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16202  	LONG $0x03fe8348 // cmp    rsi, 3
 16203  	JB   LBB2_1069
 16204  
 16205  LBB2_940: // four elements per iteration until rdx == rax
 16206  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16207  	LONG $0x0c5c0ff3; BYTE $0x91               // subss    xmm1, dword [rcx + 4*rdx]
 16208  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 16209  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16210  	LONG $0x4c5c0ff3; WORD $0x0491             // subss    xmm1, dword [rcx + 4*rdx + 4]
 16211  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 16212  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16213  	LONG $0x4c5c0ff3; WORD $0x0891             // subss    xmm1, dword [rcx + 4*rdx + 8]
 16214  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 16215  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16216  	LONG $0x4c5c0ff3; WORD $0x0c91             // subss    xmm1, dword [rcx + 4*rdx + 12]
 16217  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 16218  	LONG $0x04c28348                           // add    rdx, 4
 16219  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 16220  	JNE  LBB2_940
 16221  	JMP  LBB2_1069
 16222  
 16223  LBB2_217: // 64-bit integers: qword[r8 + 8*i] = qword[rcx + 8*i] + qword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 16224  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16225  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16226  	WORD $0x8b48; BYTE $0x02 // mov    rax, qword [rdx]
 16227  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16228  	LONG $0x04f98341         // cmp    r9d, 4
 16229  	JB   LBB2_219 // fewer than 4 elements: take the scalar loop
 16230  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 16231  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16232  	JBE  LBB2_573 // input ends at or before output start: no overlap; LBB2_573 is outside this excerpt (presumably the vector path - TODO confirm)
 16233  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 16234  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16235  	JBE  LBB2_573 // output ends at or before input start: no overlap
 16236  
 16237  LBB2_219: // scalar path: fewer than 4 elements, or the input and output ranges overlap; start at index rsi = 0
 16238  	WORD $0xf631 // xor    esi, esi
 16239  
 16240  LBB2_945: // r9 = r10 - rsi - 1 (elements left minus one); rdi = r10 & 3 = iterations of the single-element loop
 16241  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16242  	WORD $0xf749; BYTE $0xd1 // not    r9
 16243  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16244  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16245  	LONG $0x03e78348         // and    rdi, 3
 16246  	JE   LBB2_947
 16247  
 16248  LBB2_946: // one element per iteration, rdi times
 16249  	LONG $0xf1148b48         // mov    rdx, qword [rcx + 8*rsi]
 16250  	WORD $0x0148; BYTE $0xc2 // add    rdx, rax
 16251  	LONG $0xf0148949         // mov    qword [r8 + 8*rsi], rdx
 16252  	LONG $0x01c68348         // add    rsi, 1
 16253  	LONG $0xffc78348         // add    rdi, -1
 16254  	JNE  LBB2_946
 16255  
 16256  LBB2_947: // done if r9 < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16257  	LONG $0x03f98349 // cmp    r9, 3
 16258  	JB   LBB2_1069
 16259  
 16260  LBB2_948: // four elements per iteration until rsi == r10
 16261  	LONG $0xf1148b48             // mov    rdx, qword [rcx + 8*rsi]
 16262  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16263  	LONG $0xf0148949             // mov    qword [r8 + 8*rsi], rdx
 16264  	LONG $0xf1548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rsi + 8]
 16265  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16266  	LONG $0xf0548949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rdx
 16267  	LONG $0xf1548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rsi + 16]
 16268  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16269  	LONG $0xf0548949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rdx
 16270  	LONG $0xf1548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rsi + 24]
 16271  	WORD $0x0148; BYTE $0xc2     // add    rdx, rax
 16272  	LONG $0xf0548949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rdx
 16273  	LONG $0x04c68348             // add    rsi, 4
 16274  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16275  	JNE  LBB2_948
 16276  	JMP  LBB2_1069
 16277  
 16278  LBB2_220: // single-precision floats: dword[r8 + 4*i] = dword[rcx + 4*i] + dword[rdx] for i in [0, r9d); [rdx] is read once as a scalar
 16279  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16280  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16281  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 16282  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 16283  	LONG $0x08f98341         // cmp    r9d, 8
 16284  	JB   LBB2_222 // fewer than 8 elements: take the scalar loop
 16285  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 16286  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16287  	JBE  LBB2_576 // input ends at or before output start: no overlap; LBB2_576 is outside this excerpt (presumably the vector path - TODO confirm)
 16288  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 16289  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16290  	JBE  LBB2_576 // output ends at or before input start: no overlap
 16291  
 16292  LBB2_222: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 16293  	WORD $0xd231 // xor    edx, edx
 16294  
 16295  LBB2_953: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 16296  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 16297  	WORD $0xf748; BYTE $0xd6 // not    rsi
 16298  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 16299  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 16300  	LONG $0x03e78348         // and    rdi, 3
 16301  	JE   LBB2_955
 16302  
 16303  LBB2_954: // one element per iteration, rdi times
 16304  	LONG $0x0c100ff3; BYTE $0x91   // movss    xmm1, dword [rcx + 4*rdx]
 16305  	LONG $0xc8580ff3               // addss    xmm1, xmm0
 16306  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 16307  	LONG $0x01c28348               // add    rdx, 1
 16308  	LONG $0xffc78348               // add    rdi, -1
 16309  	JNE  LBB2_954
 16310  
 16311  LBB2_955: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16312  	LONG $0x03fe8348 // cmp    rsi, 3
 16313  	JB   LBB2_1069
 16314  
 16315  LBB2_956: // four elements per iteration until rdx == rax
 16316  	LONG $0x0c100ff3; BYTE $0x91               // movss    xmm1, dword [rcx + 4*rdx]
 16317  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16318  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 16319  	LONG $0x4c100ff3; WORD $0x0491             // movss    xmm1, dword [rcx + 4*rdx + 4]
 16320  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16321  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 16322  	LONG $0x4c100ff3; WORD $0x0891             // movss    xmm1, dword [rcx + 4*rdx + 8]
 16323  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16324  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 16325  	LONG $0x4c100ff3; WORD $0x0c91             // movss    xmm1, dword [rcx + 4*rdx + 12]
 16326  	LONG $0xc8580ff3                           // addss    xmm1, xmm0
 16327  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 16328  	LONG $0x04c28348                           // add    rdx, 4
 16329  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 16330  	JNE  LBB2_956
 16331  	JMP  LBB2_1069
 16332  
 16333  LBB2_223: // 64-bit integers: qword[r8 + 8*i] = qword[rdx] - qword[rcx + 8*i] for i in [0, r9d); [rdx] is read once as a scalar into r11
 16334  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16335  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16336  	WORD $0x8b4c; BYTE $0x1a // mov    r11, qword [rdx]
 16337  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16338  	LONG $0x04f98341         // cmp    r9d, 4
 16339  	JB   LBB2_225 // fewer than 4 elements: take the scalar loop
 16340  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 16341  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16342  	JBE  LBB2_579 // input ends at or before output start: no overlap; LBB2_579 is outside this excerpt (presumably the vector path - TODO confirm)
 16343  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 16344  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16345  	JBE  LBB2_579 // output ends at or before input start: no overlap
 16346  
 16347  LBB2_225: // scalar path: fewer than 4 elements, or the input and output ranges overlap; start at index rsi = 0
 16348  	WORD $0xf631 // xor    esi, esi
 16349  
 16350  LBB2_961: // rdx = r10 - rsi - 1 (elements left minus one); rdi = r10 & 3 = iterations of the single-element loop
 16351  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 16352  	WORD $0xf748; BYTE $0xd2 // not    rdx
 16353  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 16354  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16355  	LONG $0x03e78348         // and    rdi, 3
 16356  	JE   LBB2_963
 16357  
 16358  LBB2_962: // one element per iteration, rdi times
 16359  	WORD $0x894c; BYTE $0xd8 // mov    rax, r11
 16360  	LONG $0xf1042b48         // sub    rax, qword [rcx + 8*rsi]
 16361  	LONG $0xf0048949         // mov    qword [r8 + 8*rsi], rax
 16362  	LONG $0x01c68348         // add    rsi, 1
 16363  	LONG $0xffc78348         // add    rdi, -1
 16364  	JNE  LBB2_962
 16365  
 16366  LBB2_963: // done if rdx < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16367  	LONG $0x03fa8348 // cmp    rdx, 3
 16368  	JB   LBB2_1069
 16369  
 16370  LBB2_964: // four elements per iteration until rsi == r10
 16371  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16372  	LONG $0xf1042b48             // sub    rax, qword [rcx + 8*rsi]
 16373  	LONG $0xf0048949             // mov    qword [r8 + 8*rsi], rax
 16374  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16375  	LONG $0xf1442b48; BYTE $0x08 // sub    rax, qword [rcx + 8*rsi + 8]
 16376  	LONG $0xf0448949; BYTE $0x08 // mov    qword [r8 + 8*rsi + 8], rax
 16377  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16378  	LONG $0xf1442b48; BYTE $0x10 // sub    rax, qword [rcx + 8*rsi + 16]
 16379  	LONG $0xf0448949; BYTE $0x10 // mov    qword [r8 + 8*rsi + 16], rax
 16380  	WORD $0x894c; BYTE $0xd8     // mov    rax, r11
 16381  	LONG $0xf1442b48; BYTE $0x18 // sub    rax, qword [rcx + 8*rsi + 24]
 16382  	LONG $0xf0448949; BYTE $0x18 // mov    qword [r8 + 8*rsi + 24], rax
 16383  	LONG $0x04c68348             // add    rsi, 4
 16384  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16385  	JNE  LBB2_964
 16386  	JMP  LBB2_1069
 16387  
 16388  LBB2_226: // single-precision floats: dword[r8 + 4*i] = dword[rdx] - dword[rcx + 4*i] for i in [0, r9d); [rdx] is read once as a scalar
 16389  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16390  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16391  	LONG $0x02100ff3         // movss    xmm0, dword [rdx]
 16392  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 16393  	LONG $0x08f98341         // cmp    r9d, 8
 16394  	JB   LBB2_228 // fewer than 8 elements: take the scalar loop
 16395  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 16396  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16397  	JBE  LBB2_582 // input ends at or before output start: no overlap; LBB2_582 is outside this excerpt (presumably the vector path - TODO confirm)
 16398  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 16399  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16400  	JBE  LBB2_582 // output ends at or before input start: no overlap
 16401  
 16402  LBB2_228: // scalar path: start at index rdx = 0 (rdx is reused as the index now that the scalar is in xmm0)
 16403  	WORD $0xd231 // xor    edx, edx
 16404  
 16405  LBB2_969: // rsi = rax - rdx - 1 (elements left minus one); rdi = rax & 3 = iterations of the single-element loop
 16406  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 16407  	WORD $0xf748; BYTE $0xd6 // not    rsi
 16408  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 16409  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 16410  	LONG $0x03e78348         // and    rdi, 3
 16411  	JE   LBB2_971
 16412  
 16413  LBB2_970: // one element per iteration, rdi times; xmm1 is reloaded with the scalar because subss overwrites it
 16414  	WORD $0x280f; BYTE $0xc8       // movaps    xmm1, xmm0
 16415  	LONG $0x0c5c0ff3; BYTE $0x91   // subss    xmm1, dword [rcx + 4*rdx]
 16416  	LONG $0x110f41f3; WORD $0x900c // movss    dword [r8 + 4*rdx], xmm1
 16417  	LONG $0x01c28348               // add    rdx, 1
 16418  	LONG $0xffc78348               // add    rdi, -1
 16419  	JNE  LBB2_970
 16420  
 16421  LBB2_971: // done if rsi < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16422  	LONG $0x03fe8348 // cmp    rsi, 3
 16423  	JB   LBB2_1069
 16424  
 16425  LBB2_972: // four elements per iteration until rdx == rax
 16426  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16427  	LONG $0x0c5c0ff3; BYTE $0x91               // subss    xmm1, dword [rcx + 4*rdx]
 16428  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 16429  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16430  	LONG $0x4c5c0ff3; WORD $0x0491             // subss    xmm1, dword [rcx + 4*rdx + 4]
 16431  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 16432  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16433  	LONG $0x4c5c0ff3; WORD $0x0891             // subss    xmm1, dword [rcx + 4*rdx + 8]
 16434  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x08 // movss    dword [r8 + 4*rdx + 8], xmm1
 16435  	WORD $0x280f; BYTE $0xc8                   // movaps    xmm1, xmm0
 16436  	LONG $0x4c5c0ff3; WORD $0x0c91             // subss    xmm1, dword [rcx + 4*rdx + 12]
 16437  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x0c // movss    dword [r8 + 4*rdx + 12], xmm1
 16438  	LONG $0x04c28348                           // add    rdx, 4
 16439  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 16440  	JNE  LBB2_972
 16441  	JMP  LBB2_1069
 16442  
 16443  LBB2_229: // 8-bit elements: byte[r8 + i] = low 8 bits of (byte[rcx + i] * byte[rdx]) for i in [0, r9d); [rdx] is read once as a scalar into dl
 16444  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16445  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16446  	WORD $0x128a             // mov    dl, byte [rdx]
 16447  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16448  	LONG $0x20f98341         // cmp    r9d, 32
 16449  	JB   LBB2_231 // fewer than 32 elements: take the scalar loop
 16450  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
 16451  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
 16452  	JBE  LBB2_585 // input ends at or before output start: no overlap; LBB2_585 is outside this excerpt (presumably the vector path - TODO confirm)
 16453  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
 16454  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 16455  	JBE  LBB2_585 // output ends at or before input start: no overlap
 16456  
 16457  LBB2_231: // scalar path: fewer than 32 elements, or the input and output ranges overlap; start at index rdi = 0
 16458  	WORD $0xff31 // xor    edi, edi
 16459  
 16460  LBB2_977: // r9 = r10 - rdi - 1 (elements left minus one); rsi = r10 & 3 = iterations of the single-element loop
 16461  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
 16462  	WORD $0xf749; BYTE $0xd1 // not    r9
 16463  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16464  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
 16465  	LONG $0x03e68348         // and    rsi, 3
 16466  	JE   LBB2_979
 16467  
 16468  LBB2_978: // one element per iteration, rsi times; "mul dl" leaves al * dl in ax and only al is stored
 16469  	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
 16470  	WORD $0xe2f6     // mul    dl
 16471  	LONG $0x38048841 // mov    byte [r8 + rdi], al
 16472  	LONG $0x01c78348 // add    rdi, 1
 16473  	LONG $0xffc68348 // add    rsi, -1
 16474  	JNE  LBB2_978
 16475  
 16476  LBB2_979: // done if r9 < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16477  	LONG $0x03f98349 // cmp    r9, 3
 16478  	JB   LBB2_1069
 16479  
 16480  LBB2_980: // four elements per iteration until rdi == r10
 16481  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
 16482  	WORD $0xe2f6                 // mul    dl
 16483  	LONG $0x38048841             // mov    byte [r8 + rdi], al
 16484  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
 16485  	WORD $0xe2f6                 // mul    dl
 16486  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
 16487  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
 16488  	WORD $0xe2f6                 // mul    dl
 16489  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
 16490  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
 16491  	WORD $0xe2f6                 // mul    dl
 16492  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
 16493  	LONG $0x04c78348             // add    rdi, 4
 16494  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
 16495  	JNE  LBB2_980
 16496  	JMP  LBB2_1069
 16497  
 16498  LBB2_232: // 8-bit elements: byte[r8 + i] = low 8 bits of (byte[rcx + i] * byte[rdx]) for i in [0, r9d); [rdx] is read once as a scalar into dl
 16499  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16500  	JLE  LBB2_1069 // nothing to do when r9d <= 0 (signed); LBB2_1069 is outside this excerpt (presumably the common exit - TODO confirm)
 16501  	WORD $0x128a             // mov    dl, byte [rdx]
 16502  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 16503  	LONG $0x20f98341         // cmp    r9d, 32
 16504  	JB   LBB2_234 // fewer than 32 elements: take the scalar loop
 16505  	LONG $0x11048d4a         // lea    rax, [rcx + r10]
 16506  	WORD $0x394c; BYTE $0xc0 // cmp    rax, r8
 16507  	JBE  LBB2_588 // input ends at or before output start: no overlap; LBB2_588 is outside this excerpt (presumably the vector path - TODO confirm)
 16508  	LONG $0x10048d4b         // lea    rax, [r8 + r10]
 16509  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 16510  	JBE  LBB2_588 // output ends at or before input start: no overlap
 16511  
 16512  LBB2_234: // scalar path: fewer than 32 elements, or the input and output ranges overlap; start at index rdi = 0
 16513  	WORD $0xff31 // xor    edi, edi
 16514  
 16515  LBB2_985: // r9 = r10 - rdi - 1 (elements left minus one); rsi = r10 & 3 = iterations of the single-element loop
 16516  	WORD $0x8949; BYTE $0xf9 // mov    r9, rdi
 16517  	WORD $0xf749; BYTE $0xd1 // not    r9
 16518  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16519  	WORD $0x894c; BYTE $0xd6 // mov    rsi, r10
 16520  	LONG $0x03e68348         // and    rsi, 3
 16521  	JE   LBB2_987
 16522  
 16523  LBB2_986: // one element per iteration, rsi times; "mul dl" leaves al * dl in ax and only al is stored
 16524  	LONG $0x3904b60f // movzx    eax, byte [rcx + rdi]
 16525  	WORD $0xe2f6     // mul    dl
 16526  	LONG $0x38048841 // mov    byte [r8 + rdi], al
 16527  	LONG $0x01c78348 // add    rdi, 1
 16528  	LONG $0xffc68348 // add    rsi, -1
 16529  	JNE  LBB2_986
 16530  
 16531  LBB2_987: // done if r9 < 3 (unsigned), otherwise continue with the 4x unrolled loop
 16532  	LONG $0x03f98349 // cmp    r9, 3
 16533  	JB   LBB2_1069
 16534  
 16535  LBB2_988: // four elements per iteration until rdi == r10
 16536  	LONG $0x3904b60f             // movzx    eax, byte [rcx + rdi]
 16537  	WORD $0xe2f6                 // mul    dl
 16538  	LONG $0x38048841             // mov    byte [r8 + rdi], al
 16539  	LONG $0x3944b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdi + 1]
 16540  	WORD $0xe2f6                 // mul    dl
 16541  	LONG $0x38448841; BYTE $0x01 // mov    byte [r8 + rdi + 1], al
 16542  	LONG $0x3944b60f; BYTE $0x02 // movzx    eax, byte [rcx + rdi + 2]
 16543  	WORD $0xe2f6                 // mul    dl
 16544  	LONG $0x38448841; BYTE $0x02 // mov    byte [r8 + rdi + 2], al
 16545  	LONG $0x3944b60f; BYTE $0x03 // movzx    eax, byte [rcx + rdi + 3]
 16546  	WORD $0xe2f6                 // mul    dl
 16547  	LONG $0x38448841; BYTE $0x03 // mov    byte [r8 + rdi + 3], al
 16548  	LONG $0x04c78348             // add    rdi, 4
 16549  	WORD $0x3949; BYTE $0xfa     // cmp    r10, rdi
 16550  	JNE  LBB2_988
 16551  	JMP  LBB2_1069
 16552  
 16553  LBB2_235:
        // Byte-wide add of one fixed byte to every input element:
        //   al = byte [rdx]; for each index i below the count in r9d,
        //   byte [r8+i] = byte [rcx+i] + al (8-bit add, carry discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16554  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16555  	JLE  LBB2_1069
 16556  	WORD $0x028a             // mov    al, byte [rdx]
 16557  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 32 elements: skip the overlap test and use the scalar loops.
 16558  	LONG $0x20f98341         // cmp    r9d, 32
 16559  	JB   LBB2_237
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_591 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16560  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 16561  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16562  	JBE  LBB2_591
 16563  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 16564  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16565  	JBE  LBB2_591
 16566  
 16567  LBB2_237:
        // Scalar path from the beginning: element index rsi = 0.
 16568  	WORD $0xf631 // xor    esi, esi
 16569  
 16570  LBB2_993:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16571  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16572  	WORD $0xf749; BYTE $0xd1 // not    r9
 16573  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16574  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16575  	LONG $0x03e78348         // and    rdi, 3
 16576  	JE   LBB2_995
 16577  
 16578  LBB2_994:
        // One element per iteration, rdi times.
 16579  	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
 16580  	WORD $0xc200     // add    dl, al
 16581  	LONG $0x30148841 // mov    byte [r8 + rsi], dl
 16582  	LONG $0x01c68348 // add    rsi, 1
 16583  	LONG $0xffc78348 // add    rdi, -1
 16584  	JNE  LBB2_994
 16585  
 16586  LBB2_995:
        // r9 < 3: no more than three elements were outstanding at LBB2_993, so the
        // unrolled loop is skipped.
 16587  	LONG $0x03f98349 // cmp    r9, 3
 16588  	JB   LBB2_1069
 16589  
 16590  LBB2_996:
        // Four elements per iteration until rsi reaches r10.
 16591  	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
 16592  	WORD $0xc200                 // add    dl, al
 16593  	LONG $0x30148841             // mov    byte [r8 + rsi], dl
 16594  	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
 16595  	WORD $0xc200                 // add    dl, al
 16596  	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
 16597  	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
 16598  	WORD $0xc200                 // add    dl, al
 16599  	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
 16600  	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
 16601  	WORD $0xc200                 // add    dl, al
 16602  	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
 16603  	LONG $0x04c68348             // add    rsi, 4
 16604  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16605  	JNE  LBB2_996
 16606  	JMP  LBB2_1069
 16607  
 16608  LBB2_238:
        // Byte-wide subtract of every input element from one fixed byte:
        //   r11b = byte [rdx]; for each index i below the count in r9d,
        //   byte [r8+i] = r11b - byte [rcx+i] (8-bit subtract, borrow discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16609  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16610  	JLE  LBB2_1069
 16611  	WORD $0x8a44; BYTE $0x1a // mov    r11b, byte [rdx]
 16612  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 32 elements: skip the overlap test and use the scalar loops.
 16613  	LONG $0x20f98341         // cmp    r9d, 32
 16614  	JB   LBB2_240
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_594 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16615  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 16616  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16617  	JBE  LBB2_594
 16618  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 16619  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16620  	JBE  LBB2_594
 16621  
 16622  LBB2_240:
        // Scalar path from the beginning: element index rsi = 0.
 16623  	WORD $0xf631 // xor    esi, esi
 16624  
 16625  LBB2_1001:
        // rdx = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16626  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 16627  	WORD $0xf748; BYTE $0xd2 // not    rdx
 16628  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 16629  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16630  	LONG $0x03e78348         // and    rdi, 3
 16631  	JE   LBB2_1003
 16632  
 16633  LBB2_1002:
        // One element per iteration, rdi times. eax is reloaded from r11d each
        // time because the subtract overwrites al.
 16634  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 16635  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
 16636  	LONG $0x30048841         // mov    byte [r8 + rsi], al
 16637  	LONG $0x01c68348         // add    rsi, 1
 16638  	LONG $0xffc78348         // add    rdi, -1
 16639  	JNE  LBB2_1002
 16640  
 16641  LBB2_1003:
        // rdx < 3: no more than three elements were outstanding at LBB2_1001, so
        // the unrolled loop is skipped.
 16642  	LONG $0x03fa8348 // cmp    rdx, 3
 16643  	JB   LBB2_1069
 16644  
 16645  LBB2_1004:
        // Four elements per iteration until rsi reaches r10.
 16646  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16647  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
 16648  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 16649  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16650  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
 16651  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 16652  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16653  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
 16654  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
 16655  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16656  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
 16657  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
 16658  	LONG $0x04c68348             // add    rsi, 4
 16659  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16660  	JNE  LBB2_1004
 16661  	JMP  LBB2_1069
 16662  
 16663  LBB2_241:
        // Byte-wide add of one fixed byte to every input element:
        //   al = byte [rdx]; for each index i below the count in r9d,
        //   byte [r8+i] = byte [rcx+i] + al (8-bit add, carry discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16664  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16665  	JLE  LBB2_1069
 16666  	WORD $0x028a             // mov    al, byte [rdx]
 16667  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 32 elements: skip the overlap test and use the scalar loops.
 16668  	LONG $0x20f98341         // cmp    r9d, 32
 16669  	JB   LBB2_243
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_597 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16670  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 16671  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16672  	JBE  LBB2_597
 16673  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 16674  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16675  	JBE  LBB2_597
 16676  
 16677  LBB2_243:
        // Scalar path from the beginning: element index rsi = 0.
 16678  	WORD $0xf631 // xor    esi, esi
 16679  
 16680  LBB2_1009:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16681  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16682  	WORD $0xf749; BYTE $0xd1 // not    r9
 16683  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16684  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16685  	LONG $0x03e78348         // and    rdi, 3
 16686  	JE   LBB2_1011
 16687  
 16688  LBB2_1010:
        // One element per iteration, rdi times.
 16689  	LONG $0x3114b60f // movzx    edx, byte [rcx + rsi]
 16690  	WORD $0xc200     // add    dl, al
 16691  	LONG $0x30148841 // mov    byte [r8 + rsi], dl
 16692  	LONG $0x01c68348 // add    rsi, 1
 16693  	LONG $0xffc78348 // add    rdi, -1
 16694  	JNE  LBB2_1010
 16695  
 16696  LBB2_1011:
        // r9 < 3: no more than three elements were outstanding at LBB2_1009, so
        // the unrolled loop is skipped.
 16697  	LONG $0x03f98349 // cmp    r9, 3
 16698  	JB   LBB2_1069
 16699  
 16700  LBB2_1012:
        // Four elements per iteration until rsi reaches r10.
 16701  	LONG $0x3114b60f             // movzx    edx, byte [rcx + rsi]
 16702  	WORD $0xc200                 // add    dl, al
 16703  	LONG $0x30148841             // mov    byte [r8 + rsi], dl
 16704  	LONG $0x3154b60f; BYTE $0x01 // movzx    edx, byte [rcx + rsi + 1]
 16705  	WORD $0xc200                 // add    dl, al
 16706  	LONG $0x30548841; BYTE $0x01 // mov    byte [r8 + rsi + 1], dl
 16707  	LONG $0x3154b60f; BYTE $0x02 // movzx    edx, byte [rcx + rsi + 2]
 16708  	WORD $0xc200                 // add    dl, al
 16709  	LONG $0x30548841; BYTE $0x02 // mov    byte [r8 + rsi + 2], dl
 16710  	LONG $0x3154b60f; BYTE $0x03 // movzx    edx, byte [rcx + rsi + 3]
 16711  	WORD $0xc200                 // add    dl, al
 16712  	LONG $0x30548841; BYTE $0x03 // mov    byte [r8 + rsi + 3], dl
 16713  	LONG $0x04c68348             // add    rsi, 4
 16714  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16715  	JNE  LBB2_1012
 16716  	JMP  LBB2_1069
 16717  
 16718  LBB2_244:
        // Byte-wide subtract of every input element from one fixed byte:
        //   r11b = byte [rdx]; for each index i below the count in r9d,
        //   byte [r8+i] = r11b - byte [rcx+i] (8-bit subtract, borrow discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16719  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16720  	JLE  LBB2_1069
 16721  	WORD $0x8a44; BYTE $0x1a // mov    r11b, byte [rdx]
 16722  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 32 elements: skip the overlap test and use the scalar loops.
 16723  	LONG $0x20f98341         // cmp    r9d, 32
 16724  	JB   LBB2_246
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_600 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16725  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 16726  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16727  	JBE  LBB2_600
 16728  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 16729  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16730  	JBE  LBB2_600
 16731  
 16732  LBB2_246:
        // Scalar path from the beginning: element index rsi = 0.
 16733  	WORD $0xf631 // xor    esi, esi
 16734  
 16735  LBB2_1017:
        // rdx = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16736  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 16737  	WORD $0xf748; BYTE $0xd2 // not    rdx
 16738  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 16739  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16740  	LONG $0x03e78348         // and    rdi, 3
 16741  	JE   LBB2_1019
 16742  
 16743  LBB2_1018:
        // One element per iteration, rdi times. eax is reloaded from r11d each
        // time because the subtract overwrites al.
 16744  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 16745  	WORD $0x042a; BYTE $0x31 // sub    al, byte [rcx + rsi]
 16746  	LONG $0x30048841         // mov    byte [r8 + rsi], al
 16747  	LONG $0x01c68348         // add    rsi, 1
 16748  	LONG $0xffc78348         // add    rdi, -1
 16749  	JNE  LBB2_1018
 16750  
 16751  LBB2_1019:
        // rdx < 3: no more than three elements were outstanding at LBB2_1017, so
        // the unrolled loop is skipped.
 16752  	LONG $0x03fa8348 // cmp    rdx, 3
 16753  	JB   LBB2_1069
 16754  
 16755  LBB2_1020:
        // Four elements per iteration until rsi reaches r10.
 16756  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16757  	WORD $0x042a; BYTE $0x31     // sub    al, byte [rcx + rsi]
 16758  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 16759  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16760  	LONG $0x0131442a             // sub    al, byte [rcx + rsi + 1]
 16761  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 16762  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16763  	LONG $0x0231442a             // sub    al, byte [rcx + rsi + 2]
 16764  	LONG $0x30448841; BYTE $0x02 // mov    byte [r8 + rsi + 2], al
 16765  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16766  	LONG $0x0331442a             // sub    al, byte [rcx + rsi + 3]
 16767  	LONG $0x30448841; BYTE $0x03 // mov    byte [r8 + rsi + 3], al
 16768  	LONG $0x04c68348             // add    rsi, 4
 16769  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16770  	JNE  LBB2_1020
 16771  	JMP  LBB2_1069
 16772  
 16773  LBB2_247:
        // 32-bit multiply of every input element by one fixed dword:
        //   eax = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = dword [rcx+4*i] * eax (imul keeps the low 32 bits).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16774  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16775  	JLE  LBB2_1069
 16776  	WORD $0x028b             // mov    eax, dword [rdx]
 16777  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 16778  	LONG $0x08f98341         // cmp    r9d, 8
 16779  	JB   LBB2_249
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_603 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16780  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 16781  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16782  	JBE  LBB2_603
 16783  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 16784  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16785  	JBE  LBB2_603
 16786  
 16787  LBB2_249:
        // Scalar path from the beginning: element index rsi = 0.
 16788  	WORD $0xf631 // xor    esi, esi
 16789  
 16790  LBB2_1025:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16791  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16792  	WORD $0xf749; BYTE $0xd1 // not    r9
 16793  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16794  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16795  	LONG $0x03e78348         // and    rdi, 3
 16796  	JE   LBB2_1027
 16797  
 16798  LBB2_1026:
        // One element per iteration, rdi times.
 16799  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 16800  	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
 16801  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 16802  	LONG $0x01c68348         // add    rsi, 1
 16803  	LONG $0xffc78348         // add    rdi, -1
 16804  	JNE  LBB2_1026
 16805  
 16806  LBB2_1027:
        // r9 < 3: no more than three elements were outstanding at LBB2_1025, so
        // the unrolled loop is skipped.
 16807  	LONG $0x03f98349 // cmp    r9, 3
 16808  	JB   LBB2_1069
 16809  
 16810  LBB2_1028:
        // Four elements per iteration until rsi reaches r10.
 16811  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 16812  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16813  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 16814  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 16815  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16816  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 16817  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 16818  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16819  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 16820  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 16821  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16822  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 16823  	LONG $0x04c68348             // add    rsi, 4
 16824  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16825  	JNE  LBB2_1028
 16826  	JMP  LBB2_1069
 16827  
 16828  LBB2_250:
        // 32-bit multiply of every input element by one fixed dword:
        //   eax = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = dword [rcx+4*i] * eax (imul keeps the low 32 bits).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16829  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16830  	JLE  LBB2_1069
 16831  	WORD $0x028b             // mov    eax, dword [rdx]
 16832  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 16833  	LONG $0x08f98341         // cmp    r9d, 8
 16834  	JB   LBB2_252
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_606 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16835  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 16836  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16837  	JBE  LBB2_606
 16838  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 16839  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16840  	JBE  LBB2_606
 16841  
 16842  LBB2_252:
        // Scalar path from the beginning: element index rsi = 0.
 16843  	WORD $0xf631 // xor    esi, esi
 16844  
 16845  LBB2_1033:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16846  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16847  	WORD $0xf749; BYTE $0xd1 // not    r9
 16848  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16849  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16850  	LONG $0x03e78348         // and    rdi, 3
 16851  	JE   LBB2_1035
 16852  
 16853  LBB2_1034:
        // One element per iteration, rdi times.
 16854  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 16855  	WORD $0xaf0f; BYTE $0xd0 // imul    edx, eax
 16856  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 16857  	LONG $0x01c68348         // add    rsi, 1
 16858  	LONG $0xffc78348         // add    rdi, -1
 16859  	JNE  LBB2_1034
 16860  
 16861  LBB2_1035:
        // r9 < 3: no more than three elements were outstanding at LBB2_1033, so
        // the unrolled loop is skipped.
 16862  	LONG $0x03f98349 // cmp    r9, 3
 16863  	JB   LBB2_1069
 16864  
 16865  LBB2_1036:
        // Four elements per iteration until rsi reaches r10.
 16866  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 16867  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16868  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 16869  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 16870  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16871  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 16872  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 16873  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16874  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 16875  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 16876  	WORD $0xaf0f; BYTE $0xd0     // imul    edx, eax
 16877  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 16878  	LONG $0x04c68348             // add    rsi, 4
 16879  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16880  	JNE  LBB2_1036
 16881  	JMP  LBB2_1069
 16882  
 16883  LBB2_253:
        // 32-bit add of one fixed dword to every input element:
        //   eax = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = dword [rcx+4*i] + eax (32-bit add, carry discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16884  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16885  	JLE  LBB2_1069
 16886  	WORD $0x028b             // mov    eax, dword [rdx]
 16887  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 16888  	LONG $0x08f98341         // cmp    r9d, 8
 16889  	JB   LBB2_255
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_609 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16890  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 16891  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16892  	JBE  LBB2_609
 16893  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 16894  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16895  	JBE  LBB2_609
 16896  
 16897  LBB2_255:
        // Scalar path from the beginning: element index rsi = 0.
 16898  	WORD $0xf631 // xor    esi, esi
 16899  
 16900  LBB2_1041:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16901  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 16902  	WORD $0xf749; BYTE $0xd1 // not    r9
 16903  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 16904  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16905  	LONG $0x03e78348         // and    rdi, 3
 16906  	JE   LBB2_1043
 16907  
 16908  LBB2_1042:
        // One element per iteration, rdi times.
 16909  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 16910  	WORD $0xc201             // add    edx, eax
 16911  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 16912  	LONG $0x01c68348         // add    rsi, 1
 16913  	LONG $0xffc78348         // add    rdi, -1
 16914  	JNE  LBB2_1042
 16915  
 16916  LBB2_1043:
        // r9 < 3: no more than three elements were outstanding at LBB2_1041, so
        // the unrolled loop is skipped.
 16917  	LONG $0x03f98349 // cmp    r9, 3
 16918  	JB   LBB2_1069
 16919  
 16920  LBB2_1044:
        // Four elements per iteration until rsi reaches r10.
 16921  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 16922  	WORD $0xc201                 // add    edx, eax
 16923  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 16924  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 16925  	WORD $0xc201                 // add    edx, eax
 16926  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 16927  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 16928  	WORD $0xc201                 // add    edx, eax
 16929  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 16930  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 16931  	WORD $0xc201                 // add    edx, eax
 16932  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 16933  	LONG $0x04c68348             // add    rsi, 4
 16934  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16935  	JNE  LBB2_1044
 16936  	JMP  LBB2_1069
 16937  
 16938  LBB2_256:
        // 32-bit subtract of every input element from one fixed dword:
        //   r11d = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = r11d - dword [rcx+4*i] (32-bit subtract, borrow discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16939  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16940  	JLE  LBB2_1069
 16941  	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
 16942  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 16943  	LONG $0x08f98341         // cmp    r9d, 8
 16944  	JB   LBB2_258
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_612 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 16945  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 16946  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 16947  	JBE  LBB2_612
 16948  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 16949  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 16950  	JBE  LBB2_612
 16951  
 16952  LBB2_258:
        // Scalar path from the beginning: element index rsi = 0.
 16953  	WORD $0xf631 // xor    esi, esi
 16954  
 16955  LBB2_1049:
        // rdx = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 16956  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 16957  	WORD $0xf748; BYTE $0xd2 // not    rdx
 16958  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 16959  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 16960  	LONG $0x03e78348         // and    rdi, 3
 16961  	JE   LBB2_1051
 16962  
 16963  LBB2_1050:
        // One element per iteration, rdi times. eax is reloaded from r11d each
        // time because the subtract overwrites it.
 16964  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 16965  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
 16966  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
 16967  	LONG $0x01c68348         // add    rsi, 1
 16968  	LONG $0xffc78348         // add    rdi, -1
 16969  	JNE  LBB2_1050
 16970  
 16971  LBB2_1051:
        // rdx < 3: no more than three elements were outstanding at LBB2_1049, so
        // the unrolled loop is skipped.
 16972  	LONG $0x03fa8348 // cmp    rdx, 3
 16973  	JB   LBB2_1069
 16974  
 16975  LBB2_1052:
        // Four elements per iteration until rsi reaches r10.
 16976  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16977  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
 16978  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
 16979  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16980  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
 16981  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
 16982  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16983  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
 16984  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
 16985  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 16986  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
 16987  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
 16988  	LONG $0x04c68348             // add    rsi, 4
 16989  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 16990  	JNE  LBB2_1052
 16991  	JMP  LBB2_1069
 16992  
 16993  LBB2_259:
        // 32-bit add of one fixed dword to every input element:
        //   eax = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = dword [rcx+4*i] + eax (32-bit add, carry discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 16994  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 16995  	JLE  LBB2_1069
 16996  	WORD $0x028b             // mov    eax, dword [rdx]
 16997  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 16998  	LONG $0x08f98341         // cmp    r9d, 8
 16999  	JB   LBB2_261
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_615 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 17000  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 17001  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 17002  	JBE  LBB2_615
 17003  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 17004  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 17005  	JBE  LBB2_615
 17006  
 17007  LBB2_261:
        // Scalar path from the beginning: element index rsi = 0.
 17008  	WORD $0xf631 // xor    esi, esi
 17009  
 17010  LBB2_1057:
        // r9 = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 17011  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17012  	WORD $0xf749; BYTE $0xd1 // not    r9
 17013  	WORD $0x014d; BYTE $0xd1 // add    r9, r10
 17014  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 17015  	LONG $0x03e78348         // and    rdi, 3
 17016  	JE   LBB2_1059
 17017  
 17018  LBB2_1058:
        // One element per iteration, rdi times.
 17019  	WORD $0x148b; BYTE $0xb1 // mov    edx, dword [rcx + 4*rsi]
 17020  	WORD $0xc201             // add    edx, eax
 17021  	LONG $0xb0148941         // mov    dword [r8 + 4*rsi], edx
 17022  	LONG $0x01c68348         // add    rsi, 1
 17023  	LONG $0xffc78348         // add    rdi, -1
 17024  	JNE  LBB2_1058
 17025  
 17026  LBB2_1059:
        // r9 < 3: no more than three elements were outstanding at LBB2_1057, so
        // the unrolled loop is skipped.
 17027  	LONG $0x03f98349 // cmp    r9, 3
 17028  	JB   LBB2_1069
 17029  
 17030  LBB2_1060:
        // Four elements per iteration until rsi reaches r10.
 17031  	WORD $0x148b; BYTE $0xb1     // mov    edx, dword [rcx + 4*rsi]
 17032  	WORD $0xc201                 // add    edx, eax
 17033  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 17034  	LONG $0x04b1548b             // mov    edx, dword [rcx + 4*rsi + 4]
 17035  	WORD $0xc201                 // add    edx, eax
 17036  	LONG $0xb0548941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], edx
 17037  	LONG $0x08b1548b             // mov    edx, dword [rcx + 4*rsi + 8]
 17038  	WORD $0xc201                 // add    edx, eax
 17039  	LONG $0xb0548941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], edx
 17040  	LONG $0x0cb1548b             // mov    edx, dword [rcx + 4*rsi + 12]
 17041  	WORD $0xc201                 // add    edx, eax
 17042  	LONG $0xb0548941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], edx
 17043  	LONG $0x04c68348             // add    rsi, 4
 17044  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 17045  	JNE  LBB2_1060
 17046  	JMP  LBB2_1069
 17047  
 17048  LBB2_262:
        // 32-bit subtract of every input element from one fixed dword:
        //   r11d = dword [rdx]; for each index i below the count in r9d,
        //   dword [r8+4*i] = r11d - dword [rcx+4*i] (32-bit subtract, borrow discarded).
        // Jumps straight to the exit LBB2_1069 when r9d <= 0 (signed test).
 17049  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 17050  	JLE  LBB2_1069
 17051  	WORD $0x8b44; BYTE $0x1a // mov    r11d, dword [rdx]
 17052  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
        // Fewer than 8 elements: skip the overlap test and use the scalar loops.
 17053  	LONG $0x08f98341         // cmp    r9d, 8
 17054  	JB   LBB2_264
        // Overlap test (rdx is reused as scratch from here on): when input and
        // output ranges do not overlap, branch to LBB2_618 (defined outside this
        // chunk; presumably a SIMD path -- TODO confirm).
 17055  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 17056  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 17057  	JBE  LBB2_618
 17058  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 17059  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 17060  	JBE  LBB2_618
 17061  
 17062  LBB2_264:
        // Scalar path from the beginning: element index rsi = 0.
 17063  	WORD $0xf631 // xor    esi, esi
 17064  
 17065  LBB2_1065:
        // rdx = r10 - rsi - 1; rdi = r10 & 3 is the number of single-element
        // iterations run before the 4x-unrolled loop.
        // NOTE(review): the separate label suggests another path enters here with
        // rsi already advanced -- confirm against the code outside this chunk.
 17066  	WORD $0x8948; BYTE $0xf2 // mov    rdx, rsi
 17067  	WORD $0xf748; BYTE $0xd2 // not    rdx
 17068  	WORD $0x014c; BYTE $0xd2 // add    rdx, r10
 17069  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 17070  	LONG $0x03e78348         // and    rdi, 3
 17071  	JE   LBB2_1067
 17072  
 17073  LBB2_1066:
        // One element per iteration, rdi times. eax is reloaded from r11d each
        // time because the subtract overwrites it.
 17074  	WORD $0x8944; BYTE $0xd8 // mov    eax, r11d
 17075  	WORD $0x042b; BYTE $0xb1 // sub    eax, dword [rcx + 4*rsi]
 17076  	LONG $0xb0048941         // mov    dword [r8 + 4*rsi], eax
 17077  	LONG $0x01c68348         // add    rsi, 1
 17078  	LONG $0xffc78348         // add    rdi, -1
 17079  	JNE  LBB2_1066
 17080  
 17081  LBB2_1067:
        // rdx < 3: no more than three elements were outstanding at LBB2_1065, so
        // the unrolled loop is skipped.
 17082  	LONG $0x03fa8348 // cmp    rdx, 3
 17083  	JB   LBB2_1069
 17084  
 17085  LBB2_1068:
        // Four elements per iteration until rsi reaches r10.
 17086  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 17087  	WORD $0x042b; BYTE $0xb1     // sub    eax, dword [rcx + 4*rsi]
 17088  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
 17089  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 17090  	LONG $0x04b1442b             // sub    eax, dword [rcx + 4*rsi + 4]
 17091  	LONG $0xb0448941; BYTE $0x04 // mov    dword [r8 + 4*rsi + 4], eax
 17092  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 17093  	LONG $0x08b1442b             // sub    eax, dword [rcx + 4*rsi + 8]
 17094  	LONG $0xb0448941; BYTE $0x08 // mov    dword [r8 + 4*rsi + 8], eax
 17095  	WORD $0x8944; BYTE $0xd8     // mov    eax, r11d
 17096  	LONG $0x0cb1442b             // sub    eax, dword [rcx + 4*rsi + 12]
 17097  	LONG $0xb0448941; BYTE $0x0c // mov    dword [r8 + 4*rsi + 12], eax
 17098  	LONG $0x04c68348             // add    rsi, 4
 17099  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 17100  	JNE  LBB2_1068
 17101  	JMP  LBB2_1069
 17102  
 17103  LBB2_319:
        // Scalar 64-bit multiply loops: qword [r8+8*i] = qword [rcx+8*i] * rax
        // (imul keeps the low 64 bits).
        // rax, esi and r9 are set before the jump here, outside this chunk;
        // presumably rax is the fixed multiplier and r9 the leftover element
        // count -- TODO confirm at the jump site.
        // rsi = esi & -4 bounds the 4x-unrolled loop; rdi is the element index.
 17104  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 17105  	WORD $0xff31             // xor    edi, edi
 17106  
 17107  LBB2_320:
        // Four qwords per iteration until rdi == rsi. The compare follows the body,
        // so this presumably relies on rsi being non-zero on entry -- TODO confirm.
 17108  	LONG $0xf9148b48             // mov    rdx, qword [rcx + 8*rdi]
 17109  	LONG $0xd0af0f48             // imul    rdx, rax
 17110  	LONG $0xf8148949             // mov    qword [r8 + 8*rdi], rdx
 17111  	LONG $0xf9548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rdi + 8]
 17112  	LONG $0xd0af0f48             // imul    rdx, rax
 17113  	LONG $0xf8548949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rdx
 17114  	LONG $0xf9548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rdi + 16]
 17115  	LONG $0xd0af0f48             // imul    rdx, rax
 17116  	LONG $0xf8548949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rdx
 17117  	LONG $0xf9548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rdi + 24]
 17118  	LONG $0xd0af0f48             // imul    rdx, rax
 17119  	LONG $0xf8548949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rdx
 17120  	LONG $0x04c78348             // add    rdi, 4
 17121  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 17122  	JNE  LBB2_320
 17123  
 17124  LBB2_321:
        // r9 == 0: nothing left, exit. Otherwise rebase the output pointer (into
        // rsi) and the input pointer (rcx) at element rdi and restart rdi at 0.
 17125  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 17126  	JE   LBB2_1069
 17127  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 17128  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
 17129  	WORD $0xff31             // xor    edi, edi
 17130  
 17131  LBB2_323:
        // One qword per iteration, r9 times.
 17132  	LONG $0xf9148b48         // mov    rdx, qword [rcx + 8*rdi]
 17133  	LONG $0xd0af0f48         // imul    rdx, rax
 17134  	LONG $0xfe148948         // mov    qword [rsi + 8*rdi], rdx
 17135  	LONG $0x01c78348         // add    rdi, 1
 17136  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 17137  	JNE  LBB2_323
 17138  	JMP  LBB2_1069
 17139  
 17140  LBB2_324:
        // Scalar 64-bit multiply loops: qword [r8+8*i] = qword [rcx+8*i] * rax
        // (imul keeps the low 64 bits).
        // rax, esi and r9 are set before the jump here, outside this chunk;
        // presumably rax is the fixed multiplier and r9 the leftover element
        // count -- TODO confirm at the jump site.
        // rsi = esi & -4 bounds the 4x-unrolled loop; rdi is the element index.
 17141  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 17142  	WORD $0xff31             // xor    edi, edi
 17143  
 17144  LBB2_325:
        // Four qwords per iteration until rdi == rsi. The compare follows the body,
        // so this presumably relies on rsi being non-zero on entry -- TODO confirm.
 17145  	LONG $0xf9148b48             // mov    rdx, qword [rcx + 8*rdi]
 17146  	LONG $0xd0af0f48             // imul    rdx, rax
 17147  	LONG $0xf8148949             // mov    qword [r8 + 8*rdi], rdx
 17148  	LONG $0xf9548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rdi + 8]
 17149  	LONG $0xd0af0f48             // imul    rdx, rax
 17150  	LONG $0xf8548949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rdx
 17151  	LONG $0xf9548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rdi + 16]
 17152  	LONG $0xd0af0f48             // imul    rdx, rax
 17153  	LONG $0xf8548949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rdx
 17154  	LONG $0xf9548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rdi + 24]
 17155  	LONG $0xd0af0f48             // imul    rdx, rax
 17156  	LONG $0xf8548949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rdx
 17157  	LONG $0x04c78348             // add    rdi, 4
 17158  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 17159  	JNE  LBB2_325
 17160  
 17161  LBB2_326:
        // r9 == 0: nothing left, exit. Otherwise rebase the output pointer (into
        // rsi) and the input pointer (rcx) at element rdi and restart rdi at 0.
 17162  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 17163  	JE   LBB2_1069
 17164  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 17165  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
 17166  	WORD $0xff31             // xor    edi, edi
 17167  
 17168  LBB2_328:
        // One qword per iteration, r9 times.
 17169  	LONG $0xf9148b48         // mov    rdx, qword [rcx + 8*rdi]
 17170  	LONG $0xd0af0f48         // imul    rdx, rax
 17171  	LONG $0xfe148948         // mov    qword [rsi + 8*rdi], rdx
 17172  	LONG $0x01c78348         // add    rdi, 1
 17173  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 17174  	JNE  LBB2_328
 17175  	JMP  LBB2_1069
 17176  
 17177  LBB2_377:
        // Scalar 64-bit multiply loops: qword [r8+8*i] = qword [rcx+8*i] * rax
        // (imul keeps the low 64 bits).
        // rax, esi and r9 are set before the jump here, outside this chunk;
        // presumably rax is the fixed multiplier and r9 the leftover element
        // count -- TODO confirm at the jump site.
        // rsi = esi & -4 bounds the 4x-unrolled loop; rdi is the element index.
 17178  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 17179  	WORD $0xff31             // xor    edi, edi
 17180  
 17181  LBB2_378:
        // Four qwords per iteration until rdi == rsi. The compare follows the body,
        // so this presumably relies on rsi being non-zero on entry -- TODO confirm.
 17182  	LONG $0xf9148b48             // mov    rdx, qword [rcx + 8*rdi]
 17183  	LONG $0xd0af0f48             // imul    rdx, rax
 17184  	LONG $0xf8148949             // mov    qword [r8 + 8*rdi], rdx
 17185  	LONG $0xf9548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rdi + 8]
 17186  	LONG $0xd0af0f48             // imul    rdx, rax
 17187  	LONG $0xf8548949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rdx
 17188  	LONG $0xf9548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rdi + 16]
 17189  	LONG $0xd0af0f48             // imul    rdx, rax
 17190  	LONG $0xf8548949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rdx
 17191  	LONG $0xf9548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rdi + 24]
 17192  	LONG $0xd0af0f48             // imul    rdx, rax
 17193  	LONG $0xf8548949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rdx
 17194  	LONG $0x04c78348             // add    rdi, 4
 17195  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 17196  	JNE  LBB2_378
 17197  
 17198  LBB2_379:
        // r9 == 0: nothing left, exit. Otherwise rebase the output pointer (into
        // rsi) and the input pointer (rcx) at element rdi and restart rdi at 0.
 17199  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 17200  	JE   LBB2_1069
 17201  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 17202  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
 17203  	WORD $0xff31             // xor    edi, edi
 17204  
 17205  LBB2_381:
        // One qword per iteration, r9 times.
 17206  	LONG $0xf9148b48         // mov    rdx, qword [rcx + 8*rdi]
 17207  	LONG $0xd0af0f48         // imul    rdx, rax
 17208  	LONG $0xfe148948         // mov    qword [rsi + 8*rdi], rdx
 17209  	LONG $0x01c78348         // add    rdi, 1
 17210  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 17211  	JNE  LBB2_381
 17212  	JMP  LBB2_1069
 17213  
 17214  LBB2_385:
        // Scalar 64-bit multiply loops: qword [r8+8*i] = qword [rcx+8*i] * rax
        // (imul keeps the low 64 bits).
        // rax, esi and r9 are set before the jump here, outside this chunk;
        // presumably rax is the fixed multiplier and r9 the leftover element
        // count -- TODO confirm at the jump site.
        // rsi = esi & -4 bounds the 4x-unrolled loop; rdi is the element index.
 17215  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 17216  	WORD $0xff31             // xor    edi, edi
 17217  
 17218  LBB2_386:
        // Four qwords per iteration until rdi == rsi. The compare follows the body,
        // so this presumably relies on rsi being non-zero on entry -- TODO confirm.
 17219  	LONG $0xf9148b48             // mov    rdx, qword [rcx + 8*rdi]
 17220  	LONG $0xd0af0f48             // imul    rdx, rax
 17221  	LONG $0xf8148949             // mov    qword [r8 + 8*rdi], rdx
 17222  	LONG $0xf9548b48; BYTE $0x08 // mov    rdx, qword [rcx + 8*rdi + 8]
 17223  	LONG $0xd0af0f48             // imul    rdx, rax
 17224  	LONG $0xf8548949; BYTE $0x08 // mov    qword [r8 + 8*rdi + 8], rdx
 17225  	LONG $0xf9548b48; BYTE $0x10 // mov    rdx, qword [rcx + 8*rdi + 16]
 17226  	LONG $0xd0af0f48             // imul    rdx, rax
 17227  	LONG $0xf8548949; BYTE $0x10 // mov    qword [r8 + 8*rdi + 16], rdx
 17228  	LONG $0xf9548b48; BYTE $0x18 // mov    rdx, qword [rcx + 8*rdi + 24]
 17229  	LONG $0xd0af0f48             // imul    rdx, rax
 17230  	LONG $0xf8548949; BYTE $0x18 // mov    qword [r8 + 8*rdi + 24], rdx
 17231  	LONG $0x04c78348             // add    rdi, 4
 17232  	WORD $0x3948; BYTE $0xfe     // cmp    rsi, rdi
 17233  	JNE  LBB2_386
 17234  
 17235  LBB2_387:
        // r9 == 0: nothing left, exit. Otherwise rebase the output pointer (into
        // rsi) and the input pointer (rcx) at element rdi and restart rdi at 0.
 17236  	WORD $0x854d; BYTE $0xc9 // test    r9, r9
 17237  	JE   LBB2_1069
 17238  	LONG $0xf8348d49         // lea    rsi, [r8 + 8*rdi]
 17239  	LONG $0xf90c8d48         // lea    rcx, [rcx + 8*rdi]
 17240  	WORD $0xff31             // xor    edi, edi
 17241  
 17242  LBB2_389:
        // One qword per iteration, r9 times; falls through to the exit when done.
 17243  	LONG $0xf9148b48         // mov    rdx, qword [rcx + 8*rdi]
 17244  	LONG $0xd0af0f48         // imul    rdx, rax
 17245  	LONG $0xfe148948         // mov    qword [rsi + 8*rdi], rdx
 17246  	LONG $0x01c78348         // add    rdi, 1
 17247  	WORD $0x3949; BYTE $0xf9 // cmp    r9, rdi
 17248  	JNE  LBB2_389
 17249  
 17250  LBB2_1069:
        // Shared exit: the finished loops and the empty-input checks in this
        // part of the function all branch or fall through to this RET.
 17251  	RET
 17252  
 17253  LBB2_453: // vector setup: multiply 32-bit elements at [rcx] by the scalar in eax, storing to [r8]
 17254  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17255  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17256  	LONG $0xc06e0f66             // movd    xmm0, eax
 17257  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast eax into all four dword lanes of xmm0
 17258  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17259  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17260  	LONG $0x03e9c149             // shr    r9, 3
 17261  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17262  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17263  	JE   LBB2_621 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17264  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17265  	LONG $0xfee28348             // and    rdx, -2
 17266  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17267  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17268  
 17269  LBB2_455: // 16 dwords (two groups) per iteration: load from rcx, pmulld by xmm0, store to r8
 17270  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17271  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17272  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 17273  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 17274  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 17275  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 17276  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17277  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17278  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 17279  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 17280  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 17281  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 17282  	LONG $0x10c78348                           // add    rdi, 16
 17283  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17284  	JNE  LBB2_455
 17285  	JMP  LBB2_622 // continuation is outside this view
 17286  
 17287  LBB2_456: // vector setup: multiply 32-bit elements at [rcx] by the scalar in eax, storing to [r8]
 17288  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17289  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17290  	LONG $0xc06e0f66             // movd    xmm0, eax
 17291  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast eax into all four dword lanes of xmm0
 17292  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17293  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17294  	LONG $0x03e9c149             // shr    r9, 3
 17295  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17296  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17297  	JE   LBB2_629 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17298  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17299  	LONG $0xfee28348             // and    rdx, -2
 17300  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17301  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17302  
 17303  LBB2_458: // 16 dwords (two groups) per iteration: load from rcx, pmulld by xmm0, store to r8
 17304  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17305  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17306  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 17307  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 17308  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 17309  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 17310  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17311  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17312  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 17313  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 17314  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 17315  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 17316  	LONG $0x10c78348                           // add    rdi, 16
 17317  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17318  	JNE  LBB2_458
 17319  	JMP  LBB2_630 // continuation is outside this view
 17320  
 17321  LBB2_459: // vector setup: add the scalar in eax to 32-bit elements at [rcx], storing to [r8]
 17322  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17323  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17324  	LONG $0xc06e0f66             // movd    xmm0, eax
 17325  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast eax into all four dword lanes of xmm0
 17326  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17327  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17328  	LONG $0x03e9c149             // shr    r9, 3
 17329  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17330  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17331  	JE   LBB2_637 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17332  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17333  	LONG $0xfee28348             // and    rdx, -2
 17334  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17335  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17336  
 17337  LBB2_461: // 16 dwords (two groups) per iteration: load from rcx, paddd xmm0, store to r8
 17338  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17339  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17340  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 17341  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 17342  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 17343  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 17344  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17345  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17346  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 17347  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 17348  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 17349  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 17350  	LONG $0x10c78348                           // add    rdi, 16
 17351  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17352  	JNE  LBB2_461
 17353  	JMP  LBB2_638 // continuation is outside this view
 17354  
 17355  LBB2_462: // vector setup: compute (scalar in r11d) - (32-bit elements at [rcx]), storing to [r8]
 17356  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17357  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17358  	LONG $0x6e0f4166; BYTE $0xc3 // movd    xmm0, r11d
 17359  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast r11d into all four dword lanes of xmm0
 17360  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17361  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17362  	LONG $0x03e9c149             // shr    r9, 3
 17363  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17364  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17365  	JE   LBB2_645 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17366  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17367  	LONG $0xfee28348             // and    rdx, -2
 17368  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17369  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17370  
 17371  LBB2_464: // 16 dwords (two groups) per iteration: out = xmm0 - in
 17372  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17373  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17374  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 -- copy the broadcast scalar: psubd overwrites its destination (the minuend)
 17375  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 17376  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17377  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 17378  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 17379  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
 17380  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17381  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17382  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 17383  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 17384  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17385  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 17386  	LONG $0x7f0f41f3; WORD $0xb85c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm3
 17387  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
 17388  	LONG $0x10c78348                           // add    rdi, 16
 17389  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17390  	JNE  LBB2_464
 17391  	JMP  LBB2_646 // continuation is outside this view
 17392  
 17393  LBB2_465: // vector setup: add the scalar in eax to 32-bit elements at [rcx], storing to [r8]
 17394  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17395  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17396  	LONG $0xc06e0f66             // movd    xmm0, eax
 17397  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast eax into all four dword lanes of xmm0
 17398  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17399  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17400  	LONG $0x03e9c149             // shr    r9, 3
 17401  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17402  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17403  	JE   LBB2_653 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17404  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17405  	LONG $0xfee28348             // and    rdx, -2
 17406  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17407  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17408  
 17409  LBB2_467: // 16 dwords (two groups) per iteration: load from rcx, paddd xmm0, store to r8
 17410  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17411  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17412  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 17413  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 17414  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 17415  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 17416  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17417  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17418  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 17419  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 17420  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 17421  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 17422  	LONG $0x10c78348                           // add    rdi, 16
 17423  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17424  	JNE  LBB2_467
 17425  	JMP  LBB2_654 // continuation is outside this view
 17426  
 17427  LBB2_468: // vector setup: compute (scalar in r11d) - (32-bit elements at [rcx]), storing to [r8]
 17428  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17429  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- rsi = r10d rounded down to a multiple of 8 (one group = 2 x 4 dword lanes)
 17430  	LONG $0x6e0f4166; BYTE $0xc3 // movd    xmm0, r11d
 17431  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- broadcast r11d into all four dword lanes of xmm0
 17432  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 17433  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17434  	LONG $0x03e9c149             // shr    r9, 3
 17435  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rsi - 8) / 8 + 1 = number of 8-element groups
 17436  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17437  	JE   LBB2_661 // rsi == 8 (a single group): bypass the two-groups-per-iteration loop
 17438  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17439  	LONG $0xfee28348             // and    rdx, -2
 17440  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17441  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 17442  
 17443  LBB2_470: // 16 dwords (two groups) per iteration: out = xmm0 - in
 17444  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 17445  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 17446  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 -- copy the broadcast scalar: psubd overwrites its destination (the minuend)
 17447  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 17448  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17449  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 17450  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 17451  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
 17452  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 17453  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 17454  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 17455  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 17456  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17457  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 17458  	LONG $0x7f0f41f3; WORD $0xb85c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm3
 17459  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
 17460  	LONG $0x10c78348                           // add    rdi, 16
 17461  	LONG $0x02c28348                           // add    rdx, 2 -- ZF is set when the negative counter reaches zero
 17462  	JNE  LBB2_470
 17463  	JMP  LBB2_662 // continuation is outside this view
 17464  
 17465  LBB2_471: // vector setup: multiply doubles at [rcx] by the scalar in the low lane of xmm0, storing to [r8]
 17466  	WORD $0xc289             // mov    edx, eax
 17467  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17468  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17469  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17470  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17471  	LONG $0x02e9c149         // shr    r9, 2
 17472  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17473  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17474  	JE   LBB2_669 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17475  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17476  	LONG $0xfee68348         // and    rsi, -2
 17477  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17478  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17479  
 17480  LBB2_473: // 8 doubles (two groups) per iteration: load from rcx, mulpd by xmm1, store to r8
 17481  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17482  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17483  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 17484  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 17485  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 17486  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 17487  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17488  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17489  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 17490  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 17491  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 17492  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 17493  	LONG $0x08c78348                           // add    rdi, 8
 17494  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17495  	JNE  LBB2_473
 17496  	JMP  LBB2_670 // continuation is outside this view
 17497  
 17498  LBB2_474: // vector setup: multiply doubles at [rcx] by the scalar in the low lane of xmm0, storing to [r8]
 17499  	WORD $0xc289             // mov    edx, eax
 17500  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17501  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17502  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17503  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17504  	LONG $0x02e9c149         // shr    r9, 2
 17505  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17506  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17507  	JE   LBB2_677 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17508  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17509  	LONG $0xfee68348         // and    rsi, -2
 17510  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17511  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17512  
 17513  LBB2_476: // 8 doubles (two groups) per iteration: load from rcx, mulpd by xmm1, store to r8
 17514  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17515  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17516  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 17517  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 17518  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 17519  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 17520  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17521  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17522  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 17523  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 17524  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 17525  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 17526  	LONG $0x08c78348                           // add    rdi, 8
 17527  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17528  	JNE  LBB2_476
 17529  	JMP  LBB2_678 // continuation is outside this view
 17530  
 17531  LBB2_477: // vector setup: add the scalar in the low lane of xmm0 to doubles at [rcx], storing to [r8]
 17532  	WORD $0xc289             // mov    edx, eax
 17533  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17534  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17535  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17536  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17537  	LONG $0x02e9c149         // shr    r9, 2
 17538  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17539  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17540  	JE   LBB2_685 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17541  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17542  	LONG $0xfee68348         // and    rsi, -2
 17543  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17544  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17545  
 17546  LBB2_479: // 8 doubles (two groups) per iteration: load from rcx, addpd xmm1, store to r8
 17547  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17548  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17549  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 17550  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 17551  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 17552  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 17553  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17554  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17555  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 17556  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 17557  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 17558  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 17559  	LONG $0x08c78348                           // add    rdi, 8
 17560  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17561  	JNE  LBB2_479
 17562  	JMP  LBB2_686 // continuation is outside this view
 17563  
 17564  LBB2_480: // vector setup: compute (scalar in the low lane of xmm0) - (doubles at [rcx]), storing to [r8]
 17565  	WORD $0xc289             // mov    edx, eax
 17566  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17567  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17568  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17569  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17570  	LONG $0x02e9c149         // shr    r9, 2
 17571  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17572  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17573  	JE   LBB2_693 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17574  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17575  	LONG $0xfee68348         // and    rsi, -2
 17576  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17577  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17578  
 17579  LBB2_482: // 8 doubles (two groups) per iteration: out = xmm1 - in
 17580  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17581  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17582  	LONG $0xe1280f66                           // movapd    xmm4, xmm1 -- copy the duplicated scalar: subpd overwrites its destination (the minuend)
 17583  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 17584  	LONG $0xd1280f66                           // movapd    xmm2, xmm1
 17585  	LONG $0xd35c0f66                           // subpd    xmm2, xmm3
 17586  	LONG $0x110f4166; WORD $0xf824             // movupd    oword [r8 + 8*rdi], xmm4
 17587  	LONG $0x110f4166; WORD $0xf854; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm2
 17588  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17589  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17590  	LONG $0xe1280f66                           // movapd    xmm4, xmm1
 17591  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 17592  	LONG $0xd1280f66                           // movapd    xmm2, xmm1
 17593  	LONG $0xd35c0f66                           // subpd    xmm2, xmm3
 17594  	LONG $0x110f4166; WORD $0xf864; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm4
 17595  	LONG $0x110f4166; WORD $0xf854; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm2
 17596  	LONG $0x08c78348                           // add    rdi, 8
 17597  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17598  	JNE  LBB2_482
 17599  	JMP  LBB2_694 // continuation is outside this view
 17600  
 17601  LBB2_483: // vector setup: add the scalar in the low lane of xmm0 to doubles at [rcx], storing to [r8]
 17602  	WORD $0xc289             // mov    edx, eax
 17603  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17604  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17605  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17606  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17607  	LONG $0x02e9c149         // shr    r9, 2
 17608  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17609  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17610  	JE   LBB2_701 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17611  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17612  	LONG $0xfee68348         // and    rsi, -2
 17613  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17614  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17615  
 17616  LBB2_485: // 8 doubles (two groups) per iteration: load from rcx, addpd xmm1, store to r8
 17617  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17618  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17619  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 17620  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 17621  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 17622  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 17623  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17624  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17625  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 17626  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 17627  	LONG $0x110f4166; WORD $0xf854; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm2
 17628  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm3
 17629  	LONG $0x08c78348                           // add    rdi, 8
 17630  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17631  	JNE  LBB2_485
 17632  	JMP  LBB2_702 // continuation is outside this view
 17633  
 17634  LBB2_486: // vector setup: compute (scalar in the low lane of xmm0) - (doubles at [rcx]), storing to [r8]
 17635  	WORD $0xc289             // mov    edx, eax
 17636  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = eax rounded down to a multiple of 4 (one group = 2 x 2 double lanes)
 17637  	LONG $0xc8120ff2         // movddup    xmm1, xmm0 -- duplicate the low double of xmm0 into both lanes of xmm1
 17638  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 17639  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 17640  	LONG $0x02e9c149         // shr    r9, 2
 17641  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4) / 4 + 1 = number of 4-element groups
 17642  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 17643  	JE   LBB2_709 // rdx == 4 (a single group): bypass the two-groups-per-iteration loop
 17644  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 17645  	LONG $0xfee68348         // and    rsi, -2
 17646  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17647  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 17648  
 17649  LBB2_488: // 8 doubles (two groups) per iteration: out = xmm1 - in
 17650  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 17651  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 17652  	LONG $0xe1280f66                           // movapd    xmm4, xmm1 -- copy the duplicated scalar: subpd overwrites its destination (the minuend)
 17653  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 17654  	LONG $0xd1280f66                           // movapd    xmm2, xmm1
 17655  	LONG $0xd35c0f66                           // subpd    xmm2, xmm3
 17656  	LONG $0x110f4166; WORD $0xf824             // movupd    oword [r8 + 8*rdi], xmm4
 17657  	LONG $0x110f4166; WORD $0xf854; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm2
 17658  	LONG $0x54100f66; WORD $0x20f9             // movupd    xmm2, oword [rcx + 8*rdi + 32]
 17659  	LONG $0x5c100f66; WORD $0x30f9             // movupd    xmm3, oword [rcx + 8*rdi + 48]
 17660  	LONG $0xe1280f66                           // movapd    xmm4, xmm1
 17661  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 17662  	LONG $0xd1280f66                           // movapd    xmm2, xmm1
 17663  	LONG $0xd35c0f66                           // subpd    xmm2, xmm3
 17664  	LONG $0x110f4166; WORD $0xf864; BYTE $0x20 // movupd    oword [r8 + 8*rdi + 32], xmm4
 17665  	LONG $0x110f4166; WORD $0xf854; BYTE $0x30 // movupd    oword [r8 + 8*rdi + 48], xmm2
 17666  	LONG $0x08c78348                           // add    rdi, 8
 17667  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17668  	JNE  LBB2_488
 17669  	JMP  LBB2_710 // continuation is outside this view
 17670  
 17671  LBB2_489: // vector setup: multiply bytes at [rcx] by the byte in dl, storing to [r8] (via 16-bit pmullw)
 17672  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 17673  	WORD $0xe783; BYTE $0xe0     // and    edi, -32 -- rdi = r10d rounded down to a multiple of 32 (one group = 2 x 16 bytes)
 17674  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl -- eax = low byte of rdx, zero-extended (the multiplier)
 17675  	LONG $0xc06e0f66             // movd    xmm0, eax
 17676  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1 -- all-zero shuffle control for the pshufb below
 17677  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- broadcast byte 0 of xmm0 into all 16 bytes
 17678  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 17679  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 17680  	LONG $0x05e9c149             // shr    r9, 5
 17681  	LONG $0x01c18349             // add    r9, 1 -- r9 = (rdi - 32) / 32 + 1 = number of 32-byte groups
 17682  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0 -- xmm1 = multiplier zero-extended to eight 16-bit words
 17683  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 17684  	JE   LBB2_717 // rdi == 32 (a single group): bypass the two-groups-per-iteration loop
 17685  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 17686  	LONG $0xfee68348             // and    rsi, -2
 17687  	WORD $0xf748; BYTE $0xde     // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by 2 up to zero
 17688  	WORD $0xc031                 // xor    eax, eax -- rax = byte offset, starts at 0
 17689  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 17690  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2 -- xmm2 = words holding the multiplier byte in both halves
 17691  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */ -- mask ANDed into each product before packing; presumably 0x00ff per word, TODO confirm (rbp is set outside this view)
 17692  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 17693  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4 -- xmm4 = same value as xmm2 (used for the second vector of each pair)
 17694  
 17695  LBB2_491: // 64 bytes (two groups) per iteration: widen bytes to words, pmullw, pand xmm3, packuswb back to bytes
 17696  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax]
 17697  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16]
 17698  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5 -- low 8 input bytes, zero-extended to words
 17699  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5 -- high 8 input bytes, each duplicated into both halves of its word
 17700  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 17701  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17702  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 17703  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 17704  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5 -- repack: low-half results in bytes 0-7, high-half results in bytes 8-15
 17705  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 17706  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 17707  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 17708  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 17709  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 17710  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17711  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 17712  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 17713  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 17714  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32]
 17715  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48]
 17716  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 17717  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 17718  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 17719  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17720  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 17721  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 17722  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 17723  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 17724  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 17725  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 17726  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 17727  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 17728  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17729  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 17730  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 17731  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 17732  	LONG $0x40c08348                           // add    rax, 64
 17733  	LONG $0x02c68348                           // add    rsi, 2 -- ZF is set when the negative counter reaches zero
 17734  	JNE  LBB2_491
 17735  	JMP  LBB2_718 // continuation is outside this view
 17736  
 17737  LBB2_492:
 17738  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 17739  	WORD $0xe783; BYTE $0xe0     // and    edi, -32
 17740  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 17741  	LONG $0xc06e0f66             // movd    xmm0, eax
 17742  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 17743  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1
 17744  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 17745  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 17746  	LONG $0x05e9c149             // shr    r9, 5
 17747  	LONG $0x01c18349             // add    r9, 1
 17748  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0
 17749  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 17750  	JE   LBB2_725
 17751  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 17752  	LONG $0xfee68348             // and    rsi, -2
 17753  	WORD $0xf748; BYTE $0xde     // neg    rsi
 17754  	WORD $0xc031                 // xor    eax, eax
 17755  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 17756  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2
 17757  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */
 17758  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 17759  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4
 17760  
 17761  LBB2_494: // 8-bit multiply loop: [r8 + rax ..] = [rcx + rax ..] * broadcast byte, done in 16-bit lanes; 64 bytes per iteration
 17762  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax]
 17763  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16]
 17764  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5 ; low 8 input bytes zero-extended to 16-bit lanes
 17765  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5 ; high 8 input bytes, each duplicated into both halves of a 16-bit lane
 17766  	LONG $0xead50f66                           // pmullw    xmm5, xmm2 ; low byte of each lane = byte product mod 256 (xmm2 = punpckhbw of the broadcast multiplier)
 17767  	LONG $0xebdb0f66                           // pand    xmm5, xmm3 ; mask loaded from 0[rbp]; presumably 0x00ff per word so packuswb cannot saturate -- confirm
 17768  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1 ; xmm1 = zero-extended broadcast multiplier
 17769  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 17770  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5 ; repack low and high halves into 16 result bytes
 17771  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6 ; same sequence for the second 16 bytes
 17772  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 17773  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 17774  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 17775  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 17776  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17777  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 17778  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 17779  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 17780  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32] ; second 32-byte chunk of the unrolled pair
 17781  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48]
 17782  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 17783  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 17784  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 17785  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17786  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 17787  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 17788  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 17789  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 17790  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 17791  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 17792  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 17793  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 17794  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 17795  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 17796  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 17797  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 17798  	LONG $0x40c08348                           // add    rax, 64 ; advance the byte offset past both chunks
 17799  	LONG $0x02c68348                           // add    rsi, 2 ; rsi counts up from -(r9 & -2); ZF is set when it reaches zero
 17800  	JNE  LBB2_494 // loop while chunk pairs remain
 17801  	JMP  LBB2_726 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17802  
 17803  LBB2_495: // SIMD byte add: [r8 + i] = [rcx + i] + al for the first (r10d & -32) elements; r10d is presumably the element count -- confirm
 17804  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17805  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; rsi = r10d rounded down to a multiple of 32 bytes (one chunk = 2 x 16 bytes)
 17806  	WORD $0xb60f; BYTE $0xd0     // movzx    edx, al
 17807  	LONG $0xc26e0f66             // movd    xmm0, edx
 17808  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1 ; all-zero shuffle control
 17809  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 ; broadcast the low byte into all 16 lanes of xmm0
 17810  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 17811  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17812  	LONG $0x05e9c149             // shr    r9, 5
 17813  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 32) / 32 + 1 = number of 32-byte chunks
 17814  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17815  	JE   LBB2_733 // rsi == 32, i.e. a single chunk: skip the paired loop (target is outside this view)
 17816  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17817  	LONG $0xfee28348             // and    rdx, -2
 17818  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 17819  	WORD $0xff31                 // xor    edi, edi ; rdi = byte offset, starts at 0
 17820  
 17821  LBB2_497: // 2x-unrolled loop: 64 bytes per iteration
 17822  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 17823  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 17824  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 17825  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 17826  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 17827  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 17828  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] ; second chunk of the pair
 17829  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 17830  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 17831  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 17832  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 17833  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 17834  	LONG $0x40c78348                           // add    rdi, 64 ; advance past both chunks
 17835  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 17836  	JNE  LBB2_497 // loop while chunk pairs remain
 17837  	JMP  LBB2_734 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17838  
 17839  LBB2_498: // SIMD byte subtract: [r8 + i] = r11b - [rcx + i] for the first (r10d & -32) elements; r10d is presumably the element count -- confirm
 17840  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17841  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; rsi = r10d rounded down to a multiple of 32 bytes (one chunk = 2 x 16 bytes)
 17842  	LONG $0xd3b60f41             // movzx    edx, r11b
 17843  	LONG $0xc26e0f66             // movd    xmm0, edx
 17844  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1 ; all-zero shuffle control
 17845  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 ; broadcast the low byte into all 16 lanes of xmm0
 17846  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 17847  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17848  	LONG $0x05e9c149             // shr    r9, 5
 17849  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 32) / 32 + 1 = number of 32-byte chunks
 17850  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17851  	JE   LBB2_741 // rsi == 32, i.e. a single chunk: skip the paired loop (target is outside this view)
 17852  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17853  	LONG $0xfee28348             // and    rdx, -2
 17854  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 17855  	WORD $0xff31                 // xor    edi, edi ; rdi = byte offset, starts at 0
 17856  
 17857  LBB2_500: // 2x-unrolled loop: 64 bytes per iteration
 17858  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 17859  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 17860  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 ; copy the broadcast scalar because psubb overwrites its destination
 17861  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1 ; scalar - loaded bytes
 17862  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17863  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 17864  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 17865  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
 17866  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] ; second chunk of the pair
 17867  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 17868  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 17869  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 17870  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17871  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 17872  	LONG $0x7f0f41f3; WORD $0x385c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm3
 17873  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
 17874  	LONG $0x40c78348                           // add    rdi, 64 ; advance past both chunks
 17875  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 17876  	JNE  LBB2_500 // loop while chunk pairs remain
 17877  	JMP  LBB2_742 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17878  
 17879  LBB2_501: // SIMD byte add: [r8 + i] = [rcx + i] + al for the first (r10d & -32) elements; r10d is presumably the element count -- confirm
 17880  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17881  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; rsi = r10d rounded down to a multiple of 32 bytes (one chunk = 2 x 16 bytes)
 17882  	WORD $0xb60f; BYTE $0xd0     // movzx    edx, al
 17883  	LONG $0xc26e0f66             // movd    xmm0, edx
 17884  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1 ; all-zero shuffle control
 17885  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 ; broadcast the low byte into all 16 lanes of xmm0
 17886  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 17887  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17888  	LONG $0x05e9c149             // shr    r9, 5
 17889  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 32) / 32 + 1 = number of 32-byte chunks
 17890  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17891  	JE   LBB2_749 // rsi == 32, i.e. a single chunk: skip the paired loop (target is outside this view)
 17892  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17893  	LONG $0xfee28348             // and    rdx, -2
 17894  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 17895  	WORD $0xff31                 // xor    edi, edi ; rdi = byte offset, starts at 0
 17896  
 17897  LBB2_503: // 2x-unrolled loop: 64 bytes per iteration
 17898  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 17899  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 17900  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 17901  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 17902  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 17903  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 17904  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] ; second chunk of the pair
 17905  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 17906  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 17907  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 17908  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 17909  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 17910  	LONG $0x40c78348                           // add    rdi, 64 ; advance past both chunks
 17911  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 17912  	JNE  LBB2_503 // loop while chunk pairs remain
 17913  	JMP  LBB2_750 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17914  
 17915  LBB2_504: // SIMD byte subtract: [r8 + i] = r11b - [rcx + i] for the first (r10d & -32) elements; r10d is presumably the element count -- confirm
 17916  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17917  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 ; rsi = r10d rounded down to a multiple of 32 bytes (one chunk = 2 x 16 bytes)
 17918  	LONG $0xd3b60f41             // movzx    edx, r11b
 17919  	LONG $0xc26e0f66             // movd    xmm0, edx
 17920  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1 ; all-zero shuffle control
 17921  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 ; broadcast the low byte into all 16 lanes of xmm0
 17922  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 17923  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17924  	LONG $0x05e9c149             // shr    r9, 5
 17925  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 32) / 32 + 1 = number of 32-byte chunks
 17926  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17927  	JE   LBB2_757 // rsi == 32, i.e. a single chunk: skip the paired loop (target is outside this view)
 17928  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17929  	LONG $0xfee28348             // and    rdx, -2
 17930  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 17931  	WORD $0xff31                 // xor    edi, edi ; rdi = byte offset, starts at 0
 17932  
 17933  LBB2_506: // 2x-unrolled loop: 64 bytes per iteration
 17934  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 17935  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 17936  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 ; copy the broadcast scalar because psubb overwrites its destination
 17937  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1 ; scalar - loaded bytes
 17938  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17939  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 17940  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 17941  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
 17942  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] ; second chunk of the pair
 17943  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 17944  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 17945  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 17946  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 17947  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 17948  	LONG $0x7f0f41f3; WORD $0x385c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm3
 17949  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
 17950  	LONG $0x40c78348                           // add    rdi, 64 ; advance past both chunks
 17951  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 17952  	JNE  LBB2_506 // loop while chunk pairs remain
 17953  	JMP  LBB2_758 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17954  
 17955  LBB2_507: // SIMD 64-bit add: [r8 + 8*i] = [rcx + 8*i] + rax for the first (r10d & -4) elements; r10d is presumably the element count -- confirm
 17956  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17957  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r10d rounded down to a multiple of 4 qwords (one chunk = 2 x 16 bytes)
 17958  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 17959  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 ; 0x44 selects dwords 0,1,0,1: the low qword is copied into both 64-bit lanes
 17960  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 17961  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17962  	LONG $0x02e9c149             // shr    r9, 2
 17963  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 4) / 4 + 1 = number of 4-element chunks
 17964  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17965  	JE   LBB2_765 // rsi == 4, i.e. a single chunk: skip the paired loop (target is outside this view)
 17966  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 17967  	LONG $0xfee28348             // and    rdx, -2
 17968  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 17969  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 8 in the addresses), starts at 0
 17970  
 17971  LBB2_509: // 2x-unrolled loop: 8 qwords (64 bytes) per iteration
 17972  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 17973  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 17974  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 17975  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 17976  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 17977  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 17978  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32] ; second chunk of the pair
 17979  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 17980  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 17981  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 17982  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 17983  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 17984  	LONG $0x08c78348                           // add    rdi, 8 ; advance 8 elements = both chunks
 17985  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 17986  	JNE  LBB2_509 // loop while chunk pairs remain
 17987  	JMP  LBB2_766 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 17988  
 17989  LBB2_510: // SIMD 64-bit subtract: [r8 + 8*i] = r11 - [rcx + 8*i] for the first (r10d & -4) elements; r10d is presumably the element count -- confirm
 17990  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 17991  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r10d rounded down to a multiple of 4 qwords (one chunk = 2 x 16 bytes)
 17992  	LONG $0x6e0f4966; BYTE $0xc3 // movq    xmm0, r11
 17993  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 ; 0x44 selects dwords 0,1,0,1: the low qword is copied into both 64-bit lanes
 17994  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 17995  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 17996  	LONG $0x02e9c149             // shr    r9, 2
 17997  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 4) / 4 + 1 = number of 4-element chunks
 17998  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 17999  	JE   LBB2_773 // rsi == 4, i.e. a single chunk: skip the paired loop (target is outside this view)
 18000  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18001  	LONG $0xfee28348             // and    rdx, -2
 18002  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18003  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 8 in the addresses), starts at 0
 18004  
 18005  LBB2_512: // 2x-unrolled loop: 8 qwords (64 bytes) per iteration
 18006  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18007  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18008  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 ; copy the broadcast scalar because psubq overwrites its destination
 18009  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1 ; scalar - loaded qwords
 18010  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18011  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18012  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 18013  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
 18014  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32] ; second chunk of the pair
 18015  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18016  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18017  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18018  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18019  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18020  	LONG $0x7f0f41f3; WORD $0xf85c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm3
 18021  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
 18022  	LONG $0x08c78348                           // add    rdi, 8 ; advance 8 elements = both chunks
 18023  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18024  	JNE  LBB2_512 // loop while chunk pairs remain
 18025  	JMP  LBB2_774 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18026  
 18027  LBB2_513: // SIMD 64-bit add: [r8 + 8*i] = [rcx + 8*i] + rax for the first (r10d & -4) elements; r10d is presumably the element count -- confirm
 18028  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18029  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r10d rounded down to a multiple of 4 qwords (one chunk = 2 x 16 bytes)
 18030  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 18031  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 ; 0x44 selects dwords 0,1,0,1: the low qword is copied into both 64-bit lanes
 18032  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18033  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18034  	LONG $0x02e9c149             // shr    r9, 2
 18035  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 4) / 4 + 1 = number of 4-element chunks
 18036  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18037  	JE   LBB2_781 // rsi == 4, i.e. a single chunk: skip the paired loop (target is outside this view)
 18038  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18039  	LONG $0xfee28348             // and    rdx, -2
 18040  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18041  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 8 in the addresses), starts at 0
 18042  
 18043  LBB2_515: // 2x-unrolled loop: 8 qwords (64 bytes) per iteration
 18044  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18045  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18046  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18047  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18048  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 18049  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 18050  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32] ; second chunk of the pair
 18051  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18052  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18053  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18054  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 18055  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
 18056  	LONG $0x08c78348                           // add    rdi, 8 ; advance 8 elements = both chunks
 18057  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18058  	JNE  LBB2_515 // loop while chunk pairs remain
 18059  	JMP  LBB2_782 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18060  
 18061  LBB2_516: // SIMD 64-bit subtract: [r8 + 8*i] = r11 - [rcx + 8*i] for the first (r10d & -4) elements; r10d is presumably the element count -- confirm
 18062  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18063  	WORD $0xe683; BYTE $0xfc     // and    esi, -4 ; rsi = r10d rounded down to a multiple of 4 qwords (one chunk = 2 x 16 bytes)
 18064  	LONG $0x6e0f4966; BYTE $0xc3 // movq    xmm0, r11
 18065  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68 ; 0x44 selects dwords 0,1,0,1: the low qword is copied into both 64-bit lanes
 18066  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18067  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18068  	LONG $0x02e9c149             // shr    r9, 2
 18069  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 4) / 4 + 1 = number of 4-element chunks
 18070  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18071  	JE   LBB2_789 // rsi == 4, i.e. a single chunk: skip the paired loop (target is outside this view)
 18072  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18073  	LONG $0xfee28348             // and    rdx, -2
 18074  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18075  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 8 in the addresses), starts at 0
 18076  
 18077  LBB2_518: // 2x-unrolled loop: 8 qwords (64 bytes) per iteration
 18078  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18079  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18080  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 ; copy the broadcast scalar because psubq overwrites its destination
 18081  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1 ; scalar - loaded qwords
 18082  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18083  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18084  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 18085  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
 18086  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32] ; second chunk of the pair
 18087  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18088  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18089  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18090  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18091  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18092  	LONG $0x7f0f41f3; WORD $0xf85c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm3
 18093  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
 18094  	LONG $0x08c78348                           // add    rdi, 8 ; advance 8 elements = both chunks
 18095  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18096  	JNE  LBB2_518 // loop while chunk pairs remain
 18097  	JMP  LBB2_790 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18098  
 18099  LBB2_519: // SIMD 16-bit multiply: [r8 + 2*i] = [rcx + 2*i] * (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18100  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18101  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18102  	LONG $0xc06e0f66             // movd    xmm0, eax
 18103  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18104  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18105  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18106  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18107  	LONG $0x04e9c149             // shr    r9, 4
 18108  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18109  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18110  	JE   LBB2_797 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18111  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18112  	LONG $0xfee28348             // and    rdx, -2
 18113  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18114  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18115  
 18116  LBB2_521: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18117  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18118  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18119  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0 ; keeps the low 16 bits of each product
 18120  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18121  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18122  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18123  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18124  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18125  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 18126  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18127  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18128  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18129  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18130  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18131  	JNE  LBB2_521 // loop while chunk pairs remain
 18132  	JMP  LBB2_798 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18133  
 18134  LBB2_522: // SIMD 16-bit multiply: [r8 + 2*i] = [rcx + 2*i] * (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18135  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18136  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18137  	LONG $0xc06e0f66             // movd    xmm0, eax
 18138  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18139  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18140  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18141  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18142  	LONG $0x04e9c149             // shr    r9, 4
 18143  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18144  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18145  	JE   LBB2_805 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18146  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18147  	LONG $0xfee28348             // and    rdx, -2
 18148  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18149  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18150  
 18151  LBB2_524: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18152  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18153  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18154  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0 ; keeps the low 16 bits of each product
 18155  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18156  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18157  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18158  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18159  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18160  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 18161  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18162  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18163  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18164  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18165  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18166  	JNE  LBB2_524 // loop while chunk pairs remain
 18167  	JMP  LBB2_806 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18168  
 18169  LBB2_525: // SIMD 16-bit multiply: [r8 + 2*i] = [rcx + 2*i] * (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18170  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18171  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18172  	LONG $0xc06e0f66             // movd    xmm0, eax
 18173  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18174  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18175  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18176  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18177  	LONG $0x04e9c149             // shr    r9, 4
 18178  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18179  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18180  	JE   LBB2_813 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18181  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18182  	LONG $0xfee28348             // and    rdx, -2
 18183  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18184  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18185  
 18186  LBB2_527: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18187  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18188  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18189  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0 ; keeps the low 16 bits of each product
 18190  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18191  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18192  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18193  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18194  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18195  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 18196  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18197  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18198  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18199  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18200  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18201  	JNE  LBB2_527 // loop while chunk pairs remain
 18202  	JMP  LBB2_814 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18203  
 18204  LBB2_528: // SIMD 16-bit multiply: [r8 + 2*i] = [rcx + 2*i] * (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18205  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18206  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18207  	LONG $0xc06e0f66             // movd    xmm0, eax
 18208  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18209  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18210  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18211  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18212  	LONG $0x04e9c149             // shr    r9, 4
 18213  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18214  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18215  	JE   LBB2_821 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18216  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18217  	LONG $0xfee28348             // and    rdx, -2
 18218  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18219  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18220  
 18221  LBB2_530: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18222  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18223  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18224  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0 ; keeps the low 16 bits of each product
 18225  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18226  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18227  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18228  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18229  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18230  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 18231  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 18232  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18233  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18234  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18235  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18236  	JNE  LBB2_530 // loop while chunk pairs remain
 18237  	JMP  LBB2_822 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18238  
 18239  LBB2_531: // SIMD 16-bit add: [r8 + 2*i] = [rcx + 2*i] + (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18240  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18241  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18242  	LONG $0xc06e0f66             // movd    xmm0, eax
 18243  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18244  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18245  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18246  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18247  	LONG $0x04e9c149             // shr    r9, 4
 18248  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18249  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18250  	JE   LBB2_829 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18251  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18252  	LONG $0xfee28348             // and    rdx, -2
 18253  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18254  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18255  
 18256  LBB2_533: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18257  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18258  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18259  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18260  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18261  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18262  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18263  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18264  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18265  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18266  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18267  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18268  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18269  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18270  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18271  	JNE  LBB2_533 // loop while chunk pairs remain
 18272  	JMP  LBB2_830 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18273  
 18274  LBB2_534: // SIMD 16-bit add: [r8 + 2*i] = [rcx + 2*i] + (low word of eax) for the first (r10d & -16) elements; r10d is presumably the element count -- confirm
 18275  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18276  	WORD $0xe683; BYTE $0xf0     // and    esi, -16 ; rsi = r10d rounded down to a multiple of 16 words (one chunk = 2 x 16 bytes)
 18277  	LONG $0xc06e0f66             // movd    xmm0, eax
 18278  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224 ; 0xE0 selects words 0,0,2,3: word 0 now fills the low dword
 18279  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 ; broadcast dword 0, so all 8 words hold the low 16 bits of eax
 18280  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18281  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18282  	LONG $0x04e9c149             // shr    r9, 4
 18283  	LONG $0x01c18349             // add    r9, 1 ; r9 = (rsi - 16) / 16 + 1 = number of 16-element chunks
 18284  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18285  	JE   LBB2_837 // rsi == 16, i.e. a single chunk: skip the paired loop (target is outside this view)
 18286  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18287  	LONG $0xfee28348             // and    rdx, -2
 18288  	WORD $0xf748; BYTE $0xda     // neg    rdx ; rdx = -(r9 & -2): negated count of chunks consumed in pairs by the loop
 18289  	WORD $0xff31                 // xor    edi, edi ; rdi = element index (scaled by 2 in the addresses), starts at 0
 18290  
 18291  LBB2_536: // 2x-unrolled loop: 32 words (64 bytes) per iteration
 18292  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18293  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18294  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18295  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18296  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18297  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18298  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32] ; second chunk of the pair
 18299  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18300  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18301  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18302  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18303  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
 18304  	LONG $0x20c78348                           // add    rdi, 32 ; advance 32 elements = both chunks
 18305  	LONG $0x02c28348                           // add    rdx, 2 ; ZF is set once the negated pair counter reaches zero
 18306  	JNE  LBB2_536 // loop while chunk pairs remain
 18307  	JMP  LBB2_838 // continues outside this view; presumably handles a leftover chunk and the scalar tail -- confirm
 18308  
 18309  LBB2_537:
        	// Vector setup for subtracting an array of 16-bit words from one broadcast value.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18310  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18311  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18312  	LONG $0xc06e0f66             // movd    xmm0, eax
 18313  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18314  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18315  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18316  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18317  	LONG $0x04e9c149             // shr    r9, 4
 18318  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18319  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18320  	JE   LBB2_845
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18321  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18322  	LONG $0xfee28348             // and    rdx, -2
 18323  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18324  	WORD $0xff31                 // xor    edi, edi
 18325  
 18326  LBB2_539:
        	// Each pass covers two groups (32 words, 64 bytes). psubw overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 2*rdi ...],
        	// stored to [r8 + 2*rdi ...].
 18327  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18328  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18329  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18330  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18331  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18332  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18333  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 18334  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
 18335  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18336  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18337  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18338  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18339  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18340  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18341  	LONG $0x7f0f41f3; WORD $0x785c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm3
 18342  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18343  	LONG $0x20c78348                           // add    rdi, 32
 18344  	LONG $0x02c28348                           // add    rdx, 2
 18345  	JNE  LBB2_539
        	// LBB2_846 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18346  	JMP  LBB2_846
 18347  
 18348  LBB2_540:
        	// Vector setup for subtracting an array of 16-bit words from one broadcast value.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18349  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18350  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18351  	LONG $0xc06e0f66             // movd    xmm0, eax
 18352  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18353  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18354  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18355  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18356  	LONG $0x04e9c149             // shr    r9, 4
 18357  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18358  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18359  	JE   LBB2_853
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18360  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18361  	LONG $0xfee28348             // and    rdx, -2
 18362  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18363  	WORD $0xff31                 // xor    edi, edi
 18364  
 18365  LBB2_542:
        	// Each pass covers two groups (32 words, 64 bytes). psubw overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 2*rdi ...],
        	// stored to [r8 + 2*rdi ...].
 18366  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18367  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18368  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18369  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18370  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18371  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18372  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 18373  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
 18374  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18375  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18376  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18377  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18378  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18379  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18380  	LONG $0x7f0f41f3; WORD $0x785c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm3
 18381  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18382  	LONG $0x20c78348                           // add    rdi, 32
 18383  	LONG $0x02c28348                           // add    rdx, 2
 18384  	JNE  LBB2_542
        	// LBB2_854 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18385  	JMP  LBB2_854
 18386  
 18387  LBB2_543:
        	// Vector setup for adding one broadcast 16-bit value to an array of words.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18388  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18389  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18390  	LONG $0xc06e0f66             // movd    xmm0, eax
 18391  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18392  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18393  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18394  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18395  	LONG $0x04e9c149             // shr    r9, 4
 18396  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18397  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18398  	JE   LBB2_861
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18399  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18400  	LONG $0xfee28348             // and    rdx, -2
 18401  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18402  	WORD $0xff31                 // xor    edi, edi
 18403  
 18404  LBB2_545:
        	// Each pass covers two groups (32 words, 64 bytes): load from [rcx + 2*rdi],
        	// add xmm0 with paddw (wrapping 16-bit add), store to [r8 + 2*rdi].
 18405  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18406  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18407  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18408  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18409  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18410  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18411  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18412  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18413  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18414  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18415  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18416  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18417  	LONG $0x20c78348                           // add    rdi, 32
 18418  	LONG $0x02c28348                           // add    rdx, 2
 18419  	JNE  LBB2_545
        	// LBB2_862 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18420  	JMP  LBB2_862
 18421  
 18422  LBB2_546:
        	// Vector setup for adding one broadcast 16-bit value to an array of words.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18423  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18424  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18425  	LONG $0xc06e0f66             // movd    xmm0, eax
 18426  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18427  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18428  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18429  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18430  	LONG $0x04e9c149             // shr    r9, 4
 18431  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18432  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18433  	JE   LBB2_869
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18434  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18435  	LONG $0xfee28348             // and    rdx, -2
 18436  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18437  	WORD $0xff31                 // xor    edi, edi
 18438  
 18439  LBB2_548:
        	// Each pass covers two groups (32 words, 64 bytes): load from [rcx + 2*rdi],
        	// add xmm0 with paddw (wrapping 16-bit add), store to [r8 + 2*rdi].
 18440  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18441  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18442  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18443  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18444  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 18445  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 18446  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18447  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18448  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 18449  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 18450  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm1
 18451  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm2
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18452  	LONG $0x20c78348                           // add    rdi, 32
 18453  	LONG $0x02c28348                           // add    rdx, 2
 18454  	JNE  LBB2_548
        	// LBB2_870 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18455  	JMP  LBB2_870
 18456  
 18457  LBB2_549:
        	// Vector setup for subtracting an array of 16-bit words from one broadcast value.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18458  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18459  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18460  	LONG $0xc06e0f66             // movd    xmm0, eax
 18461  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18462  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18463  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18464  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18465  	LONG $0x04e9c149             // shr    r9, 4
 18466  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18467  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18468  	JE   LBB2_877
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18469  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18470  	LONG $0xfee28348             // and    rdx, -2
 18471  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18472  	WORD $0xff31                 // xor    edi, edi
 18473  
 18474  LBB2_551:
        	// Each pass covers two groups (32 words, 64 bytes). psubw overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 2*rdi ...],
        	// stored to [r8 + 2*rdi ...].
 18475  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18476  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18477  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18478  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18479  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18480  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18481  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 18482  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
 18483  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18484  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18485  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18486  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18487  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18488  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18489  	LONG $0x7f0f41f3; WORD $0x785c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm3
 18490  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18491  	LONG $0x20c78348                           // add    rdi, 32
 18492  	LONG $0x02c28348                           // add    rdx, 2
 18493  	JNE  LBB2_551
        	// LBB2_878 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18494  	JMP  LBB2_878
 18495  
 18496  LBB2_552:
        	// Vector setup for subtracting an array of 16-bit words from one broadcast value.
        	// NOTE(review): eax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 16.
 18497  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18498  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
        	// xmm0 = low 16 bits of eax replicated into all eight word lanes.
 18499  	LONG $0xc06e0f66             // movd    xmm0, eax
 18500  	LONG $0xc0700ff2; BYTE $0xe0 // pshuflw    xmm0, xmm0, 224
 18501  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
        	// r9 = (rsi - 16) / 16 + 1, the number of 16-word groups.
 18502  	LONG $0xf0568d48             // lea    rdx, [rsi - 16]
 18503  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18504  	LONG $0x04e9c149             // shr    r9, 4
 18505  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 16 (a single group): bypass the 2x-unrolled loop below.
 18506  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18507  	JE   LBB2_885
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18508  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18509  	LONG $0xfee28348             // and    rdx, -2
 18510  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18511  	WORD $0xff31                 // xor    edi, edi
 18512  
 18513  LBB2_554:
        	// Each pass covers two groups (32 words, 64 bytes). psubw overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 2*rdi ...],
        	// stored to [r8 + 2*rdi ...].
 18514  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 18515  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 18516  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18517  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18518  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18519  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18520  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 18521  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm1
 18522  	LONG $0x4c6f0ff3; WORD $0x2079             // movdqu    xmm1, oword [rcx + 2*rdi + 32]
 18523  	LONG $0x546f0ff3; WORD $0x3079             // movdqu    xmm2, oword [rcx + 2*rdi + 48]
 18524  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18525  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 18526  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18527  	LONG $0xcaf90f66                           // psubw    xmm1, xmm2
 18528  	LONG $0x7f0f41f3; WORD $0x785c; BYTE $0x20 // movdqu    oword [r8 + 2*rdi + 32], xmm3
 18529  	LONG $0x7f0f41f3; WORD $0x784c; BYTE $0x30 // movdqu    oword [r8 + 2*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18530  	LONG $0x20c78348                           // add    rdi, 32
 18531  	LONG $0x02c28348                           // add    rdx, 2
 18532  	JNE  LBB2_554
        	// LBB2_886 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18533  	JMP  LBB2_886
 18534  
 18535  LBB2_555:
        	// Vector setup for multiplying an array of 32-bit floats by one broadcast value.
        	// NOTE(review): eax, xmm0, rcx and r8 are set before this chunk; presumably
        	// element count, scalar operand, input pointer and output pointer - confirm.
        	// rdx = eax rounded down to a multiple of 8.
 18536  	WORD $0xc289             // mov    edx, eax
 18537  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// xmm1 = lane 0 of xmm0 replicated into all four float lanes.
 18538  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18539  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0
        	// r9 = (rdx - 8) / 8 + 1, the number of 8-float groups.
 18540  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18541  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18542  	LONG $0x03e9c149         // shr    r9, 3
 18543  	LONG $0x01c18349         // add    r9, 1
        	// rdx == 8 (a single group): bypass the 2x-unrolled loop below.
 18544  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18545  	JE   LBB2_893
        	// rsi = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18546  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18547  	LONG $0xfee68348         // and    rsi, -2
 18548  	WORD $0xf748; BYTE $0xde // neg    rsi
        	// rdi = running element index.
 18549  	WORD $0xff31             // xor    edi, edi
 18550  
 18551  LBB2_557:
        	// Each pass covers two groups (16 floats, 64 bytes): load from [rcx + 4*rdi],
        	// multiply by xmm1 with mulps, store to [r8 + 4*rdi].
 18552  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18553  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18554  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 18555  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 18556  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 18557  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 18558  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
 18559  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18560  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 18561  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 18562  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 18563  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
        	// The add to rsi sets ZF when the trip count reaches zero, ending the loop.
 18564  	LONG $0x10c78348               // add    rdi, 16
 18565  	LONG $0x02c68348               // add    rsi, 2
 18566  	JNE  LBB2_557
        	// LBB2_894 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18567  	JMP  LBB2_894
 18568  
 18569  LBB2_558:
        	// Vector setup for multiplying an array of 32-bit floats by one broadcast value.
        	// NOTE(review): eax, xmm0, rcx and r8 are set before this chunk; presumably
        	// element count, scalar operand, input pointer and output pointer - confirm.
        	// rdx = eax rounded down to a multiple of 8.
 18570  	WORD $0xc289             // mov    edx, eax
 18571  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// xmm1 = lane 0 of xmm0 replicated into all four float lanes.
 18572  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18573  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0
        	// r9 = (rdx - 8) / 8 + 1, the number of 8-float groups.
 18574  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18575  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18576  	LONG $0x03e9c149         // shr    r9, 3
 18577  	LONG $0x01c18349         // add    r9, 1
        	// rdx == 8 (a single group): bypass the 2x-unrolled loop below.
 18578  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18579  	JE   LBB2_901
        	// rsi = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18580  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18581  	LONG $0xfee68348         // and    rsi, -2
 18582  	WORD $0xf748; BYTE $0xde // neg    rsi
        	// rdi = running element index.
 18583  	WORD $0xff31             // xor    edi, edi
 18584  
 18585  LBB2_560:
        	// Each pass covers two groups (16 floats, 64 bytes): load from [rcx + 4*rdi],
        	// multiply by xmm1 with mulps, store to [r8 + 4*rdi].
 18586  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18587  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18588  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 18589  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 18590  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 18591  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 18592  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
 18593  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18594  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 18595  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 18596  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 18597  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
        	// The add to rsi sets ZF when the trip count reaches zero, ending the loop.
 18598  	LONG $0x10c78348               // add    rdi, 16
 18599  	LONG $0x02c68348               // add    rsi, 2
 18600  	JNE  LBB2_560
        	// LBB2_902 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18601  	JMP  LBB2_902
 18602  
 18603  LBB2_561:
        	// Vector setup for adding one broadcast 64-bit value to an array of quadwords.
        	// NOTE(review): rax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 4.
 18604  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18605  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
        	// xmm0 = rax replicated into both quadword lanes (pshufd 68 copies dwords 0,1 to 2,3).
 18606  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 18607  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
        	// r9 = (rsi - 4) / 4 + 1, the number of 4-quadword groups.
 18608  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18609  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18610  	LONG $0x02e9c149             // shr    r9, 2
 18611  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 4 (a single group): bypass the 2x-unrolled loop below.
 18612  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18613  	JE   LBB2_909
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18614  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18615  	LONG $0xfee28348             // and    rdx, -2
 18616  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18617  	WORD $0xff31                 // xor    edi, edi
 18618  
 18619  LBB2_563:
        	// Each pass covers two groups (8 quadwords, 64 bytes): load from [rcx + 8*rdi],
        	// add xmm0 with paddq (wrapping 64-bit add), store to [r8 + 8*rdi].
 18620  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18621  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18622  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18623  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18624  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 18625  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 18626  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32]
 18627  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18628  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18629  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18630  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 18631  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18632  	LONG $0x08c78348                           // add    rdi, 8
 18633  	LONG $0x02c28348                           // add    rdx, 2
 18634  	JNE  LBB2_563
        	// LBB2_910 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18635  	JMP  LBB2_910
 18636  
 18637  LBB2_564:
        	// Vector setup for adding one broadcast value to an array of 32-bit floats.
        	// NOTE(review): eax, xmm0, rcx and r8 are set before this chunk; presumably
        	// element count, scalar operand, input pointer and output pointer - confirm.
        	// rdx = eax rounded down to a multiple of 8.
 18638  	WORD $0xc289             // mov    edx, eax
 18639  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// xmm1 = lane 0 of xmm0 replicated into all four float lanes.
 18640  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18641  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0
        	// r9 = (rdx - 8) / 8 + 1, the number of 8-float groups.
 18642  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18643  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18644  	LONG $0x03e9c149         // shr    r9, 3
 18645  	LONG $0x01c18349         // add    r9, 1
        	// rdx == 8 (a single group): bypass the 2x-unrolled loop below.
 18646  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18647  	JE   LBB2_917
        	// rsi = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18648  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18649  	LONG $0xfee68348         // and    rsi, -2
 18650  	WORD $0xf748; BYTE $0xde // neg    rsi
        	// rdi = running element index.
 18651  	WORD $0xff31             // xor    edi, edi
 18652  
 18653  LBB2_566:
        	// Each pass covers two groups (16 floats, 64 bytes): load from [rcx + 4*rdi],
        	// add xmm1 with addps, store to [r8 + 4*rdi].
 18654  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18655  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18656  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 18657  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 18658  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 18659  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 18660  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
 18661  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18662  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 18663  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 18664  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 18665  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
        	// The add to rsi sets ZF when the trip count reaches zero, ending the loop.
 18666  	LONG $0x10c78348               // add    rdi, 16
 18667  	LONG $0x02c68348               // add    rsi, 2
 18668  	JNE  LBB2_566
        	// LBB2_918 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18669  	JMP  LBB2_918
 18670  
 18671  LBB2_567:
        	// Vector setup for subtracting an array of 64-bit quadwords from one broadcast value.
        	// NOTE(review): r11, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 4.
 18672  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18673  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
        	// xmm0 = r11 replicated into both quadword lanes (pshufd 68 copies dwords 0,1 to 2,3).
 18674  	LONG $0x6e0f4966; BYTE $0xc3 // movq    xmm0, r11
 18675  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
        	// r9 = (rsi - 4) / 4 + 1, the number of 4-quadword groups.
 18676  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18677  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18678  	LONG $0x02e9c149             // shr    r9, 2
 18679  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 4 (a single group): bypass the 2x-unrolled loop below.
 18680  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18681  	JE   LBB2_925
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18682  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18683  	LONG $0xfee28348             // and    rdx, -2
 18684  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18685  	WORD $0xff31                 // xor    edi, edi
 18686  
 18687  LBB2_569:
        	// Each pass covers two groups (8 quadwords, 64 bytes). psubq overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 8*rdi ...],
        	// stored to [r8 + 8*rdi ...].
 18688  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18689  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18690  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18691  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18692  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18693  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18694  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 18695  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
 18696  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32]
 18697  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18698  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18699  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18700  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18701  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18702  	LONG $0x7f0f41f3; WORD $0xf85c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm3
 18703  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18704  	LONG $0x08c78348                           // add    rdi, 8
 18705  	LONG $0x02c28348                           // add    rdx, 2
 18706  	JNE  LBB2_569
        	// LBB2_926 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18707  	JMP  LBB2_926
 18708  
 18709  LBB2_570:
        	// Vector setup for subtracting an array of 32-bit floats from one broadcast value.
        	// NOTE(review): eax, xmm0, rcx and r8 are set before this chunk; presumably
        	// element count, scalar operand, input pointer and output pointer - confirm.
        	// rdx = eax rounded down to a multiple of 8.
 18710  	WORD $0xc289             // mov    edx, eax
 18711  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// xmm1 = lane 0 of xmm0 replicated into all four float lanes.
 18712  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18713  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0
        	// r9 = (rdx - 8) / 8 + 1, the number of 8-float groups.
 18714  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18715  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18716  	LONG $0x03e9c149         // shr    r9, 3
 18717  	LONG $0x01c18349         // add    r9, 1
        	// rdx == 8 (a single group): bypass the 2x-unrolled loop below.
 18718  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18719  	JE   LBB2_933
        	// rsi = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18720  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18721  	LONG $0xfee68348         // and    rsi, -2
 18722  	WORD $0xf748; BYTE $0xde // neg    rsi
        	// rdi = running element index.
 18723  	WORD $0xff31             // xor    edi, edi
 18724  
 18725  LBB2_572:
        	// Each pass covers two groups (16 floats, 64 bytes). subps overwrites its
        	// destination, so xmm1 is copied first: result = xmm1 - [rcx + 4*rdi ...],
        	// stored to [r8 + 4*rdi ...].
 18726  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18727  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18728  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1
 18729  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
 18730  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
 18731  	WORD $0x5c0f; BYTE $0xd3       // subps    xmm2, xmm3
 18732  	LONG $0x24110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm4
 18733  	LONG $0x54110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm2
 18734  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
 18735  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18736  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1
 18737  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
 18738  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
 18739  	WORD $0x5c0f; BYTE $0xd3       // subps    xmm2, xmm3
 18740  	LONG $0x64110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm4
 18741  	LONG $0x54110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm2
        	// The add to rsi sets ZF when the trip count reaches zero, ending the loop.
 18742  	LONG $0x10c78348               // add    rdi, 16
 18743  	LONG $0x02c68348               // add    rsi, 2
 18744  	JNE  LBB2_572
        	// LBB2_934 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18745  	JMP  LBB2_934
 18746  
 18747  LBB2_573:
        	// Vector setup for adding one broadcast 64-bit value to an array of quadwords.
        	// NOTE(review): rax, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 4.
 18748  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18749  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
        	// xmm0 = rax replicated into both quadword lanes (pshufd 68 copies dwords 0,1 to 2,3).
 18750  	LONG $0x6e0f4866; BYTE $0xc0 // movq    xmm0, rax
 18751  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
        	// r9 = (rsi - 4) / 4 + 1, the number of 4-quadword groups.
 18752  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18753  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18754  	LONG $0x02e9c149             // shr    r9, 2
 18755  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 4 (a single group): bypass the 2x-unrolled loop below.
 18756  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18757  	JE   LBB2_941
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18758  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18759  	LONG $0xfee28348             // and    rdx, -2
 18760  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18761  	WORD $0xff31                 // xor    edi, edi
 18762  
 18763  LBB2_575:
        	// Each pass covers two groups (8 quadwords, 64 bytes): load from [rcx + 8*rdi],
        	// add xmm0 with paddq (wrapping 64-bit add), store to [r8 + 8*rdi].
 18764  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18765  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18766  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18767  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18768  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 18769  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 18770  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32]
 18771  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18772  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 18773  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 18774  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm1
 18775  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm2
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18776  	LONG $0x08c78348                           // add    rdi, 8
 18777  	LONG $0x02c28348                           // add    rdx, 2
 18778  	JNE  LBB2_575
        	// LBB2_942 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18779  	JMP  LBB2_942
 18780  
 18781  LBB2_576:
        	// Vector setup for adding one broadcast value to an array of 32-bit floats.
        	// NOTE(review): eax, xmm0, rcx and r8 are set before this chunk; presumably
        	// element count, scalar operand, input pointer and output pointer - confirm.
        	// rdx = eax rounded down to a multiple of 8.
 18782  	WORD $0xc289             // mov    edx, eax
 18783  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// xmm1 = lane 0 of xmm0 replicated into all four float lanes.
 18784  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18785  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0
        	// r9 = (rdx - 8) / 8 + 1, the number of 8-float groups.
 18786  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18787  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18788  	LONG $0x03e9c149         // shr    r9, 3
 18789  	LONG $0x01c18349         // add    r9, 1
        	// rdx == 8 (a single group): bypass the 2x-unrolled loop below.
 18790  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18791  	JE   LBB2_949
        	// rsi = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18792  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18793  	LONG $0xfee68348         // and    rsi, -2
 18794  	WORD $0xf748; BYTE $0xde // neg    rsi
        	// rdi = running element index.
 18795  	WORD $0xff31             // xor    edi, edi
 18796  
 18797  LBB2_578:
        	// Each pass covers two groups (16 floats, 64 bytes): load from [rcx + 4*rdi],
        	// add xmm1 with addps, store to [r8 + 4*rdi].
 18798  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18799  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18800  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 18801  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 18802  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 18803  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 18804  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32]
 18805  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18806  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 18807  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 18808  	LONG $0x54110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm2
 18809  	LONG $0x5c110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm3
        	// The add to rsi sets ZF when the trip count reaches zero, ending the loop.
 18810  	LONG $0x10c78348               // add    rdi, 16
 18811  	LONG $0x02c68348               // add    rsi, 2
 18812  	JNE  LBB2_578
        	// LBB2_950 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18813  	JMP  LBB2_950
 18814  
 18815  LBB2_579:
        	// Vector setup for subtracting an array of 64-bit quadwords from one broadcast value.
        	// NOTE(review): r11, r10d, rcx and r8 are set before this chunk; presumably
        	// scalar operand, element count, input pointer and output pointer - confirm.
        	// rsi = r10d rounded down to a multiple of 4.
 18816  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 18817  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
        	// xmm0 = r11 replicated into both quadword lanes (pshufd 68 copies dwords 0,1 to 2,3).
 18818  	LONG $0x6e0f4966; BYTE $0xc3 // movq    xmm0, r11
 18819  	LONG $0xc0700f66; BYTE $0x44 // pshufd    xmm0, xmm0, 68
        	// r9 = (rsi - 4) / 4 + 1, the number of 4-quadword groups.
 18820  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 18821  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 18822  	LONG $0x02e9c149             // shr    r9, 2
 18823  	LONG $0x01c18349             // add    r9, 1
        	// rsi == 4 (a single group): bypass the 2x-unrolled loop below.
 18824  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 18825  	JE   LBB2_957
        	// rdx = -(r9 & ~1): negative trip count, raised by 2 per pass until zero.
 18826  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 18827  	LONG $0xfee28348             // and    rdx, -2
 18828  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = running element index.
 18829  	WORD $0xff31                 // xor    edi, edi
 18830  
 18831  LBB2_581:
        	// Each pass covers two groups (8 quadwords, 64 bytes). psubq overwrites its
        	// destination, so xmm0 is copied first: result = xmm0 - [rcx + 8*rdi ...],
        	// stored to [r8 + 8*rdi ...].
 18832  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 18833  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 18834  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18835  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18836  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18837  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18838  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 18839  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm1
 18840  	LONG $0x4c6f0ff3; WORD $0x20f9             // movdqu    xmm1, oword [rcx + 8*rdi + 32]
 18841  	LONG $0x546f0ff3; WORD $0x30f9             // movdqu    xmm2, oword [rcx + 8*rdi + 48]
 18842  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 18843  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 18844  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 18845  	LONG $0xcafb0f66                           // psubq    xmm1, xmm2
 18846  	LONG $0x7f0f41f3; WORD $0xf85c; BYTE $0x20 // movdqu    oword [r8 + 8*rdi + 32], xmm3
 18847  	LONG $0x7f0f41f3; WORD $0xf84c; BYTE $0x30 // movdqu    oword [r8 + 8*rdi + 48], xmm1
        	// The add to rdx sets ZF when the trip count reaches zero, ending the loop.
 18848  	LONG $0x08c78348                           // add    rdi, 8
 18849  	LONG $0x02c28348                           // add    rdx, 2
 18850  	JNE  LBB2_581
        	// LBB2_958 is outside this chunk. NOTE(review): presumably finishes an odd group and the tail.
 18851  	JMP  LBB2_958
 18852  
 18853  LBB2_582: // Vector path for packed single-precision floats: dst[i] = xmm0[lane 0] - src[i], src via rcx, dst via r8
 18854  	WORD $0xc289             // mov    edx, eax
 18855  	WORD $0xe283; BYTE $0xf8 // and    edx, -8 -- edx = eax rounded down to a multiple of 8
 18856  	WORD $0x280f; BYTE $0xc8 // movaps    xmm1, xmm0
 18857  	LONG $0x00c8c60f         // shufps    xmm1, xmm0, 0 -- imm 0 replicates lane 0 of xmm0 into all four lanes of xmm1
 18858  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 18859  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 18860  	LONG $0x03e9c149         // shr    r9, 3
 18861  	LONG $0x01c18349         // add    r9, 1 -- r9 = (edx - 8)/8 + 1 = number of 8-float groups
 18862  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 18863  	JE   LBB2_965 // edx == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 18864  	WORD $0x894c; BYTE $0xce // mov    rsi, r9
 18865  	LONG $0xfee68348         // and    rsi, -2
 18866  	WORD $0xf748; BYTE $0xde // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 18867  	WORD $0xff31             // xor    edi, edi -- rdi = element index, starts at 0
 18868  
 18869  LBB2_584: // 2x-unrolled loop: each pass handles two groups (16 floats, 64 bytes)
 18870  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 18871  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 18872  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1 -- copy the broadcast value because subps overwrites its destination
 18873  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2 -- xmm4 = broadcast - src
 18874  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
 18875  	WORD $0x5c0f; BYTE $0xd3       // subps    xmm2, xmm3
 18876  	LONG $0x24110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm4
 18877  	LONG $0x54110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm2
 18878  	LONG $0xb954100f; BYTE $0x20   // movups    xmm2, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 18879  	LONG $0xb95c100f; BYTE $0x30   // movups    xmm3, oword [rcx + 4*rdi + 48]
 18880  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1
 18881  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
 18882  	WORD $0x280f; BYTE $0xd1       // movaps    xmm2, xmm1
 18883  	WORD $0x5c0f; BYTE $0xd3       // subps    xmm2, xmm3
 18884  	LONG $0x64110f41; WORD $0x20b8 // movups    oword [r8 + 4*rdi + 32], xmm4
 18885  	LONG $0x54110f41; WORD $0x30b8 // movups    oword [r8 + 4*rdi + 48], xmm2
 18886  	LONG $0x10c78348               // add    rdi, 16 -- advance the index by 16 floats
 18887  	LONG $0x02c68348               // add    rsi, 2 -- two groups consumed, ZF is set once the counter hits zero
 18888  	JNE  LBB2_584
 18889  	JMP  LBB2_966 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 18890  
 18891  LBB2_585: // Vector path for 8-bit lanes: dst[i] = low byte of (src[i] * dl), src via rcx, dst via r8
 18892  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 18893  	WORD $0xe783; BYTE $0xe0     // and    edi, -32 -- edi = r10d rounded down to a multiple of 32
 18894  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl -- multiplier byte comes from dl
 18895  	LONG $0xc06e0f66             // movd    xmm0, eax
 18896  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 18897  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 18898  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 18899  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 18900  	LONG $0x05e9c149             // shr    r9, 5
 18901  	LONG $0x01c18349             // add    r9, 1 -- r9 = (edi - 32)/32 + 1 = number of 32-byte groups
 18902  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0 -- xmm1 = multiplier zero-extended into eight 16-bit words
 18903  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 18904  	JE   LBB2_973 // edi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 18905  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 18906  	LONG $0xfee68348             // and    rsi, -2
 18907  	WORD $0xf748; BYTE $0xde     // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 18908  	WORD $0xc031                 // xor    eax, eax -- rax = byte offset, starts at 0
 18909  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 18910  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2 -- xmm2 = multiplier byte duplicated into both halves of every word
 18911  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */ -- word mask from the constant pool, presumably 0x00ff per word so packuswb cannot saturate, TODO confirm against the DATA block
 18912  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 18913  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4 -- xmm4 holds the same value as xmm2, used for the second vector of each group
 18914  
 18915  LBB2_587: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 18916  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax]
 18917  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16]
 18918  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5 -- low 8 source bytes widened to words
 18919  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5 -- high 8 source bytes, each duplicated into both halves of a word
 18920  	LONG $0xead50f66                           // pmullw    xmm5, xmm2 -- 16-bit products, only the low byte of each word is kept below
 18921  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18922  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 18923  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 18924  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5 -- narrow both word vectors back into 16 result bytes
 18925  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6 -- same widen/multiply/mask/pack sequence for the second 16 bytes
 18926  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 18927  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 18928  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 18929  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 18930  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18931  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 18932  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 18933  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 18934  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32] -- second unrolled group starts here
 18935  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48]
 18936  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 18937  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 18938  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 18939  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18940  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 18941  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 18942  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 18943  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 18944  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 18945  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 18946  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 18947  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 18948  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18949  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 18950  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 18951  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 18952  	LONG $0x40c08348                           // add    rax, 64 -- advance the byte offset by 64
 18953  	LONG $0x02c68348                           // add    rsi, 2 -- two groups consumed, ZF is set once the counter hits zero
 18954  	JNE  LBB2_587
 18955  	JMP  LBB2_974 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 18956  
 18957  LBB2_588: // Vector path for 8-bit lanes: dst[i] = low byte of (src[i] * dl), src via rcx, dst via r8 (same instruction sequence as LBB2_585, different exit labels)
 18958  	WORD $0x8944; BYTE $0xd7     // mov    edi, r10d
 18959  	WORD $0xe783; BYTE $0xe0     // and    edi, -32 -- edi = r10d rounded down to a multiple of 32
 18960  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl -- multiplier byte comes from dl
 18961  	LONG $0xc06e0f66             // movd    xmm0, eax
 18962  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 18963  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 18964  	LONG $0xe0478d48             // lea    rax, [rdi - 32]
 18965  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 18966  	LONG $0x05e9c149             // shr    r9, 5
 18967  	LONG $0x01c18349             // add    r9, 1 -- r9 = (edi - 32)/32 + 1 = number of 32-byte groups
 18968  	LONG $0x30380f66; BYTE $0xc8 // pmovzxbw    xmm1, xmm0 -- xmm1 = multiplier zero-extended into eight 16-bit words
 18969  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 18970  	JE   LBB2_981 // edi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 18971  	WORD $0x894c; BYTE $0xce     // mov    rsi, r9
 18972  	LONG $0xfee68348             // and    rsi, -2
 18973  	WORD $0xf748; BYTE $0xde     // neg    rsi -- rsi = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 18974  	WORD $0xc031                 // xor    eax, eax -- rax = byte offset, starts at 0
 18975  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 18976  	LONG $0xd2680f66             // punpckhbw    xmm2, xmm2 -- xmm2 = multiplier byte duplicated into both halves of every word
 18977  	LONG $0x5d6f0f66; BYTE $0x00 // movdqa    xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */ -- word mask from the constant pool, presumably 0x00ff per word so packuswb cannot saturate, TODO confirm against the DATA block
 18978  	LONG $0xe06f0f66             // movdqa    xmm4, xmm0
 18979  	LONG $0xe4680f66             // punpckhbw    xmm4, xmm4 -- xmm4 holds the same value as xmm2, used for the second vector of each group
 18980  
 18981  LBB2_590: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 18982  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax]
 18983  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16]
 18984  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5 -- low 8 source bytes widened to words
 18985  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5 -- high 8 source bytes, each duplicated into both halves of a word
 18986  	LONG $0xead50f66                           // pmullw    xmm5, xmm2 -- 16-bit products, only the low byte of each word is kept below
 18987  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18988  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 18989  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 18990  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5 -- narrow both word vectors back into 16 result bytes
 18991  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6 -- same widen/multiply/mask/pack sequence for the second 16 bytes
 18992  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 18993  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 18994  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 18995  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 18996  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 18997  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 18998  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 18999  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 19000  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32] -- second unrolled group starts here
 19001  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48]
 19002  	LONG $0x30380f66; BYTE $0xfd               // pmovzxbw    xmm7, xmm5
 19003  	LONG $0xed680f66                           // punpckhbw    xmm5, xmm5
 19004  	LONG $0xead50f66                           // pmullw    xmm5, xmm2
 19005  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 19006  	LONG $0xf9d50f66                           // pmullw    xmm7, xmm1
 19007  	LONG $0xfbdb0f66                           // pand    xmm7, xmm3
 19008  	LONG $0xfd670f66                           // packuswb    xmm7, xmm5
 19009  	LONG $0x30380f66; BYTE $0xee               // pmovzxbw    xmm5, xmm6
 19010  	LONG $0xf6680f66                           // punpckhbw    xmm6, xmm6
 19011  	LONG $0xf4d50f66                           // pmullw    xmm6, xmm4
 19012  	LONG $0xf3db0f66                           // pand    xmm6, xmm3
 19013  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 19014  	LONG $0xebdb0f66                           // pand    xmm5, xmm3
 19015  	LONG $0xee670f66                           // packuswb    xmm5, xmm6
 19016  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 19017  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 19018  	LONG $0x40c08348                           // add    rax, 64 -- advance the byte offset by 64
 19019  	LONG $0x02c68348                           // add    rsi, 2 -- two groups consumed, ZF is set once the counter hits zero
 19020  	JNE  LBB2_590
 19021  	JMP  LBB2_982 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19022  
 19023  LBB2_591: // Vector path for 8-bit lanes: dst[i] = src[i] + al (wrapping paddb), src via rcx, dst via r8
 19024  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19025  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 -- esi = r10d rounded down to a multiple of 32
 19026  	WORD $0xb60f; BYTE $0xd0     // movzx    edx, al -- addend byte comes from al
 19027  	LONG $0xc26e0f66             // movd    xmm0, edx
 19028  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 19029  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 19030  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 19031  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19032  	LONG $0x05e9c149             // shr    r9, 5
 19033  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 32)/32 + 1 = number of 32-byte groups
 19034  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19035  	JE   LBB2_989 // esi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19036  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19037  	LONG $0xfee28348             // and    rdx, -2
 19038  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19039  	WORD $0xff31                 // xor    edi, edi -- rdi = byte offset, starts at 0
 19040  
 19041  LBB2_593: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 19042  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19043  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19044  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0 -- xmm1 = src + broadcast, per byte
 19045  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19046  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 19047  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 19048  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] -- second unrolled group starts here
 19049  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 19050  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 19051  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19052  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 19053  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 19054  	LONG $0x40c78348                           // add    rdi, 64 -- advance the byte offset by 64
 19055  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19056  	JNE  LBB2_593
 19057  	JMP  LBB2_990 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19058  
 19059  LBB2_594: // Vector path for 8-bit lanes: dst[i] = r11b - src[i] (wrapping psubb), src via rcx, dst via r8
 19060  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19061  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 -- esi = r10d rounded down to a multiple of 32
 19062  	LONG $0xd3b60f41             // movzx    edx, r11b -- minuend byte comes from r11b
 19063  	LONG $0xc26e0f66             // movd    xmm0, edx
 19064  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 19065  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 19066  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 19067  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19068  	LONG $0x05e9c149             // shr    r9, 5
 19069  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 32)/32 + 1 = number of 32-byte groups
 19070  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19071  	JE   LBB2_997 // esi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19072  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19073  	LONG $0xfee28348             // and    rdx, -2
 19074  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19075  	WORD $0xff31                 // xor    edi, edi -- rdi = byte offset, starts at 0
 19076  
 19077  LBB2_596: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 19078  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19079  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19080  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 -- copy the broadcast value because psubb overwrites its destination
 19081  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1 -- xmm3 = broadcast - src, per byte
 19082  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19083  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 19084  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 19085  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
 19086  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] -- second unrolled group starts here
 19087  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 19088  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19089  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 19090  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19091  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 19092  	LONG $0x7f0f41f3; WORD $0x385c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm3
 19093  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
 19094  	LONG $0x40c78348                           // add    rdi, 64 -- advance the byte offset by 64
 19095  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19096  	JNE  LBB2_596
 19097  	JMP  LBB2_998 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19098  
 19099  LBB2_597: // Vector path for 8-bit lanes: dst[i] = src[i] + al (wrapping paddb), src via rcx, dst via r8 (same instruction sequence as LBB2_591, different exit labels)
 19100  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19101  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 -- esi = r10d rounded down to a multiple of 32
 19102  	WORD $0xb60f; BYTE $0xd0     // movzx    edx, al -- addend byte comes from al
 19103  	LONG $0xc26e0f66             // movd    xmm0, edx
 19104  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 19105  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 19106  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 19107  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19108  	LONG $0x05e9c149             // shr    r9, 5
 19109  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 32)/32 + 1 = number of 32-byte groups
 19110  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19111  	JE   LBB2_1005 // esi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19112  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19113  	LONG $0xfee28348             // and    rdx, -2
 19114  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19115  	WORD $0xff31                 // xor    edi, edi -- rdi = byte offset, starts at 0
 19116  
 19117  LBB2_599: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 19118  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19119  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19120  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0 -- xmm1 = src + broadcast, per byte
 19121  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19122  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 19123  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 19124  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] -- second unrolled group starts here
 19125  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 19126  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 19127  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19128  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm1
 19129  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm2
 19130  	LONG $0x40c78348                           // add    rdi, 64 -- advance the byte offset by 64
 19131  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19132  	JNE  LBB2_599
 19133  	JMP  LBB2_1006 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19134  
 19135  LBB2_600: // Vector path for 8-bit lanes: dst[i] = r11b - src[i] (wrapping psubb), src via rcx, dst via r8 (same instruction sequence as LBB2_594, different exit labels)
 19136  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19137  	WORD $0xe683; BYTE $0xe0     // and    esi, -32 -- esi = r10d rounded down to a multiple of 32
 19138  	LONG $0xd3b60f41             // movzx    edx, r11b -- minuend byte comes from r11b
 19139  	LONG $0xc26e0f66             // movd    xmm0, edx
 19140  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 19141  	LONG $0x00380f66; BYTE $0xc1 // pshufb    xmm0, xmm1 -- all-zero shuffle control replicates byte 0 into all 16 bytes of xmm0
 19142  	LONG $0xe0568d48             // lea    rdx, [rsi - 32]
 19143  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19144  	LONG $0x05e9c149             // shr    r9, 5
 19145  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 32)/32 + 1 = number of 32-byte groups
 19146  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19147  	JE   LBB2_1013 // esi == 32 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19148  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19149  	LONG $0xfee28348             // and    rdx, -2
 19150  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19151  	WORD $0xff31                 // xor    edi, edi -- rdi = byte offset, starts at 0
 19152  
 19153  LBB2_602: // 2x-unrolled loop: each pass handles two groups (64 bytes)
 19154  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19155  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19156  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 -- copy the broadcast value because psubb overwrites its destination
 19157  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1 -- xmm3 = broadcast - src, per byte
 19158  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19159  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 19160  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 19161  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm1
 19162  	LONG $0x4c6f0ff3; WORD $0x2039             // movdqu    xmm1, oword [rcx + rdi + 32] -- second unrolled group starts here
 19163  	LONG $0x546f0ff3; WORD $0x3039             // movdqu    xmm2, oword [rcx + rdi + 48]
 19164  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19165  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 19166  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19167  	LONG $0xcaf80f66                           // psubb    xmm1, xmm2
 19168  	LONG $0x7f0f41f3; WORD $0x385c; BYTE $0x20 // movdqu    oword [r8 + rdi + 32], xmm3
 19169  	LONG $0x7f0f41f3; WORD $0x384c; BYTE $0x30 // movdqu    oword [r8 + rdi + 48], xmm1
 19170  	LONG $0x40c78348                           // add    rdi, 64 -- advance the byte offset by 64
 19171  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19172  	JNE  LBB2_602
 19173  	JMP  LBB2_1014 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19174  
 19175  LBB2_603: // Vector path for 32-bit integer lanes: dst[i] = low 32 bits of (src[i] * eax), src via rcx, dst via r8
 19176  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19177  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- esi = r10d rounded down to a multiple of 8
 19178  	LONG $0xc06e0f66             // movd    xmm0, eax
 19179  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- imm 0 replicates dword 0 into all four lanes: xmm0 = {eax x4}
 19180  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19181  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19182  	LONG $0x03e9c149             // shr    r9, 3
 19183  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 8)/8 + 1 = number of 8-dword groups
 19184  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19185  	JE   LBB2_1021 // esi == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19186  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19187  	LONG $0xfee28348             // and    rdx, -2
 19188  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19189  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 19190  
 19191  LBB2_605: // 2x-unrolled loop: each pass handles two groups (16 dwords, 64 bytes)
 19192  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19193  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19194  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0 -- xmm1 = src * broadcast, low 32 bits per lane
 19195  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19196  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19197  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19198  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 19199  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19200  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 19201  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19202  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 19203  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 19204  	LONG $0x10c78348                           // add    rdi, 16 -- advance the index by 16 dwords
 19205  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19206  	JNE  LBB2_605
 19207  	JMP  LBB2_1022 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19208  
 19209  LBB2_606: // Vector path for 32-bit integer lanes: dst[i] = low 32 bits of (src[i] * eax), src via rcx, dst via r8 (same instruction sequence as LBB2_603, different exit labels)
 19210  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19211  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- esi = r10d rounded down to a multiple of 8
 19212  	LONG $0xc06e0f66             // movd    xmm0, eax
 19213  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- imm 0 replicates dword 0 into all four lanes: xmm0 = {eax x4}
 19214  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19215  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19216  	LONG $0x03e9c149             // shr    r9, 3
 19217  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 8)/8 + 1 = number of 8-dword groups
 19218  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19219  	JE   LBB2_1029 // esi == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19220  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19221  	LONG $0xfee28348             // and    rdx, -2
 19222  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19223  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 19224  
 19225  LBB2_608: // 2x-unrolled loop: each pass handles two groups (16 dwords, 64 bytes)
 19226  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19227  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19228  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0 -- xmm1 = src * broadcast, low 32 bits per lane
 19229  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19230  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19231  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19232  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 19233  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19234  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 19235  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19236  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 19237  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 19238  	LONG $0x10c78348                           // add    rdi, 16 -- advance the index by 16 dwords
 19239  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19240  	JNE  LBB2_608
 19241  	JMP  LBB2_1030 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19242  
 19243  LBB2_609: // Vector path for 32-bit integer lanes: dst[i] = src[i] + eax (wrapping paddd), src via rcx, dst via r8
 19244  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19245  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- esi = r10d rounded down to a multiple of 8
 19246  	LONG $0xc06e0f66             // movd    xmm0, eax
 19247  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- imm 0 replicates dword 0 into all four lanes: xmm0 = {eax x4}
 19248  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19249  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19250  	LONG $0x03e9c149             // shr    r9, 3
 19251  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 8)/8 + 1 = number of 8-dword groups
 19252  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19253  	JE   LBB2_1037 // esi == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19254  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19255  	LONG $0xfee28348             // and    rdx, -2
 19256  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19257  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 19258  
 19259  LBB2_611: // 2x-unrolled loop: each pass handles two groups (16 dwords, 64 bytes)
 19260  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19261  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19262  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0 -- xmm1 = src + broadcast, per dword
 19263  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19264  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19265  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19266  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 19267  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19268  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 19269  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19270  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 19271  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 19272  	LONG $0x10c78348                           // add    rdi, 16 -- advance the index by 16 dwords
 19273  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19274  	JNE  LBB2_611
 19275  	JMP  LBB2_1038 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19276  
 19277  LBB2_612: // Vector path for 32-bit integer lanes: dst[i] = r11d - src[i] (wrapping psubd), src via rcx, dst via r8
 19278  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19279  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- esi = r10d rounded down to a multiple of 8
 19280  	LONG $0x6e0f4166; BYTE $0xc3 // movd    xmm0, r11d
 19281  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- imm 0 replicates dword 0 into all four lanes: xmm0 = {r11d x4}
 19282  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19283  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19284  	LONG $0x03e9c149             // shr    r9, 3
 19285  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 8)/8 + 1 = number of 8-dword groups
 19286  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19287  	JE   LBB2_1045 // esi == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19288  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19289  	LONG $0xfee28348             // and    rdx, -2
 19290  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19291  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 19292  
 19293  LBB2_614: // 2x-unrolled loop: each pass handles two groups (16 dwords, 64 bytes)
 19294  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19295  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19296  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0 -- copy the broadcast value because psubd overwrites its destination
 19297  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1 -- xmm3 = broadcast - src, per dword
 19298  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19299  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 19300  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 19301  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
 19302  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 19303  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19304  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19305  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 19306  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19307  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 19308  	LONG $0x7f0f41f3; WORD $0xb85c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm3
 19309  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
 19310  	LONG $0x10c78348                           // add    rdi, 16 -- advance the index by 16 dwords
 19311  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19312  	JNE  LBB2_614
 19313  	JMP  LBB2_1046 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19314  
 19315  LBB2_615: // Vector path for 32-bit integer lanes: dst[i] = src[i] + eax (wrapping paddd), src via rcx, dst via r8 (same instruction sequence as LBB2_609, different exit labels)
 19316  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19317  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 -- esi = r10d rounded down to a multiple of 8
 19318  	LONG $0xc06e0f66             // movd    xmm0, eax
 19319  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0 -- imm 0 replicates dword 0 into all four lanes: xmm0 = {eax x4}
 19320  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19321  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19322  	LONG $0x03e9c149             // shr    r9, 3
 19323  	LONG $0x01c18349             // add    r9, 1 -- r9 = (esi - 8)/8 + 1 = number of 8-dword groups
 19324  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19325  	JE   LBB2_1053 // esi == 8 (exactly one group): bypass the 2x-unrolled loop, target is outside this view
 19326  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19327  	LONG $0xfee28348             // and    rdx, -2
 19328  	WORD $0xf748; BYTE $0xda     // neg    rdx -- rdx = -(r9 & ~1): negative group counter, stepped by +2 until it reaches zero
 19329  	WORD $0xff31                 // xor    edi, edi -- rdi = element index, starts at 0
 19330  
 19331  LBB2_617: // 2x-unrolled loop: each pass handles two groups (16 dwords, 64 bytes)
 19332  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19333  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19334  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0 -- xmm1 = src + broadcast, per dword
 19335  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19336  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19337  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19338  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32] -- second unrolled group starts here
 19339  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19340  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 19341  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19342  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm1
 19343  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm2
 19344  	LONG $0x10c78348                           // add    rdi, 16 -- advance the index by 16 dwords
 19345  	LONG $0x02c28348                           // add    rdx, 2 -- two groups consumed, ZF is set once the counter hits zero
 19346  	JNE  LBB2_617
 19347  	JMP  LBB2_1054 // continues outside this view -- presumably the leftover odd group and scalar tail, TODO confirm
 19348  
 19349  LBB2_618:
        // Setup for the 32-bit "xmm0 - array" vector loop below:
        //   esi  = r10d rounded down to a multiple of 8
        //   xmm0 = r11d replicated into all four dword lanes
        //   r9   = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero, control goes to LBB2_1061 (outside this chunk) instead of the loop.
        // Otherwise rdx = -(r9 & -2) becomes the loop counter and rdi, the element index, starts at 0.
 19350  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 19351  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 19352  	LONG $0x6e0f4166; BYTE $0xc3 // movd    xmm0, r11d
 19353  	LONG $0xc0700f66; BYTE $0x00 // pshufd    xmm0, xmm0, 0
 19354  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 19355  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 19356  	LONG $0x03e9c149             // shr    r9, 3
 19357  	LONG $0x01c18349             // add    r9, 1
 19358  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 19359  	JE   LBB2_1061
 19360  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 19361  	LONG $0xfee28348             // and    rdx, -2
 19362  	WORD $0xf748; BYTE $0xda     // neg    rdx
 19363  	WORD $0xff31                 // xor    edi, edi
 19364  
 19365  LBB2_620:
        // Vector subtract loop, unrolled 2x: each pass handles four 16-byte vectors (16 dwords).
        // Every result is xmm0 minus the loaded vector (psubd). xmm0 is copied into a scratch
        // register before each subtract, so the broadcast value is still intact for the next pass.
        // rdi advances by 16 elements and rdx by 2 per pass; the loop repeats until rdx reaches zero.
 19366  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19367  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19368  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19369  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 19370  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19371  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 19372  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 19373  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm1
 19374  	LONG $0x4c6f0ff3; WORD $0x20b9             // movdqu    xmm1, oword [rcx + 4*rdi + 32]
 19375  	LONG $0x546f0ff3; WORD $0x30b9             // movdqu    xmm2, oword [rcx + 4*rdi + 48]
 19376  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19377  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 19378  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 19379  	LONG $0xcafa0f66                           // psubd    xmm1, xmm2
 19380  	LONG $0x7f0f41f3; WORD $0xb85c; BYTE $0x20 // movdqu    oword [r8 + 4*rdi + 32], xmm3
 19381  	LONG $0x7f0f41f3; WORD $0xb84c; BYTE $0x30 // movdqu    oword [r8 + 4*rdi + 48], xmm1
 19382  	LONG $0x10c78348                           // add    rdi, 16
 19383  	LONG $0x02c28348                           // add    rdx, 2
 19384  	JNE  LBB2_620
        // Loop finished; control continues at LBB2_1062, which lies outside this chunk.
 19385  	JMP  LBB2_1062
 19386  
 19387  LBB2_621:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19388  	WORD $0xff31 // xor    edi, edi
 19389  
 19390  LBB2_622:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = rcx[4*rdi ..] * xmm0, lane-wise via pmulld.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 8-dword groups; confirm.
 19391  	LONG $0x01c1f641                           // test    r9b, 1
 19392  	JE   LBB2_624
 19393  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19394  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19395  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 19396  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19397  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19398  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19399  
 19400  LBB2_624:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_625; both targets lie outside this chunk.
 19401  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19402  	JE   LBB2_1069
 19403  	JMP  LBB2_625
 19404  
 19405  LBB2_629:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19406  	WORD $0xff31 // xor    edi, edi
 19407  
 19408  LBB2_630:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = rcx[4*rdi ..] * xmm0, lane-wise via pmulld.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 8-dword groups; confirm.
 19409  	LONG $0x01c1f641                           // test    r9b, 1
 19410  	JE   LBB2_632
 19411  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19412  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19413  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 19414  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 19415  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19416  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19417  
 19418  LBB2_632:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_633; both targets lie outside this chunk.
 19419  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19420  	JE   LBB2_1069
 19421  	JMP  LBB2_633
 19422  
 19423  LBB2_637:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19424  	WORD $0xff31 // xor    edi, edi
 19425  
 19426  LBB2_638:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = rcx[4*rdi ..] + xmm0, lane-wise via paddd.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 8-dword groups; confirm.
 19427  	LONG $0x01c1f641                           // test    r9b, 1
 19428  	JE   LBB2_640
 19429  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19430  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19431  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 19432  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19433  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19434  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19435  
 19436  LBB2_640:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_641; both targets lie outside this chunk.
 19437  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19438  	JE   LBB2_1069
 19439  	JMP  LBB2_641
 19440  
 19441  LBB2_645:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19442  	WORD $0xff31 // xor    edi, edi
 19443  
 19444  LBB2_646:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = xmm0 - rcx[4*rdi ..], lane-wise via psubd.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19445  	LONG $0x01c1f641                           // test    r9b, 1
 19446  	JE   LBB2_648
 19447  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19448  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19449  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19450  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 19451  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
 19452  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 19453  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
 19454  
 19455  LBB2_648:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_649; both targets lie outside this chunk.
 19456  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19457  	JE   LBB2_1069
 19458  	JMP  LBB2_649
 19459  
 19460  LBB2_653:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19461  	WORD $0xff31 // xor    edi, edi
 19462  
 19463  LBB2_654:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = rcx[4*rdi ..] + xmm0, lane-wise via paddd.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 8-dword groups; confirm.
 19464  	LONG $0x01c1f641                           // test    r9b, 1
 19465  	JE   LBB2_656
 19466  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19467  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19468  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 19469  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 19470  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 19471  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 19472  
 19473  LBB2_656:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_657; both targets lie outside this chunk.
 19474  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19475  	JE   LBB2_1069
 19476  	JMP  LBB2_657
 19477  
 19478  LBB2_661:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19479  	WORD $0xff31 // xor    edi, edi
 19480  
 19481  LBB2_662:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (8 dwords):
        // r8[4*rdi ..] = xmm0 - rcx[4*rdi ..], lane-wise via psubd.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19482  	LONG $0x01c1f641                           // test    r9b, 1
 19483  	JE   LBB2_664
 19484  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 19485  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 19486  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19487  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 19488  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
 19489  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 19490  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
 19491  
 19492  LBB2_664:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_665; both targets lie outside this chunk.
 19493  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19494  	JE   LBB2_1069
 19495  	JMP  LBB2_665
 19496  
 19497  LBB2_669:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19498  	WORD $0xff31 // xor    edi, edi
 19499  
 19500  LBB2_670:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = rcx[8*rdi ..] * xmm1, lane-wise via mulpd.
        // NOTE(review): r9 and xmm1 are set up outside this chunk; confirm their meaning at the jump site.
 19501  	LONG $0x01c1f641                           // test    r9b, 1
 19502  	JE   LBB2_672
 19503  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19504  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19505  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 19506  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 19507  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 19508  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 19509  
 19510  LBB2_672:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_673; both targets lie outside this chunk.
 19511  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19512  	JE   LBB2_1069
 19513  	JMP  LBB2_673
 19514  
 19515  LBB2_677:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19516  	WORD $0xff31 // xor    edi, edi
 19517  
 19518  LBB2_678:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = rcx[8*rdi ..] * xmm1, lane-wise via mulpd.
        // NOTE(review): r9 and xmm1 are set up outside this chunk; confirm their meaning at the jump site.
 19519  	LONG $0x01c1f641                           // test    r9b, 1
 19520  	JE   LBB2_680
 19521  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19522  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19523  	LONG $0xd1590f66                           // mulpd    xmm2, xmm1
 19524  	LONG $0xd9590f66                           // mulpd    xmm3, xmm1
 19525  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 19526  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 19527  
 19528  LBB2_680:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_681; both targets lie outside this chunk.
 19529  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19530  	JE   LBB2_1069
 19531  	JMP  LBB2_681
 19532  
 19533  LBB2_685:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19534  	WORD $0xff31 // xor    edi, edi
 19535  
 19536  LBB2_686:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = rcx[8*rdi ..] + xmm1, lane-wise via addpd.
        // NOTE(review): r9 and xmm1 are set up outside this chunk; confirm their meaning at the jump site.
 19537  	LONG $0x01c1f641                           // test    r9b, 1
 19538  	JE   LBB2_688
 19539  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19540  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19541  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 19542  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 19543  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 19544  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 19545  
 19546  LBB2_688:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_689; both targets lie outside this chunk.
 19547  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19548  	JE   LBB2_1069
 19549  	JMP  LBB2_689
 19550  
 19551  LBB2_693:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19552  	WORD $0xff31 // xor    edi, edi
 19553  
 19554  LBB2_694:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = xmm1 - rcx[8*rdi ..], lane-wise via subpd.
        // The second subtract writes its result into xmm1 itself, so xmm1 no longer holds
        // its original value after this step.
 19555  	LONG $0x01c1f641                           // test    r9b, 1
 19556  	JE   LBB2_696
 19557  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19558  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19559  	LONG $0xe1280f66                           // movapd    xmm4, xmm1
 19560  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 19561  	LONG $0xcb5c0f66                           // subpd    xmm1, xmm3
 19562  	LONG $0x110f4166; WORD $0xf824             // movupd    oword [r8 + 8*rdi], xmm4
 19563  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
 19564  
 19565  LBB2_696:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_697; both targets lie outside this chunk.
 19566  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19567  	JE   LBB2_1069
 19568  	JMP  LBB2_697
 19569  
 19570  LBB2_701:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19571  	WORD $0xff31 // xor    edi, edi
 19572  
 19573  LBB2_702:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = rcx[8*rdi ..] + xmm1, lane-wise via addpd.
        // NOTE(review): r9 and xmm1 are set up outside this chunk; confirm their meaning at the jump site.
 19574  	LONG $0x01c1f641                           // test    r9b, 1
 19575  	JE   LBB2_704
 19576  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19577  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19578  	LONG $0xd1580f66                           // addpd    xmm2, xmm1
 19579  	LONG $0xd9580f66                           // addpd    xmm3, xmm1
 19580  	LONG $0x110f4166; WORD $0xf814             // movupd    oword [r8 + 8*rdi], xmm2
 19581  	LONG $0x110f4166; WORD $0xf85c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm3
 19582  
 19583  LBB2_704:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_705; both targets lie outside this chunk.
 19584  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19585  	JE   LBB2_1069
 19586  	JMP  LBB2_705
 19587  
 19588  LBB2_709:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19589  	WORD $0xff31 // xor    edi, edi
 19590  
 19591  LBB2_710:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 doubles):
        // r8[8*rdi ..] = xmm1 - rcx[8*rdi ..], lane-wise via subpd.
        // The second subtract writes its result into xmm1 itself, so xmm1 no longer holds
        // its original value after this step.
 19592  	LONG $0x01c1f641                           // test    r9b, 1
 19593  	JE   LBB2_712
 19594  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 19595  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 19596  	LONG $0xe1280f66                           // movapd    xmm4, xmm1
 19597  	LONG $0xe25c0f66                           // subpd    xmm4, xmm2
 19598  	LONG $0xcb5c0f66                           // subpd    xmm1, xmm3
 19599  	LONG $0x110f4166; WORD $0xf824             // movupd    oword [r8 + 8*rdi], xmm4
 19600  	LONG $0x110f4166; WORD $0xf84c; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm1
 19601  
 19602  LBB2_712:
        // Jump to LBB2_1069 when rdx == rax, otherwise to LBB2_713; both targets lie outside this chunk.
 19603  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 19604  	JE   LBB2_1069
 19605  	JMP  LBB2_713
 19606  
 19607  LBB2_717:
        // Zero-index entry to the odd-chunk step below: rax = 0 (byte offset), then fall through.
 19608  	WORD $0xc031 // xor    eax, eax
 19609  
 19610  LBB2_718:
        // Skipped when bit 0 of r9b is clear. Otherwise multiplies one 32-byte chunk at rcx + rax
        // byte-wise and stores it at r8 + rax. Each 16-byte vector is split in two halves:
        //   low 8 bytes:  zero-extended to words (pmovzxbw), multiplied by xmm1 (pmullw)
        //   high 8 bytes: expanded with punpckhbw, multiplied by xmm0 expanded the same way
        // Both products are ANDed with the 16-byte constant at 0[rbp] and narrowed back to
        // bytes with packuswb.
        // NOTE(review): the constant is presumably a 0x00ff-per-word mask, so packuswb never
        // saturates; confirm against the LCDATA table. xmm1 is set up outside this chunk.
        // xmm0 is overwritten by the second punpckhbw.
 19611  	LONG $0x01c1f641                           // test    r9b, 1
 19612  	JE   LBB2_720
 19613  	LONG $0x146f0ff3; BYTE $0x01               // movdqu    xmm2, oword [rcx + rax]
 19614  	LONG $0x5c6f0ff3; WORD $0x1001             // movdqu    xmm3, oword [rcx + rax + 16]
 19615  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 19616  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 19617  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 19618  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 19619  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 19620  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI2_0] */
 19621  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 19622  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 19623  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 19624  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 19625  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 19626  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 19627  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 19628  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 19629  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 19630  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 19631  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 19632  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 19633  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 19634  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 19635  
 19636  LBB2_720:
        // Jump to LBB2_1069 when rdi == r10, otherwise to LBB2_721; both targets lie outside this chunk.
 19637  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 19638  	JE   LBB2_1069
 19639  	JMP  LBB2_721
 19640  
 19641  LBB2_725:
        // Zero-index entry to the odd-chunk step below: rax = 0 (byte offset), then fall through.
 19642  	WORD $0xc031 // xor    eax, eax
 19643  
 19644  LBB2_726:
        // Skipped when bit 0 of r9b is clear. Otherwise multiplies one 32-byte chunk at rcx + rax
        // byte-wise and stores it at r8 + rax. Each 16-byte vector is split in two halves:
        //   low 8 bytes:  zero-extended to words (pmovzxbw), multiplied by xmm1 (pmullw)
        //   high 8 bytes: expanded with punpckhbw, multiplied by xmm0 expanded the same way
        // Both products are ANDed with the 16-byte constant at 0[rbp] and narrowed back to
        // bytes with packuswb.
        // NOTE(review): the constant is presumably a 0x00ff-per-word mask, so packuswb never
        // saturates; confirm against the LCDATA table. xmm1 is set up outside this chunk.
        // xmm0 is overwritten by the second punpckhbw.
 19645  	LONG $0x01c1f641                           // test    r9b, 1
 19646  	JE   LBB2_728
 19647  	LONG $0x146f0ff3; BYTE $0x01               // movdqu    xmm2, oword [rcx + rax]
 19648  	LONG $0x5c6f0ff3; WORD $0x1001             // movdqu    xmm3, oword [rcx + rax + 16]
 19649  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 19650  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 19651  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 19652  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 19653  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 19654  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI2_0] */
 19655  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 19656  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 19657  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 19658  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 19659  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 19660  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 19661  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 19662  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 19663  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 19664  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 19665  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 19666  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 19667  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 19668  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 19669  
 19670  LBB2_728:
        // Jump to LBB2_1069 when rdi == r10, otherwise to LBB2_729; both targets lie outside this chunk.
 19671  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 19672  	JE   LBB2_1069
 19673  	JMP  LBB2_729
 19674  
 19675  LBB2_733:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19676  	WORD $0xff31 // xor    edi, edi
 19677  
 19678  LBB2_734:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk:
        // r8[rdi ..] = rcx[rdi ..] + xmm0, byte-wise via paddb.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 32-byte groups; confirm.
 19679  	LONG $0x01c1f641                           // test    r9b, 1
 19680  	JE   LBB2_736
 19681  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19682  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19683  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 19684  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19685  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 19686  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 19687  
 19688  LBB2_736:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_737; both targets lie outside this chunk.
 19689  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19690  	JE   LBB2_1069
 19691  	JMP  LBB2_737
 19692  
 19693  LBB2_741:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19694  	WORD $0xff31 // xor    edi, edi
 19695  
 19696  LBB2_742:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk:
        // r8[rdi ..] = xmm0 - rcx[rdi ..], byte-wise via psubb.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19697  	LONG $0x01c1f641                           // test    r9b, 1
 19698  	JE   LBB2_744
 19699  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19700  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19701  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19702  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 19703  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
 19704  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 19705  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
 19706  
 19707  LBB2_744:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_745; both targets lie outside this chunk.
 19708  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19709  	JE   LBB2_1069
 19710  	JMP  LBB2_745
 19711  
 19712  LBB2_749:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19713  	WORD $0xff31 // xor    edi, edi
 19714  
 19715  LBB2_750:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk:
        // r8[rdi ..] = rcx[rdi ..] + xmm0, byte-wise via paddb.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 32-byte groups; confirm.
 19716  	LONG $0x01c1f641                           // test    r9b, 1
 19717  	JE   LBB2_752
 19718  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19719  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19720  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 19721  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 19722  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 19723  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 19724  
 19725  LBB2_752:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_753; both targets lie outside this chunk.
 19726  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19727  	JE   LBB2_1069
 19728  	JMP  LBB2_753
 19729  
 19730  LBB2_757:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19731  	WORD $0xff31 // xor    edi, edi
 19732  
 19733  LBB2_758:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk:
        // r8[rdi ..] = xmm0 - rcx[rdi ..], byte-wise via psubb.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19734  	LONG $0x01c1f641                           // test    r9b, 1
 19735  	JE   LBB2_760
 19736  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 19737  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 19738  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19739  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 19740  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
 19741  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 19742  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
 19743  
 19744  LBB2_760:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_761; both targets lie outside this chunk.
 19745  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19746  	JE   LBB2_1069
 19747  	JMP  LBB2_761
 19748  
 19749  LBB2_765:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19750  	WORD $0xff31 // xor    edi, edi
 19751  
 19752  LBB2_766:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 qwords):
        // r8[8*rdi ..] = rcx[8*rdi ..] + xmm0, lane-wise via paddq.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 4-qword groups; confirm.
 19753  	LONG $0x01c1f641                           // test    r9b, 1
 19754  	JE   LBB2_768
 19755  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 19756  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 19757  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 19758  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 19759  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 19760  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 19761  
 19762  LBB2_768:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_769; both targets lie outside this chunk.
 19763  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19764  	JE   LBB2_1069
 19765  	JMP  LBB2_769
 19766  
 19767  LBB2_773:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19768  	WORD $0xff31 // xor    edi, edi
 19769  
 19770  LBB2_774:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 qwords):
        // r8[8*rdi ..] = xmm0 - rcx[8*rdi ..], lane-wise via psubq.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19771  	LONG $0x01c1f641                           // test    r9b, 1
 19772  	JE   LBB2_776
 19773  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 19774  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 19775  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19776  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 19777  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
 19778  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 19779  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
 19780  
 19781  LBB2_776:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_777; both targets lie outside this chunk.
 19782  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19783  	JE   LBB2_1069
 19784  	JMP  LBB2_777
 19785  
 19786  LBB2_781:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19787  	WORD $0xff31 // xor    edi, edi
 19788  
 19789  LBB2_782:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 qwords):
        // r8[8*rdi ..] = rcx[8*rdi ..] + xmm0, lane-wise via paddq.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 4-qword groups; confirm.
 19790  	LONG $0x01c1f641                           // test    r9b, 1
 19791  	JE   LBB2_784
 19792  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 19793  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 19794  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 19795  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 19796  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 19797  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 19798  
 19799  LBB2_784:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_785; both targets lie outside this chunk.
 19800  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19801  	JE   LBB2_1069
 19802  	JMP  LBB2_785
 19803  
 19804  LBB2_789:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19805  	WORD $0xff31 // xor    edi, edi
 19806  
 19807  LBB2_790:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (4 qwords):
        // r8[8*rdi ..] = xmm0 - rcx[8*rdi ..], lane-wise via psubq.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19808  	LONG $0x01c1f641                           // test    r9b, 1
 19809  	JE   LBB2_792
 19810  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 19811  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 19812  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19813  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 19814  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
 19815  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 19816  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
 19817  
 19818  LBB2_792:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_793; both targets lie outside this chunk.
 19819  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19820  	JE   LBB2_1069
 19821  	JMP  LBB2_793
 19822  
 19823  LBB2_797:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19824  	WORD $0xff31 // xor    edi, edi
 19825  
 19826  LBB2_798:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] * xmm0, lane-wise via pmullw (low 16 bits of each product).
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19827  	LONG $0x01c1f641                           // test    r9b, 1
 19828  	JE   LBB2_800
 19829  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19830  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19831  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 19832  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 19833  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19834  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19835  
 19836  LBB2_800:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_801; both targets lie outside this chunk.
 19837  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19838  	JE   LBB2_1069
 19839  	JMP  LBB2_801
 19840  
 19841  LBB2_805:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19842  	WORD $0xff31 // xor    edi, edi
 19843  
 19844  LBB2_806:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] * xmm0, lane-wise via pmullw (low 16 bits of each product).
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19845  	LONG $0x01c1f641                           // test    r9b, 1
 19846  	JE   LBB2_808
 19847  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19848  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19849  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 19850  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 19851  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19852  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19853  
 19854  LBB2_808:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_809; both targets lie outside this chunk.
 19855  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19856  	JE   LBB2_1069
 19857  	JMP  LBB2_809
 19858  
 19859  LBB2_813:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19860  	WORD $0xff31 // xor    edi, edi
 19861  
 19862  LBB2_814:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] * xmm0, lane-wise via pmullw (low 16 bits of each product).
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19863  	LONG $0x01c1f641                           // test    r9b, 1
 19864  	JE   LBB2_816
 19865  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19866  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19867  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 19868  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 19869  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19870  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19871  
 19872  LBB2_816:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_817; both targets lie outside this chunk.
 19873  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19874  	JE   LBB2_1069
 19875  	JMP  LBB2_817
 19876  
 19877  LBB2_821:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19878  	WORD $0xff31 // xor    edi, edi
 19879  
 19880  LBB2_822:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] * xmm0, lane-wise via pmullw (low 16 bits of each product).
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19881  	LONG $0x01c1f641                           // test    r9b, 1
 19882  	JE   LBB2_824
 19883  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19884  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19885  	LONG $0xc8d50f66                           // pmullw    xmm1, xmm0
 19886  	LONG $0xd0d50f66                           // pmullw    xmm2, xmm0
 19887  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19888  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19889  
 19890  LBB2_824:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_825; both targets lie outside this chunk.
 19891  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19892  	JE   LBB2_1069
 19893  	JMP  LBB2_825
 19894  
 19895  LBB2_829:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19896  	WORD $0xff31 // xor    edi, edi
 19897  
 19898  LBB2_830:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] + xmm0, lane-wise via paddw.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19899  	LONG $0x01c1f641                           // test    r9b, 1
 19900  	JE   LBB2_832
 19901  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19902  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19903  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 19904  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 19905  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19906  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19907  
 19908  LBB2_832:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_833; both targets lie outside this chunk.
 19909  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19910  	JE   LBB2_1069
 19911  	JMP  LBB2_833
 19912  
 19913  LBB2_837:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19914  	WORD $0xff31 // xor    edi, edi
 19915  
 19916  LBB2_838:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] + xmm0, lane-wise via paddw.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19917  	LONG $0x01c1f641                           // test    r9b, 1
 19918  	JE   LBB2_840
 19919  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19920  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19921  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 19922  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 19923  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19924  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19925  
 19926  LBB2_840:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_841; both targets lie outside this chunk.
 19927  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19928  	JE   LBB2_1069
 19929  	JMP  LBB2_841
 19930  
 19931  LBB2_845:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19932  	WORD $0xff31 // xor    edi, edi
 19933  
 19934  LBB2_846:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = xmm0 - rcx[2*rdi ..], lane-wise via psubw.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19935  	LONG $0x01c1f641                           // test    r9b, 1
 19936  	JE   LBB2_848
 19937  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19938  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19939  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19940  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 19941  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
 19942  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 19943  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
 19944  
 19945  LBB2_848:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_849; both targets lie outside this chunk.
 19946  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19947  	JE   LBB2_1069
 19948  	JMP  LBB2_849
 19949  
 19950  LBB2_853:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19951  	WORD $0xff31 // xor    edi, edi
 19952  
 19953  LBB2_854:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = xmm0 - rcx[2*rdi ..], lane-wise via psubw.
        // The second subtract writes its result into xmm0 itself, so xmm0 no longer holds
        // its original value after this step.
 19954  	LONG $0x01c1f641                           // test    r9b, 1
 19955  	JE   LBB2_856
 19956  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19957  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19958  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 19959  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 19960  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
 19961  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 19962  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
 19963  
 19964  LBB2_856:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_857; both targets lie outside this chunk.
 19965  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19966  	JE   LBB2_1069
 19967  	JMP  LBB2_857
 19968  
 19969  LBB2_861:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19970  	WORD $0xff31 // xor    edi, edi
 19971  
 19972  LBB2_862:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] + xmm0, lane-wise via paddw.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19973  	LONG $0x01c1f641                           // test    r9b, 1
 19974  	JE   LBB2_864
 19975  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19976  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19977  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 19978  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 19979  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19980  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19981  
 19982  LBB2_864:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_865; both targets lie outside this chunk.
 19983  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 19984  	JE   LBB2_1069
 19985  	JMP  LBB2_865
 19986  
 19987  LBB2_869:
        // Zero-index entry to the odd-chunk step below: rdi = 0, then fall through.
 19988  	WORD $0xff31 // xor    edi, edi
 19989  
 19990  LBB2_870:
        // Skipped when bit 0 of r9b is clear. Otherwise handles one 32-byte chunk (16 words):
        // r8[2*rdi ..] = rcx[2*rdi ..] + xmm0, lane-wise via paddw.
        // NOTE(review): r9 is set up outside this chunk, presumably a count of 16-word groups; confirm.
 19991  	LONG $0x01c1f641                           // test    r9b, 1
 19992  	JE   LBB2_872
 19993  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 19994  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 19995  	LONG $0xc8fd0f66                           // paddw    xmm1, xmm0
 19996  	LONG $0xd0fd0f66                           // paddw    xmm2, xmm0
 19997  	LONG $0x7f0f41f3; WORD $0x780c             // movdqu    oword [r8 + 2*rdi], xmm1
 19998  	LONG $0x7f0f41f3; WORD $0x7854; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm2
 19999  
 20000  LBB2_872:
        // Jump to LBB2_1069 when rsi == r10, otherwise to LBB2_873; both targets lie outside this chunk.
 20001  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20002  	JE   LBB2_1069
 20003  	JMP  LBB2_873
 20004  
 20005  LBB2_877:
 20006  	WORD $0xff31 // xor    edi, edi
 20007  
 20008  LBB2_878:
 20009  	LONG $0x01c1f641                           // test    r9b, 1
 20010  	JE   LBB2_880
 20011  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 20012  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 20013  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20014  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 20015  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
 20016  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 20017  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
 20018  
 20019  LBB2_880:
 20020  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20021  	JE   LBB2_1069
 20022  	JMP  LBB2_881
 20023  
 20024  LBB2_885:
 20025  	WORD $0xff31 // xor    edi, edi
 20026  
 20027  LBB2_886:
 20028  	LONG $0x01c1f641                           // test    r9b, 1
 20029  	JE   LBB2_888
 20030  	LONG $0x0c6f0ff3; BYTE $0x79               // movdqu    xmm1, oword [rcx + 2*rdi]
 20031  	LONG $0x546f0ff3; WORD $0x1079             // movdqu    xmm2, oword [rcx + 2*rdi + 16]
 20032  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20033  	LONG $0xd9f90f66                           // psubw    xmm3, xmm1
 20034  	LONG $0xc2f90f66                           // psubw    xmm0, xmm2
 20035  	LONG $0x7f0f41f3; WORD $0x781c             // movdqu    oword [r8 + 2*rdi], xmm3
 20036  	LONG $0x7f0f41f3; WORD $0x7844; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm0
 20037  
 20038  LBB2_888:
 20039  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20040  	JE   LBB2_1069
 20041  	JMP  LBB2_889
 20042  
 20043  LBB2_893:
 20044  	WORD $0xff31 // xor    edi, edi
 20045  
 20046  LBB2_894:
 20047  	LONG $0x01c1f641               // test    r9b, 1
 20048  	JE   LBB2_896
 20049  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20050  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20051  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 20052  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 20053  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 20054  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 20055  
 20056  LBB2_896:
 20057  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20058  	JE   LBB2_1069
 20059  	JMP  LBB2_897
 20060  
 20061  LBB2_901:
 20062  	WORD $0xff31 // xor    edi, edi
 20063  
 20064  LBB2_902:
 20065  	LONG $0x01c1f641               // test    r9b, 1
 20066  	JE   LBB2_904
 20067  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20068  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20069  	WORD $0x590f; BYTE $0xd1       // mulps    xmm2, xmm1
 20070  	WORD $0x590f; BYTE $0xd9       // mulps    xmm3, xmm1
 20071  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 20072  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 20073  
 20074  LBB2_904:
 20075  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20076  	JE   LBB2_1069
 20077  	JMP  LBB2_905
 20078  
 20079  LBB2_909:
 20080  	WORD $0xff31 // xor    edi, edi
 20081  
 20082  LBB2_910:
 20083  	LONG $0x01c1f641                           // test    r9b, 1
 20084  	JE   LBB2_912
 20085  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 20086  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 20087  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 20088  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 20089  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 20090  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 20091  
 20092  LBB2_912:
 20093  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20094  	JE   LBB2_1069
 20095  	JMP  LBB2_913
 20096  
 20097  LBB2_917:
 20098  	WORD $0xff31 // xor    edi, edi
 20099  
 20100  LBB2_918:
 20101  	LONG $0x01c1f641               // test    r9b, 1
 20102  	JE   LBB2_920
 20103  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20104  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20105  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 20106  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 20107  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 20108  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 20109  
 20110  LBB2_920:
 20111  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20112  	JE   LBB2_1069
 20113  	JMP  LBB2_921
 20114  
 20115  LBB2_925:
 20116  	WORD $0xff31 // xor    edi, edi
 20117  
 20118  LBB2_926:
 20119  	LONG $0x01c1f641                           // test    r9b, 1
 20120  	JE   LBB2_928
 20121  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 20122  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 20123  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20124  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 20125  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
 20126  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 20127  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
 20128  
 20129  LBB2_928:
 20130  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20131  	JE   LBB2_1069
 20132  	JMP  LBB2_929
 20133  
 20134  LBB2_933:
 20135  	WORD $0xff31 // xor    edi, edi
 20136  
 20137  LBB2_934:
 20138  	LONG $0x01c1f641               // test    r9b, 1
 20139  	JE   LBB2_936
 20140  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20141  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20142  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1
 20143  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
 20144  	WORD $0x5c0f; BYTE $0xcb       // subps    xmm1, xmm3
 20145  	LONG $0x24110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm4
 20146  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
 20147  
 20148  LBB2_936:
 20149  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20150  	JE   LBB2_1069
 20151  	JMP  LBB2_937
 20152  
 20153  LBB2_941:
 20154  	WORD $0xff31 // xor    edi, edi
 20155  
 20156  LBB2_942:
 20157  	LONG $0x01c1f641                           // test    r9b, 1
 20158  	JE   LBB2_944
 20159  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 20160  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 20161  	LONG $0xc8d40f66                           // paddq    xmm1, xmm0
 20162  	LONG $0xd0d40f66                           // paddq    xmm2, xmm0
 20163  	LONG $0x7f0f41f3; WORD $0xf80c             // movdqu    oword [r8 + 8*rdi], xmm1
 20164  	LONG $0x7f0f41f3; WORD $0xf854; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm2
 20165  
 20166  LBB2_944:
 20167  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20168  	JE   LBB2_1069
 20169  	JMP  LBB2_945
 20170  
 20171  LBB2_949:
 20172  	WORD $0xff31 // xor    edi, edi
 20173  
 20174  LBB2_950:
 20175  	LONG $0x01c1f641               // test    r9b, 1
 20176  	JE   LBB2_952
 20177  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20178  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20179  	WORD $0x580f; BYTE $0xd1       // addps    xmm2, xmm1
 20180  	WORD $0x580f; BYTE $0xd9       // addps    xmm3, xmm1
 20181  	LONG $0x14110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm2
 20182  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 20183  
 20184  LBB2_952:
 20185  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20186  	JE   LBB2_1069
 20187  	JMP  LBB2_953
 20188  
 20189  LBB2_957:
 20190  	WORD $0xff31 // xor    edi, edi
 20191  
 20192  LBB2_958:
 20193  	LONG $0x01c1f641                           // test    r9b, 1
 20194  	JE   LBB2_960
 20195  	LONG $0x0c6f0ff3; BYTE $0xf9               // movdqu    xmm1, oword [rcx + 8*rdi]
 20196  	LONG $0x546f0ff3; WORD $0x10f9             // movdqu    xmm2, oword [rcx + 8*rdi + 16]
 20197  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20198  	LONG $0xd9fb0f66                           // psubq    xmm3, xmm1
 20199  	LONG $0xc2fb0f66                           // psubq    xmm0, xmm2
 20200  	LONG $0x7f0f41f3; WORD $0xf81c             // movdqu    oword [r8 + 8*rdi], xmm3
 20201  	LONG $0x7f0f41f3; WORD $0xf844; BYTE $0x10 // movdqu    oword [r8 + 8*rdi + 16], xmm0
 20202  
 20203  LBB2_960:
 20204  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20205  	JE   LBB2_1069
 20206  	JMP  LBB2_961
 20207  
 20208  LBB2_965:
 20209  	WORD $0xff31 // xor    edi, edi
 20210  
 20211  LBB2_966:
 20212  	LONG $0x01c1f641               // test    r9b, 1
 20213  	JE   LBB2_968
 20214  	LONG $0xb914100f               // movups    xmm2, oword [rcx + 4*rdi]
 20215  	LONG $0xb95c100f; BYTE $0x10   // movups    xmm3, oword [rcx + 4*rdi + 16]
 20216  	WORD $0x280f; BYTE $0xe1       // movaps    xmm4, xmm1
 20217  	WORD $0x5c0f; BYTE $0xe2       // subps    xmm4, xmm2
 20218  	WORD $0x5c0f; BYTE $0xcb       // subps    xmm1, xmm3
 20219  	LONG $0x24110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm4
 20220  	LONG $0x4c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm1
 20221  
 20222  LBB2_968:
 20223  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 20224  	JE   LBB2_1069
 20225  	JMP  LBB2_969
 20226  
 20227  LBB2_973:
 20228  	WORD $0xc031 // xor    eax, eax
 20229  
 20230  LBB2_974:
 20231  	LONG $0x01c1f641                           // test    r9b, 1
 20232  	JE   LBB2_976
 20233  	LONG $0x146f0ff3; BYTE $0x01               // movdqu    xmm2, oword [rcx + rax]
 20234  	LONG $0x5c6f0ff3; WORD $0x1001             // movdqu    xmm3, oword [rcx + rax + 16]
 20235  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 20236  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 20237  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 20238  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 20239  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 20240  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI2_0] */
 20241  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 20242  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 20243  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 20244  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 20245  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 20246  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 20247  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 20248  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 20249  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 20250  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 20251  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 20252  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 20253  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 20254  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 20255  
 20256  LBB2_976:
 20257  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 20258  	JE   LBB2_1069
 20259  	JMP  LBB2_977
 20260  
 20261  LBB2_981:
 20262  	WORD $0xc031 // xor    eax, eax
 20263  
 20264  LBB2_982:
 20265  	LONG $0x01c1f641                           // test    r9b, 1
 20266  	JE   LBB2_984
 20267  	LONG $0x146f0ff3; BYTE $0x01               // movdqu    xmm2, oword [rcx + rax]
 20268  	LONG $0x5c6f0ff3; WORD $0x1001             // movdqu    xmm3, oword [rcx + rax + 16]
 20269  	LONG $0xe06f0f66                           // movdqa    xmm4, xmm0
 20270  	LONG $0xe4680f66                           // punpckhbw    xmm4, xmm4
 20271  	LONG $0x30380f66; BYTE $0xea               // pmovzxbw    xmm5, xmm2
 20272  	LONG $0xd2680f66                           // punpckhbw    xmm2, xmm2
 20273  	LONG $0xd4d50f66                           // pmullw    xmm2, xmm4
 20274  	LONG $0x656f0f66; BYTE $0x00               // movdqa    xmm4, oword 0[rbp] /* [rip + .LCPI2_0] */
 20275  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 20276  	LONG $0xe9d50f66                           // pmullw    xmm5, xmm1
 20277  	LONG $0xecdb0f66                           // pand    xmm5, xmm4
 20278  	LONG $0xea670f66                           // packuswb    xmm5, xmm2
 20279  	LONG $0xc0680f66                           // punpckhbw    xmm0, xmm0
 20280  	LONG $0x30380f66; BYTE $0xd3               // pmovzxbw    xmm2, xmm3
 20281  	LONG $0xdb680f66                           // punpckhbw    xmm3, xmm3
 20282  	LONG $0xd8d50f66                           // pmullw    xmm3, xmm0
 20283  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 20284  	LONG $0xd1d50f66                           // pmullw    xmm2, xmm1
 20285  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 20286  	LONG $0xd3670f66                           // packuswb    xmm2, xmm3
 20287  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 20288  	LONG $0x7f0f41f3; WORD $0x0054; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm2
 20289  
 20290  LBB2_984:
 20291  	WORD $0x394c; BYTE $0xd7 // cmp    rdi, r10
 20292  	JE   LBB2_1069
 20293  	JMP  LBB2_985
 20294  
 20295  LBB2_989:
 20296  	WORD $0xff31 // xor    edi, edi
 20297  
 20298  LBB2_990:
 20299  	LONG $0x01c1f641                           // test    r9b, 1
 20300  	JE   LBB2_992
 20301  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 20302  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 20303  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 20304  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 20305  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 20306  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 20307  
 20308  LBB2_992:
 20309  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20310  	JE   LBB2_1069
 20311  	JMP  LBB2_993
 20312  
 20313  LBB2_997:
 20314  	WORD $0xff31 // xor    edi, edi
 20315  
 20316  LBB2_998:
 20317  	LONG $0x01c1f641                           // test    r9b, 1
 20318  	JE   LBB2_1000
 20319  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 20320  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 20321  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20322  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 20323  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
 20324  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 20325  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
 20326  
 20327  LBB2_1000:
 20328  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20329  	JE   LBB2_1069
 20330  	JMP  LBB2_1001
 20331  
 20332  LBB2_1005:
 20333  	WORD $0xff31 // xor    edi, edi
 20334  
 20335  LBB2_1006:
 20336  	LONG $0x01c1f641                           // test    r9b, 1
 20337  	JE   LBB2_1008
 20338  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 20339  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 20340  	LONG $0xc8fc0f66                           // paddb    xmm1, xmm0
 20341  	LONG $0xd0fc0f66                           // paddb    xmm2, xmm0
 20342  	LONG $0x7f0f41f3; WORD $0x380c             // movdqu    oword [r8 + rdi], xmm1
 20343  	LONG $0x7f0f41f3; WORD $0x3854; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm2
 20344  
 20345  LBB2_1008:
 20346  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20347  	JE   LBB2_1069
 20348  	JMP  LBB2_1009
 20349  
 20350  LBB2_1013:
 20351  	WORD $0xff31 // xor    edi, edi
 20352  
 20353  LBB2_1014:
 20354  	LONG $0x01c1f641                           // test    r9b, 1
 20355  	JE   LBB2_1016
 20356  	LONG $0x0c6f0ff3; BYTE $0x39               // movdqu    xmm1, oword [rcx + rdi]
 20357  	LONG $0x546f0ff3; WORD $0x1039             // movdqu    xmm2, oword [rcx + rdi + 16]
 20358  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20359  	LONG $0xd9f80f66                           // psubb    xmm3, xmm1
 20360  	LONG $0xc2f80f66                           // psubb    xmm0, xmm2
 20361  	LONG $0x7f0f41f3; WORD $0x381c             // movdqu    oword [r8 + rdi], xmm3
 20362  	LONG $0x7f0f41f3; WORD $0x3844; BYTE $0x10 // movdqu    oword [r8 + rdi + 16], xmm0
 20363  
 20364  LBB2_1016:
 20365  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20366  	JE   LBB2_1069
 20367  	JMP  LBB2_1017
 20368  
 20369  LBB2_1021:
 20370  	WORD $0xff31 // xor    edi, edi
 20371  
 20372  LBB2_1022:
 20373  	LONG $0x01c1f641                           // test    r9b, 1
 20374  	JE   LBB2_1024
 20375  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20376  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20377  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 20378  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 20379  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 20380  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 20381  
 20382  LBB2_1024:
 20383  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20384  	JE   LBB2_1069
 20385  	JMP  LBB2_1025
 20386  
 20387  LBB2_1029:
 20388  	WORD $0xff31 // xor    edi, edi
 20389  
 20390  LBB2_1030:
 20391  	LONG $0x01c1f641                           // test    r9b, 1
 20392  	JE   LBB2_1032
 20393  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20394  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20395  	LONG $0x40380f66; BYTE $0xc8               // pmulld    xmm1, xmm0
 20396  	LONG $0x40380f66; BYTE $0xd0               // pmulld    xmm2, xmm0
 20397  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 20398  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 20399  
 20400  LBB2_1032:
 20401  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20402  	JE   LBB2_1069
 20403  	JMP  LBB2_1033
 20404  
 20405  LBB2_1037:
 20406  	WORD $0xff31 // xor    edi, edi
 20407  
 20408  LBB2_1038:
 20409  	LONG $0x01c1f641                           // test    r9b, 1
 20410  	JE   LBB2_1040
 20411  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20412  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20413  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 20414  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 20415  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 20416  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 20417  
 20418  LBB2_1040:
 20419  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20420  	JE   LBB2_1069
 20421  	JMP  LBB2_1041
 20422  
 20423  LBB2_1045:
 20424  	WORD $0xff31 // xor    edi, edi
 20425  
 20426  LBB2_1046:
 20427  	LONG $0x01c1f641                           // test    r9b, 1
 20428  	JE   LBB2_1048
 20429  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20430  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20431  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20432  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 20433  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
 20434  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 20435  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
 20436  
 20437  LBB2_1048:
 20438  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20439  	JE   LBB2_1069
 20440  	JMP  LBB2_1049
 20441  
 20442  LBB2_1053:
 20443  	WORD $0xff31 // xor    edi, edi
 20444  
 20445  LBB2_1054:
 20446  	LONG $0x01c1f641                           // test    r9b, 1
 20447  	JE   LBB2_1056
 20448  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20449  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20450  	LONG $0xc8fe0f66                           // paddd    xmm1, xmm0
 20451  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 20452  	LONG $0x7f0f41f3; WORD $0xb80c             // movdqu    oword [r8 + 4*rdi], xmm1
 20453  	LONG $0x7f0f41f3; WORD $0xb854; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm2
 20454  
 20455  LBB2_1056:
 20456  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20457  	JE   LBB2_1069
 20458  	JMP  LBB2_1057
 20459  
 20460  LBB2_1061:
 20461  	WORD $0xff31 // xor    edi, edi
 20462  
 20463  LBB2_1062:
 20464  	LONG $0x01c1f641                           // test    r9b, 1
 20465  	JE   LBB2_1064
 20466  	LONG $0x0c6f0ff3; BYTE $0xb9               // movdqu    xmm1, oword [rcx + 4*rdi]
 20467  	LONG $0x546f0ff3; WORD $0x10b9             // movdqu    xmm2, oword [rcx + 4*rdi + 16]
 20468  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 20469  	LONG $0xd9fa0f66                           // psubd    xmm3, xmm1
 20470  	LONG $0xc2fa0f66                           // psubd    xmm0, xmm2
 20471  	LONG $0x7f0f41f3; WORD $0xb81c             // movdqu    oword [r8 + 4*rdi], xmm3
 20472  	LONG $0x7f0f41f3; WORD $0xb844; BYTE $0x10 // movdqu    oword [r8 + 4*rdi + 16], xmm0
 20473  
 20474  LBB2_1064:
 20475  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 20476  	JE   LBB2_1069
 20477  	JMP  LBB2_1065
 20478  
 20479  DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000 // LCDATA4 constant table. Row +0x00: only bit 63 set in each 64-bit lane
 20480  DATA LCDATA4<>+0x008(SB)/8, $0x8000000000000000 //   (presumably a float64 sign-bit mask -- confirm at the use sites)
 20481  DATA LCDATA4<>+0x010(SB)/8, $0x3ff0000000000000 // Row +0x10: IEEE-754 binary64 bit pattern of 1.0 in each 64-bit lane
 20482  DATA LCDATA4<>+0x018(SB)/8, $0x3ff0000000000000
 20483  DATA LCDATA4<>+0x020(SB)/8, $0x0000000100000001 // Row +0x20: integer 1 in each 32-bit lane
 20484  DATA LCDATA4<>+0x028(SB)/8, $0x0000000100000001
 20485  DATA LCDATA4<>+0x030(SB)/8, $0x0000000000000001 // Row +0x30: integer 1 in each 64-bit lane
 20486  DATA LCDATA4<>+0x038(SB)/8, $0x0000000000000001
 20487  DATA LCDATA4<>+0x040(SB)/8, $0x0001000100010001 // Row +0x40: integer 1 in each 16-bit lane
 20488  DATA LCDATA4<>+0x048(SB)/8, $0x0001000100010001
 20489  DATA LCDATA4<>+0x050(SB)/8, $0x0101010101010101 // Row +0x50: integer 1 in each byte
 20490  DATA LCDATA4<>+0x058(SB)/8, $0x0101010101010101
 20491  DATA LCDATA4<>+0x060(SB)/8, $0x8000000080000000 // Row +0x60: only bit 31 set in each 32-bit lane
 20492  DATA LCDATA4<>+0x068(SB)/8, $0x8000000080000000 //   (presumably a float32 sign-bit mask -- confirm at the use sites)
 20493  DATA LCDATA4<>+0x070(SB)/8, $0x7fffffffffffffff // Row +0x70: every bit except bit 63 set in each 64-bit lane
 20494  DATA LCDATA4<>+0x078(SB)/8, $0x7fffffffffffffff //   (bitwise complement of row +0x00)
 20495  DATA LCDATA4<>+0x080(SB)/8, $0x7fffffff7fffffff // Row +0x80: every bit except bit 31 set in each 32-bit lane
 20496  DATA LCDATA4<>+0x088(SB)/8, $0x7fffffff7fffffff //   (bitwise complement of row +0x60)
 20497  DATA LCDATA4<>+0x090(SB)/8, $0x000000ff000000ff // Row +0x90: 0xff in the low byte of each 32-bit lane, upper 24 bits clear
 20498  DATA LCDATA4<>+0x098(SB)/8, $0x000000ff000000ff
 20499  DATA LCDATA4<>+0x0a0(SB)/8, $0x3ff0000000000000 // +0xa0: single 8-byte entry (not a 16-byte row); same bit pattern as row +0x10
 20500  GLOBL LCDATA4<>(SB), 8, $168 // file-local symbol (<> suffix); flag 8 (RODATA in Go's textflag.h -- confirm); 168 = 0xa0 + 8 bytes
 20501  
 20502  TEXT ยท_arithmetic_unary_same_types_sse4(SB), $0-40
 20503  
 20504  	MOVQ typ+0(FP), DI
 20505  	MOVQ op+8(FP), SI
 20506  	MOVQ input+16(FP), DX
 20507  	MOVQ output+24(FP), CX
 20508  	MOVQ len+32(FP), R8
 20509  	LEAQ LCDATA4<>(SB), BP
 20510  
 20511  	LONG $0x13fe8040         // cmp    sil, 19
 20512  	JLE  LBB3_12
 20513  	LONG $0x14fe8040         // cmp    sil, 20
 20514  	JE   LBB3_22
 20515  	LONG $0x19fe8040         // cmp    sil, 25
 20516  	JE   LBB3_30
 20517  	LONG $0x1afe8040         // cmp    sil, 26
 20518  	JNE  LBB3_923
 20519  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20520  	JG   LBB3_46
 20521  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 20522  	JLE  LBB3_81
 20523  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 20524  	JE   LBB3_131
 20525  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 20526  	JE   LBB3_134
 20527  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20528  	JNE  LBB3_923
 20529  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20530  	JLE  LBB3_923
 20531  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20532  	LONG $0x08f88341         // cmp    r8d, 8
 20533  	JAE  LBB3_221
 20534  	WORD $0xd231             // xor    edx, edx
 20535  	JMP  LBB3_373
 20536  
 20537  LBB3_12:
 20538  	LONG $0x04fe8040         // cmp    sil, 4
 20539  	JE   LBB3_38
 20540  	LONG $0x05fe8040         // cmp    sil, 5
 20541  	JNE  LBB3_923
 20542  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20543  	JG   LBB3_53
 20544  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 20545  	JLE  LBB3_86
 20546  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 20547  	JE   LBB3_137
 20548  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 20549  	JE   LBB3_140
 20550  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20551  	JNE  LBB3_923
 20552  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20553  	JLE  LBB3_923
 20554  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20555  	LONG $0x08f88341         // cmp    r8d, 8
 20556  	JB   LBB3_21
 20557  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 20558  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20559  	JBE  LBB3_374
 20560  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 20561  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20562  	JBE  LBB3_374
 20563  
 20564  LBB3_21:
 20565  	WORD $0xf631 // xor    esi, esi
 20566  
 20567  LBB3_614:
 20568  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 20569  	WORD $0xf749; BYTE $0xd0 // not    r8
 20570  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 20571  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 20572  	LONG $0x03e78348         // and    rdi, 3
 20573  	JE   LBB3_616
 20574  
 20575  LBB3_615:
 20576  	WORD $0xc031             // xor    eax, eax
 20577  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 20578  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20579  	LONG $0x01c68348         // add    rsi, 1
 20580  	LONG $0xffc78348         // add    rdi, -1
 20581  	JNE  LBB3_615
 20582  
 20583  LBB3_616:
 20584  	LONG $0x03f88349 // cmp    r8, 3
 20585  	JB   LBB3_923
 20586  
 20587  LBB3_617:
 20588  	WORD $0xc031             // xor    eax, eax
 20589  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 20590  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20591  	WORD $0xc031             // xor    eax, eax
 20592  	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
 20593  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
 20594  	WORD $0xc031             // xor    eax, eax
 20595  	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
 20596  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
 20597  	WORD $0xc031             // xor    eax, eax
 20598  	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
 20599  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
 20600  	LONG $0x04c68348         // add    rsi, 4
 20601  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 20602  	JNE  LBB3_617
 20603  	JMP  LBB3_923
 20604  
 20605  LBB3_22:
 20606  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20607  	JG   LBB3_60
 20608  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 20609  	JLE  LBB3_91
 20610  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 20611  	JE   LBB3_143
 20612  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 20613  	JE   LBB3_146
 20614  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20615  	JNE  LBB3_923
 20616  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20617  	JLE  LBB3_923
 20618  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20619  	LONG $0x08f88341         // cmp    r8d, 8
 20620  	JB   LBB3_29
 20621  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 20622  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20623  	JBE  LBB3_377
 20624  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 20625  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20626  	JBE  LBB3_377
 20627  
 20628  LBB3_29:
 20629  	WORD $0xf631 // xor    esi, esi
 20630  
 20631  LBB3_622:
 20632  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 20633  	WORD $0xf749; BYTE $0xd0 // not    r8
 20634  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 20635  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 20636  	LONG $0x03e78348         // and    rdi, 3
 20637  	JE   LBB3_624
 20638  
 20639  LBB3_623:
 20640  	WORD $0xc031             // xor    eax, eax
 20641  	LONG $0x00b23c83         // cmp    dword [rdx + 4*rsi], 0
 20642  	WORD $0x950f; BYTE $0xd0 // setne    al
 20643  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20644  	LONG $0x01c68348         // add    rsi, 1
 20645  	LONG $0xffc78348         // add    rdi, -1
 20646  	JNE  LBB3_623
 20647  
 20648  LBB3_624:
 20649  	LONG $0x03f88349 // cmp    r8, 3
 20650  	JB   LBB3_923
 20651  
 20652  LBB3_625:
 20653  	WORD $0xc031                 // xor    eax, eax
 20654  	LONG $0x00b23c83             // cmp    dword [rdx + 4*rsi], 0
 20655  	WORD $0x950f; BYTE $0xd0     // setne    al
 20656  	WORD $0x0489; BYTE $0xb1     // mov    dword [rcx + 4*rsi], eax
 20657  	WORD $0xc031                 // xor    eax, eax
 20658  	LONG $0x04b27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 4], 0
 20659  	WORD $0x950f; BYTE $0xd0     // setne    al
 20660  	LONG $0x04b14489             // mov    dword [rcx + 4*rsi + 4], eax
 20661  	WORD $0xc031                 // xor    eax, eax
 20662  	LONG $0x08b27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 8], 0
 20663  	WORD $0x950f; BYTE $0xd0     // setne    al
 20664  	LONG $0x08b14489             // mov    dword [rcx + 4*rsi + 8], eax
 20665  	WORD $0xc031                 // xor    eax, eax
 20666  	LONG $0x0cb27c83; BYTE $0x00 // cmp    dword [rdx + 4*rsi + 12], 0
 20667  	WORD $0x950f; BYTE $0xd0     // setne    al
 20668  	LONG $0x0cb14489             // mov    dword [rcx + 4*rsi + 12], eax
 20669  	LONG $0x04c68348             // add    rsi, 4
 20670  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 20671  	JNE  LBB3_625
 20672  	JMP  LBB3_923
 20673  
 20674  LBB3_30: // Dispatch on edi; for edi == 6, copy r8d 32-bit elements from [rdx] to [rcx].
 20675  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20676  	JG   LBB3_67 // edi > 6 is dispatched further at LBB3_67
 20677  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 20678  	JLE  LBB3_96 // edi <= 3 is dispatched further at LBB3_96
 20679  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 20680  	JE   LBB3_149
 20681  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 20682  	JE   LBB3_152
 20683  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20684  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20685  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20686  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20687  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20688  	LONG $0x08f88341         // cmp    r8d, 8
 20689  	JB   LBB3_37 // fewer than 8 elements: use the scalar loops below
 20690  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 20691  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20692  	JBE  LBB3_380 // end of the [rdx] range is at or below rcx: ranges do not overlap
 20693  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 20694  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20695  	JBE  LBB3_380 // end of the [rcx] range is at or below rdx: ranges do not overlap
 20696  // Fall-through: count < 8 or the ranges overlap. NOTE(review): LBB3_380 is presumably the vectorized variant; it is not visible here - confirm.
 20697  LBB3_37: // scalar path: rsi is the element index, starting at 0
 20698  	WORD $0xf631 // xor    esi, esi
 20699  
 20700  LBB3_536: // r8 = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 20701  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 20702  	WORD $0xf749; BYTE $0xd0 // not    r8
 20703  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 20704  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 20705  	LONG $0x03e78348         // and    rdi, 3
 20706  	JE   LBB3_538 // count is a multiple of 4: skip the single-element loop
 20707  
 20708  LBB3_537: // copy one dword per iteration, rdi times
 20709  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 20710  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20711  	LONG $0x01c68348         // add    rsi, 1
 20712  	LONG $0xffc78348         // add    rdi, -1
 20713  	JNE  LBB3_537
 20714  
 20715  LBB3_538: // r8 < 3 means fewer than 4 elements were pending at LBB3_536, so no group of 4 is left
 20716  	LONG $0x03f88349 // cmp    r8, 3
 20717  	JB   LBB3_923
 20718  
 20719  LBB3_539: // copy four dwords per iteration until rsi reaches r9
 20720  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 20721  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20722  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
 20723  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
 20724  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
 20725  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
 20726  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
 20727  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
 20728  	LONG $0x04c68348         // add    rsi, 4
 20729  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 20730  	JNE  LBB3_539
 20731  	JMP  LBB3_923 // all elements written
 20732  
 20733  LBB3_38: // Dispatch on edi; for edi == 6, copy r8d 32-bit elements from [rdx] to [rcx].
 20734  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20735  	JG   LBB3_74 // edi > 6 is dispatched further at LBB3_74
 20736  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 20737  	JLE  LBB3_101 // edi <= 3 is dispatched further at LBB3_101
 20738  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 20739  	JE   LBB3_155
 20740  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 20741  	JE   LBB3_158
 20742  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 20743  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20744  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20745  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20746  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20747  	LONG $0x08f88341         // cmp    r8d, 8
 20748  	JB   LBB3_45 // fewer than 8 elements: use the scalar loops below
 20749  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 20750  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20751  	JBE  LBB3_382 // end of the [rdx] range is at or below rcx: ranges do not overlap
 20752  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 20753  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20754  	JBE  LBB3_382 // end of the [rcx] range is at or below rdx: ranges do not overlap
 20755  // Fall-through: count < 8 or the ranges overlap. NOTE(review): LBB3_382 is presumably the vectorized variant; it is not visible here - confirm.
 20756  LBB3_45: // scalar path: rsi is the element index, starting at 0
 20757  	WORD $0xf631 // xor    esi, esi
 20758  
 20759  LBB3_546: // r8 = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 20760  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 20761  	WORD $0xf749; BYTE $0xd0 // not    r8
 20762  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 20763  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 20764  	LONG $0x03e78348         // and    rdi, 3
 20765  	JE   LBB3_548 // count is a multiple of 4: skip the single-element loop
 20766  
 20767  LBB3_547: // copy one dword per iteration, rdi times
 20768  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 20769  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20770  	LONG $0x01c68348         // add    rsi, 1
 20771  	LONG $0xffc78348         // add    rdi, -1
 20772  	JNE  LBB3_547
 20773  
 20774  LBB3_548: // r8 < 3 means fewer than 4 elements were pending at LBB3_546, so no group of 4 is left
 20775  	LONG $0x03f88349 // cmp    r8, 3
 20776  	JB   LBB3_923
 20777  
 20778  LBB3_549: // copy four dwords per iteration until rsi reaches r9
 20779  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 20780  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 20781  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
 20782  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
 20783  	LONG $0x08b2448b         // mov    eax, dword [rdx + 4*rsi + 8]
 20784  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
 20785  	LONG $0x0cb2448b         // mov    eax, dword [rdx + 4*rsi + 12]
 20786  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
 20787  	LONG $0x04c68348         // add    rsi, 4
 20788  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 20789  	JNE  LBB3_549
 20790  	JMP  LBB3_923 // all elements written
 20791  
 20792  LBB3_46: // Dispatch on edi; for edi == 12, XOR each 64-bit element of [rdx] with the constant at 0[rbp] and store it to [rcx].
 20793  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 20794  	JLE  LBB3_106 // edi <= 8 is dispatched further at LBB3_106
 20795  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 20796  	JE   LBB3_161
 20797  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 20798  	JE   LBB3_164
 20799  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 20800  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20801  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20802  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20803  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20804  	LONG $0x04f88341         // cmp    r8d, 4
 20805  	JB   LBB3_52 // fewer than 4 elements: use the scalar loops below
 20806  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 20807  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20808  	JBE  LBB3_384 // end of the [rdx] range is at or below rcx: ranges do not overlap
 20809  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 20810  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20811  	JBE  LBB3_384 // end of the [rcx] range is at or below rdx: ranges do not overlap
 20812  // Fall-through: count < 4 or the ranges overlap. NOTE(review): LBB3_384 is presumably the vectorized variant; it is not visible here - confirm.
 20813  LBB3_52: // scalar path: rsi is the element index, starting at 0
 20814  	WORD $0xf631 // xor    esi, esi
 20815  
 20816  LBB3_630: // rax = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 20817  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 20818  	WORD $0xf748; BYTE $0xd0     // not    rax
 20819  	WORD $0x014c; BYTE $0xc8     // add    rax, r9
 20820  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 20821  	LONG $0x03e78348             // and    rdi, 3
 20822  	JE   LBB3_633 // count is a multiple of 4: skip the single-element loop
 20823  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 20824  // NOTE(review): the constant table behind rbp is outside this chunk; if 0[rbp] is the float64 sign-bit mask, the XOR below is a negation - confirm.
 20825  LBB3_632: // one 64-bit element per iteration, rdi times: out = in XOR xmm0
 20826  	LONG $0x0c100ff2; BYTE $0xf2 // movsd    xmm1, qword [rdx + 8*rsi]
 20827  	LONG $0xc8570f66             // xorpd    xmm1, xmm0
 20828  	LONG $0x0c130f66; BYTE $0xf1 // movlpd    qword [rcx + 8*rsi], xmm1
 20829  	LONG $0x01c68348             // add    rsi, 1
 20830  	LONG $0xffc78348             // add    rdi, -1
 20831  	JNE  LBB3_632
 20832  
 20833  LBB3_633: // rax < 3 means fewer than 4 elements were pending at LBB3_630, so no group of 4 is left
 20834  	LONG $0x03f88348             // cmp    rax, 3
 20835  	JB   LBB3_923
 20836  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 20837  
 20838  LBB3_635: // four 64-bit elements per iteration until rsi reaches r9
 20839  	LONG $0x0c100ff2; BYTE $0xf2   // movsd    xmm1, qword [rdx + 8*rsi]
 20840  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20841  	LONG $0x0c130f66; BYTE $0xf1   // movlpd    qword [rcx + 8*rsi], xmm1
 20842  	LONG $0x4c100ff2; WORD $0x08f2 // movsd    xmm1, qword [rdx + 8*rsi + 8]
 20843  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20844  	LONG $0x4c130f66; WORD $0x08f1 // movlpd    qword [rcx + 8*rsi + 8], xmm1
 20845  	LONG $0x4c100ff2; WORD $0x10f2 // movsd    xmm1, qword [rdx + 8*rsi + 16]
 20846  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20847  	LONG $0x4c130f66; WORD $0x10f1 // movlpd    qword [rcx + 8*rsi + 16], xmm1
 20848  	LONG $0x4c100ff2; WORD $0x18f2 // movsd    xmm1, qword [rdx + 8*rsi + 24]
 20849  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20850  	LONG $0x4c130f66; WORD $0x18f1 // movlpd    qword [rcx + 8*rsi + 24], xmm1
 20851  	LONG $0x04c68348               // add    rsi, 4
 20852  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 20853  	JNE  LBB3_635
 20854  	JMP  LBB3_923 // all elements written
 20855  
 20856  LBB3_53: // Dispatch on edi; for edi == 12, XOR each 64-bit element of [rdx] with the constant at 0[rbp] and store it to [rcx].
 20857  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 20858  	JLE  LBB3_111 // edi <= 8 is dispatched further at LBB3_111
 20859  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 20860  	JE   LBB3_167
 20861  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 20862  	JE   LBB3_170
 20863  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 20864  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20865  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20866  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20867  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20868  	LONG $0x04f88341         // cmp    r8d, 4
 20869  	JB   LBB3_59 // fewer than 4 elements: use the scalar loops below
 20870  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 20871  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20872  	JBE  LBB3_387 // end of the [rdx] range is at or below rcx: ranges do not overlap
 20873  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 20874  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20875  	JBE  LBB3_387 // end of the [rcx] range is at or below rdx: ranges do not overlap
 20876  // Fall-through: count < 4 or the ranges overlap. NOTE(review): LBB3_387 is presumably the vectorized variant; it is not visible here - confirm.
 20877  LBB3_59: // scalar path: rsi is the element index, starting at 0
 20878  	WORD $0xf631 // xor    esi, esi
 20879  
 20880  LBB3_640: // rax = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 20881  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 20882  	WORD $0xf748; BYTE $0xd0     // not    rax
 20883  	WORD $0x014c; BYTE $0xc8     // add    rax, r9
 20884  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 20885  	LONG $0x03e78348             // and    rdi, 3
 20886  	JE   LBB3_643 // count is a multiple of 4: skip the single-element loop
 20887  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 20888  // NOTE(review): the constant table behind rbp is outside this chunk; if 0[rbp] is the float64 sign-bit mask, the XOR below is a negation - confirm.
 20889  LBB3_642: // one 64-bit element per iteration, rdi times: out = in XOR xmm0
 20890  	LONG $0x0c100ff2; BYTE $0xf2 // movsd    xmm1, qword [rdx + 8*rsi]
 20891  	LONG $0xc8570f66             // xorpd    xmm1, xmm0
 20892  	LONG $0x0c130f66; BYTE $0xf1 // movlpd    qword [rcx + 8*rsi], xmm1
 20893  	LONG $0x01c68348             // add    rsi, 1
 20894  	LONG $0xffc78348             // add    rdi, -1
 20895  	JNE  LBB3_642
 20896  
 20897  LBB3_643: // rax < 3 means fewer than 4 elements were pending at LBB3_640, so no group of 4 is left
 20898  	LONG $0x03f88348             // cmp    rax, 3
 20899  	JB   LBB3_923
 20900  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 20901  
 20902  LBB3_645: // four 64-bit elements per iteration until rsi reaches r9
 20903  	LONG $0x0c100ff2; BYTE $0xf2   // movsd    xmm1, qword [rdx + 8*rsi]
 20904  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20905  	LONG $0x0c130f66; BYTE $0xf1   // movlpd    qword [rcx + 8*rsi], xmm1
 20906  	LONG $0x4c100ff2; WORD $0x08f2 // movsd    xmm1, qword [rdx + 8*rsi + 8]
 20907  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20908  	LONG $0x4c130f66; WORD $0x08f1 // movlpd    qword [rcx + 8*rsi + 8], xmm1
 20909  	LONG $0x4c100ff2; WORD $0x10f2 // movsd    xmm1, qword [rdx + 8*rsi + 16]
 20910  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20911  	LONG $0x4c130f66; WORD $0x10f1 // movlpd    qword [rcx + 8*rsi + 16], xmm1
 20912  	LONG $0x4c100ff2; WORD $0x18f2 // movsd    xmm1, qword [rdx + 8*rsi + 24]
 20913  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 20914  	LONG $0x4c130f66; WORD $0x18f1 // movlpd    qword [rcx + 8*rsi + 24], xmm1
 20915  	LONG $0x04c68348               // add    rsi, 4
 20916  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 20917  	JNE  LBB3_645
 20918  	JMP  LBB3_923 // all elements written
 20919  
 20920  LBB3_60: // Dispatch on edi; for edi == 12, write 0 where the 64-bit input equals 0.0, else (input AND 0[rbp]) OR 160[rbp].
 20921  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 20922  	JLE  LBB3_116 // edi <= 8 is dispatched further at LBB3_116
 20923  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 20924  	JE   LBB3_173
 20925  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 20926  	JE   LBB3_176
 20927  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 20928  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20929  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20930  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20931  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20932  	LONG $0x04f88341         // cmp    r8d, 4
 20933  	JB   LBB3_66 // fewer than 4 elements: use the scalar code below
 20934  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 20935  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 20936  	JBE  LBB3_390 // end of the [rdx] range is at or below rcx: ranges do not overlap
 20937  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 20938  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 20939  	JBE  LBB3_390 // end of the [rcx] range is at or below rdx: ranges do not overlap
 20940  // Fall-through: count < 4 or the ranges overlap. NOTE(review): LBB3_390 is presumably the vectorized variant; it is not visible here - confirm.
 20941  LBB3_66: // scalar path: rsi is the element index, starting at 0
 20942  	WORD $0xf631 // xor    esi, esi
 20943  
 20944  LBB3_650: // rax = ~rsi for now (r9 is added at LBB3_652); if the count is odd, handle one element first
 20945  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 20946  	WORD $0xf748; BYTE $0xd0     // not    rax
 20947  	LONG $0x01c1f641             // test    r9b, 1
 20948  	JE   LBB3_652 // even count: no single leading element
 20949  	LONG $0x04100ff2; BYTE $0xf2 // movsd    xmm0, qword [rdx + 8*rsi]
 20950  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI3_0] */
 20951  	LONG $0xc8540f66             // andpd    xmm1, xmm0
 20952  	QUAD $0x000000a095100ff2     // movsd    xmm2, qword 160[rbp] /* [rip + .LCPI3_2] */
 20953  	LONG $0xd1560f66             // orpd    xmm2, xmm1
 20954  	LONG $0xc9570f66             // xorpd    xmm1, xmm1
 20955  	LONG $0xc8c20ff2; BYTE $0x00 // cmpeqsd    xmm1, xmm0
 20956  	LONG $0xca550f66             // andnpd    xmm1, xmm2
 20957  	LONG $0x0c130f66; BYTE $0xf1 // movlpd    qword [rcx + 8*rsi], xmm1
 20958  	LONG $0x01ce8348             // or    rsi, 1
 20959  // NOTE(review): if 0[rbp] is the sign-bit mask and 160[rbp] is 1.0 this yields -1.0 / 0 / +1.0 (a sign function); the constants are outside this chunk - confirm.
 20960  LBB3_652: // rax = r9 - 1 - (rsi at LBB3_650); zero means the single element above was the only one pending
 20961  	WORD $0x014c; BYTE $0xc8     // add    rax, r9
 20962  	JE   LBB3_923
 20963  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 20964  	QUAD $0x000000a08d100ff2     // movsd    xmm1, qword 160[rbp] /* [rip + .LCPI3_2] */
 20965  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 20966  
 20967  LBB3_654: // two elements per iteration: xmm4 = (x AND xmm0) OR xmm1; xmm3 = mask of (x == 0.0); store (NOT xmm3) AND xmm4
 20968  	LONG $0x1c100ff2; BYTE $0xf2   // movsd    xmm3, qword [rdx + 8*rsi]
 20969  	LONG $0xe3280f66               // movapd    xmm4, xmm3
 20970  	LONG $0xe0540f66               // andpd    xmm4, xmm0
 20971  	LONG $0xe1560f66               // orpd    xmm4, xmm1
 20972  	LONG $0xdac20ff2; BYTE $0x00   // cmpeqsd    xmm3, xmm2
 20973  	LONG $0xdc550f66               // andnpd    xmm3, xmm4
 20974  	LONG $0x1c130f66; BYTE $0xf1   // movlpd    qword [rcx + 8*rsi], xmm3
 20975  	LONG $0x5c100ff2; WORD $0x08f2 // movsd    xmm3, qword [rdx + 8*rsi + 8]
 20976  	LONG $0xe3280f66               // movapd    xmm4, xmm3
 20977  	LONG $0xe0540f66               // andpd    xmm4, xmm0
 20978  	LONG $0xe1560f66               // orpd    xmm4, xmm1
 20979  	LONG $0xdac20ff2; BYTE $0x00   // cmpeqsd    xmm3, xmm2
 20980  	LONG $0xdc550f66               // andnpd    xmm3, xmm4
 20981  	LONG $0x5c130f66; WORD $0x08f1 // movlpd    qword [rcx + 8*rsi + 8], xmm3
 20982  	LONG $0x02c68348               // add    rsi, 2
 20983  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 20984  	JNE  LBB3_654
 20985  	JMP  LBB3_923 // all elements written
 20986  
 20987  LBB3_67: // Dispatch on edi; for edi == 12, clear bit 63 of each 64-bit element of [rdx] and store it to [rcx].
 20988  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 20989  	JLE  LBB3_121 // edi <= 8 is dispatched further at LBB3_121
 20990  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 20991  	JE   LBB3_179
 20992  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 20993  	JE   LBB3_182
 20994  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 20995  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 20996  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 20997  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 20998  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 20999  	LONG $0x04f88341         // cmp    r8d, 4
 21000  	JB   LBB3_73 // fewer than 4 elements: use the scalar loops below
 21001  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21002  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21003  	JBE  LBB3_393 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21004  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21005  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21006  	JBE  LBB3_393 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21007  // Fall-through: count < 4 or the ranges overlap. NOTE(review): LBB3_393 is presumably the vectorized variant; it is not visible here - confirm.
 21008  LBB3_73: // scalar path: rsi is the element index, starting at 0
 21009  	WORD $0xf631 // xor    esi, esi
 21010  
 21011  LBB3_659: // r10 = mask with every bit set except bit 63; r8 = r9 - rsi - 1; rax = r9 & 3 (iterations of the single-element loop)
 21012  	QUAD $0xffffffffffffba49; WORD $0x7fff // mov    r10, 9223372036854775807
 21013  	WORD $0x8949; BYTE $0xf0               // mov    r8, rsi
 21014  	WORD $0xf749; BYTE $0xd0               // not    r8
 21015  	WORD $0x014d; BYTE $0xc8               // add    r8, r9
 21016  	WORD $0x894c; BYTE $0xc8               // mov    rax, r9
 21017  	LONG $0x03e08348                       // and    rax, 3
 21018  	JE   LBB3_661 // count is a multiple of 4: skip the single-element loop
 21019  // NOTE(review): for IEEE-754 float64 data, clearing bit 63 is the absolute value; the element type for edi == 12 is not shown here - confirm.
 21020  LBB3_660: // one qword per iteration, rax times: out = in AND r10
 21021  	LONG $0xf23c8b48         // mov    rdi, qword [rdx + 8*rsi]
 21022  	WORD $0x214c; BYTE $0xd7 // and    rdi, r10
 21023  	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
 21024  	LONG $0x01c68348         // add    rsi, 1
 21025  	LONG $0xffc08348         // add    rax, -1
 21026  	JNE  LBB3_660
 21027  
 21028  LBB3_661: // r8 < 3 means fewer than 4 elements were pending at LBB3_659, so no group of 4 is left
 21029  	LONG $0x03f88349 // cmp    r8, 3
 21030  	JB   LBB3_923
 21031  
 21032  LBB3_662: // four qwords per iteration until rsi reaches r9
 21033  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 21034  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21035  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21036  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 21037  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21038  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 21039  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
 21040  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21041  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 21042  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
 21043  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21044  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 21045  	LONG $0x04c68348             // add    rsi, 4
 21046  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21047  	JNE  LBB3_662
 21048  	JMP  LBB3_923 // all elements written
 21049  
 21050  LBB3_74: // Dispatch on edi; for edi == 12, clear bit 63 of each 64-bit element of [rdx] and store it to [rcx].
 21051  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21052  	JLE  LBB3_126 // edi <= 8 is dispatched further at LBB3_126
 21053  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 21054  	JE   LBB3_185
 21055  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 21056  	JE   LBB3_188
 21057  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 21058  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21059  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21060  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21061  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21062  	LONG $0x04f88341         // cmp    r8d, 4
 21063  	JB   LBB3_80 // fewer than 4 elements: use the scalar loops below
 21064  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21065  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21066  	JBE  LBB3_396 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21067  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21068  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21069  	JBE  LBB3_396 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21070  // Fall-through: count < 4 or the ranges overlap. NOTE(review): LBB3_396 is presumably the vectorized variant; it is not visible here - confirm.
 21071  LBB3_80: // scalar path: rsi is the element index, starting at 0
 21072  	WORD $0xf631 // xor    esi, esi
 21073  
 21074  LBB3_667: // r10 = mask with every bit set except bit 63; r8 = r9 - rsi - 1; rax = r9 & 3 (iterations of the single-element loop)
 21075  	QUAD $0xffffffffffffba49; WORD $0x7fff // mov    r10, 9223372036854775807
 21076  	WORD $0x8949; BYTE $0xf0               // mov    r8, rsi
 21077  	WORD $0xf749; BYTE $0xd0               // not    r8
 21078  	WORD $0x014d; BYTE $0xc8               // add    r8, r9
 21079  	WORD $0x894c; BYTE $0xc8               // mov    rax, r9
 21080  	LONG $0x03e08348                       // and    rax, 3
 21081  	JE   LBB3_669 // count is a multiple of 4: skip the single-element loop
 21082  // NOTE(review): for IEEE-754 float64 data, clearing bit 63 is the absolute value; the element type for edi == 12 is not shown here - confirm.
 21083  LBB3_668: // one qword per iteration, rax times: out = in AND r10
 21084  	LONG $0xf23c8b48         // mov    rdi, qword [rdx + 8*rsi]
 21085  	WORD $0x214c; BYTE $0xd7 // and    rdi, r10
 21086  	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
 21087  	LONG $0x01c68348         // add    rsi, 1
 21088  	LONG $0xffc08348         // add    rax, -1
 21089  	JNE  LBB3_668
 21090  
 21091  LBB3_669: // r8 < 3 means fewer than 4 elements were pending at LBB3_667, so no group of 4 is left
 21092  	LONG $0x03f88349 // cmp    r8, 3
 21093  	JB   LBB3_923
 21094  
 21095  LBB3_670: // four qwords per iteration until rsi reaches r9
 21096  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 21097  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21098  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21099  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 21100  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21101  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 21102  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
 21103  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21104  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 21105  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
 21106  	WORD $0x214c; BYTE $0xd0     // and    rax, r10
 21107  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 21108  	LONG $0x04c68348             // add    rsi, 4
 21109  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21110  	JNE  LBB3_670
 21111  	JMP  LBB3_923 // all elements written
 21112  
 21113  LBB3_81: // Dispatch on edi; for edi == 3, store 0 - x (8-bit two's-complement negate) for each byte of [rdx] into [rcx].
 21114  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 21115  	JE   LBB3_191
 21116  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 21117  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21118  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21119  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21120  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21121  	LONG $0x20f88341         // cmp    r8d, 32
 21122  	JB   LBB3_85 // fewer than 32 elements: use the scalar loops below
 21123  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
 21124  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21125  	JBE  LBB3_399 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21126  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
 21127  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21128  	JBE  LBB3_399 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21129  // Fall-through: count < 32 or the ranges overlap. NOTE(review): LBB3_399 is presumably the vectorized variant; it is not visible here - confirm.
 21130  LBB3_85: // scalar path: rsi is the element index, starting at 0
 21131  	WORD $0xf631 // xor    esi, esi
 21132  
 21133  LBB3_675: // r8 = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 21134  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21135  	WORD $0xf749; BYTE $0xd0 // not    r8
 21136  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21137  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21138  	LONG $0x03e78348         // and    rdi, 3
 21139  	JE   LBB3_677 // count is a multiple of 4: skip the single-element loop
 21140  
 21141  LBB3_676: // one byte per iteration, rdi times: out = 0 - in
 21142  	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
 21143  	WORD $0xc031                 // xor    eax, eax
 21144  	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
 21145  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21146  	LONG $0x01c68348             // add    rsi, 1
 21147  	LONG $0xffc78348             // add    rdi, -1
 21148  	JNE  LBB3_676
 21149  
 21150  LBB3_677: // r8 < 3 means fewer than 4 elements were pending at LBB3_675, so no group of 4 is left
 21151  	LONG $0x03f88349 // cmp    r8, 3
 21152  	JB   LBB3_923
 21153  
 21154  LBB3_678: // four bytes per iteration until rsi reaches r9; the 4th byte uses edi as the scratch accumulator instead of eax
 21155  	WORD $0xc031                 // xor    eax, eax
 21156  	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
 21157  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21158  	WORD $0xc031                 // xor    eax, eax
 21159  	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
 21160  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 21161  	WORD $0xc031                 // xor    eax, eax
 21162  	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
 21163  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
 21164  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
 21165  	WORD $0xff31                 // xor    edi, edi
 21166  	WORD $0x2840; BYTE $0xc7     // sub    dil, al
 21167  	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
 21168  	LONG $0x04c68348             // add    rsi, 4
 21169  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21170  	JNE  LBB3_678
 21171  	JMP  LBB3_923 // all elements written
 21172  
 21173  LBB3_86: // Dispatch on edi; for edi == 3, store 0 - x (8-bit two's-complement negate) for each byte of [rdx] into [rcx].
 21174  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 21175  	JE   LBB3_194
 21176  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 21177  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21178  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21179  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21180  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21181  	LONG $0x20f88341         // cmp    r8d, 32
 21182  	JB   LBB3_90 // fewer than 32 elements: use the scalar loops below
 21183  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
 21184  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21185  	JBE  LBB3_402 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21186  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
 21187  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21188  	JBE  LBB3_402 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21189  // Fall-through: count < 32 or the ranges overlap. NOTE(review): LBB3_402 is presumably the vectorized variant; it is not visible here - confirm.
 21190  LBB3_90: // scalar path: rsi is the element index, starting at 0
 21191  	WORD $0xf631 // xor    esi, esi
 21192  
 21193  LBB3_683: // r8 = r9 - rsi - 1 (pending elements minus one); rdi = r9 & 3 (iterations of the single-element loop)
 21194  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21195  	WORD $0xf749; BYTE $0xd0 // not    r8
 21196  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21197  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21198  	LONG $0x03e78348         // and    rdi, 3
 21199  	JE   LBB3_685 // count is a multiple of 4: skip the single-element loop
 21200  
 21201  LBB3_684: // one byte per iteration, rdi times: out = 0 - in
 21202  	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
 21203  	WORD $0xc031                 // xor    eax, eax
 21204  	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
 21205  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21206  	LONG $0x01c68348             // add    rsi, 1
 21207  	LONG $0xffc78348             // add    rdi, -1
 21208  	JNE  LBB3_684
 21209  
 21210  LBB3_685: // r8 < 3 means fewer than 4 elements were pending at LBB3_683, so no group of 4 is left
 21211  	LONG $0x03f88349 // cmp    r8, 3
 21212  	JB   LBB3_923
 21213  
 21214  LBB3_686: // four bytes per iteration until rsi reaches r9; the 4th byte uses edi as the scratch accumulator instead of eax
 21215  	WORD $0xc031                 // xor    eax, eax
 21216  	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
 21217  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21218  	WORD $0xc031                 // xor    eax, eax
 21219  	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
 21220  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 21221  	WORD $0xc031                 // xor    eax, eax
 21222  	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
 21223  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
 21224  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
 21225  	WORD $0xff31                 // xor    edi, edi
 21226  	WORD $0x2840; BYTE $0xc7     // sub    dil, al
 21227  	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
 21228  	LONG $0x04c68348             // add    rsi, 4
 21229  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21230  	JNE  LBB3_686
 21231  	JMP  LBB3_923 // all elements written
 21232  
 21233  LBB3_91: // Dispatch on edi; for edi == 3, store 1, 0 or 0xFF (-1) for each signed byte of [rdx] that is > 0, == 0 or < 0.
 21234  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 21235  	JE   LBB3_197
 21236  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 21237  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21238  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21239  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21240  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21241  	LONG $0x20f88341         // cmp    r8d, 32
 21242  	JB   LBB3_95 // fewer than 32 elements: use the scalar code below
 21243  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
 21244  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21245  	JBE  LBB3_405 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21246  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
 21247  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21248  	JBE  LBB3_405 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21249  // Fall-through: count < 32 or the ranges overlap. NOTE(review): LBB3_405 is presumably the vectorized variant; it is not visible here - confirm.
 21250  LBB3_95: // scalar path: rsi is the element index, starting at 0
 21251  	WORD $0xf631 // xor    esi, esi
 21252  
 21253  LBB3_691: // rax = ~rsi for now (r9 is added at LBB3_693); if the count is odd, handle one element first
 21254  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 21255  	WORD $0xf748; BYTE $0xd0     // not    rax
 21256  	LONG $0x01c1f641             // test    r9b, 1
 21257  	JE   LBB3_693 // even count: no single leading element
 21258  	LONG $0x323c8a40             // mov    dil, byte [rdx + rsi]
 21259  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 21260  	LONG $0xd0950f41             // setne    r8b
 21261  	WORD $0xf641; BYTE $0xd8     // neg    r8b
 21262  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 21263  	LONG $0xc0b60f45             // movzx    r8d, r8b
 21264  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 21265  	LONG $0xf84e0f41             // cmovle    edi, r8d
 21266  	LONG $0x313c8840             // mov    byte [rcx + rsi], dil
 21267  	LONG $0x01ce8348             // or    rsi, 1
 21268  // Above: r8b = -(x != 0), i.e. 0x00 or 0xFF; edi starts at 1 and is replaced by r8d when x <= 0 (signed), using the flags of the second test.
 21269  LBB3_693: // rax = r9 - 1 - (rsi at LBB3_691); zero means the single element above was the only one pending
 21270  	WORD $0x014c; BYTE $0xc8     // add    rax, r9
 21271  	JE   LBB3_923
 21272  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 21273  
 21274  LBB3_695: // two bytes per iteration: eax = -(x != 0) as 0x00/0xFF, replaced by edi (= 1) when x > 0 (signed)
 21275  	LONG $0x04b60f44; BYTE $0x32   // movzx    r8d, byte [rdx + rsi]
 21276  	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
 21277  	WORD $0x950f; BYTE $0xd0       // setne    al
 21278  	WORD $0xd8f6                   // neg    al
 21279  	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
 21280  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 21281  	WORD $0x4f0f; BYTE $0xc7       // cmovg    eax, edi
 21282  	WORD $0x0488; BYTE $0x31       // mov    byte [rcx + rsi], al
 21283  	LONG $0x44b60f44; WORD $0x0132 // movzx    r8d, byte [rdx + rsi + 1]
 21284  	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
 21285  	WORD $0x950f; BYTE $0xd0       // setne    al
 21286  	WORD $0xd8f6                   // neg    al
 21287  	WORD $0x8445; BYTE $0xc0       // test    r8b, r8b
 21288  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 21289  	WORD $0x4f0f; BYTE $0xc7       // cmovg    eax, edi
 21290  	LONG $0x01314488               // mov    byte [rcx + rsi + 1], al
 21291  	LONG $0x02c68348               // add    rsi, 2
 21292  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 21293  	JNE  LBB3_695
 21294  	JMP  LBB3_923 // all elements written
 21295  
 21296  LBB3_96: // Dispatch on edi; for edi == 3, store the absolute value of each signed byte of [rdx] into [rcx] (low 8 bits kept, so 0x80 stays 0x80).
 21297  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 21298  	JE   LBB3_200
 21299  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 21300  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21301  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21302  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21303  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21304  	LONG $0x10f88341         // cmp    r8d, 16
 21305  	JB   LBB3_100 // fewer than 16 elements: use the scalar code below
 21306  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
 21307  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21308  	JBE  LBB3_408 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21309  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
 21310  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21311  	JBE  LBB3_408 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21312  // Fall-through: count < 16 or the ranges overlap. NOTE(review): LBB3_408 is presumably the vectorized variant; it is not visible here - confirm.
 21313  LBB3_100: // scalar path: rsi is the element index, starting at 0
 21314  	WORD $0xf631 // xor    esi, esi
 21315  
 21316  LBB3_700: // rax = ~rsi for now (r9 is added at LBB3_702); if the count is odd, handle one element first
 21317  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 21318  	WORD $0xf748; BYTE $0xd0 // not    rax
 21319  	LONG $0x01c1f641         // test    r9b, 1
 21320  	JE   LBB3_702 // even count: no single leading element
 21321  	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
 21322  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 21323  	LONG $0x07f8c141         // sar    r8d, 7
 21324  	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
 21325  	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
 21326  	LONG $0x313c8840         // mov    byte [rcx + rsi], dil
 21327  	LONG $0x01ce8348         // or    rsi, 1
 21328  // Above: r8d = x >> 7 (arithmetic) is 0 for x >= 0 and -1 for x < 0, so (x + r8d) XOR r8d is x or -x respectively.
 21329  LBB3_702: // rax = r9 - 1 - (rsi at LBB3_700); zero means the single element above was the only one pending
 21330  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 21331  	JE   LBB3_923
 21332  
 21333  LBB3_703: // two bytes per iteration until rsi reaches r9: edi = x >> 7 (0 or -1), out = (x + edi) XOR edi
 21334  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
 21335  	WORD $0xc789                 // mov    edi, eax
 21336  	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
 21337  	WORD $0xf801                 // add    eax, edi
 21338  	WORD $0xf831                 // xor    eax, edi
 21339  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21340  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
 21341  	WORD $0xc789                 // mov    edi, eax
 21342  	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
 21343  	WORD $0xf801                 // add    eax, edi
 21344  	WORD $0xf831                 // xor    eax, edi
 21345  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 21346  	LONG $0x02c68348             // add    rsi, 2
 21347  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21348  	JNE  LBB3_703
 21349  	JMP  LBB3_923 // all elements written
 21350  
 21351  LBB3_101: // Dispatch on edi; for edi == 3, store the absolute value of each signed byte of [rdx] into [rcx] (low 8 bits kept, so 0x80 stays 0x80).
 21352  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 21353  	JE   LBB3_203
 21354  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 21355  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21356  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21357  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21358  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21359  	LONG $0x10f88341         // cmp    r8d, 16
 21360  	JB   LBB3_105 // fewer than 16 elements: use the scalar code below
 21361  	LONG $0x0a048d4a         // lea    rax, [rdx + r9]
 21362  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21363  	JBE  LBB3_411 // end of the [rdx] range is at or below rcx: ranges do not overlap
 21364  	LONG $0x09048d4a         // lea    rax, [rcx + r9]
 21365  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21366  	JBE  LBB3_411 // end of the [rcx] range is at or below rdx: ranges do not overlap
 21367  // Fall-through: count < 16 or the ranges overlap. NOTE(review): LBB3_411 is presumably the vectorized variant; it is not visible here - confirm.
 21368  LBB3_105: // scalar path: rsi is the element index, starting at 0
 21369  	WORD $0xf631 // xor    esi, esi
 21370  
 21371  LBB3_708: // rax = ~rsi for now (r9 is added at LBB3_710); if the count is odd, handle one element first
 21372  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 21373  	WORD $0xf748; BYTE $0xd0 // not    rax
 21374  	LONG $0x01c1f641         // test    r9b, 1
 21375  	JE   LBB3_710 // even count: no single leading element
 21376  	LONG $0x323cbe0f         // movsx    edi, byte [rdx + rsi]
 21377  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 21378  	LONG $0x07f8c141         // sar    r8d, 7
 21379  	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
 21380  	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
 21381  	LONG $0x313c8840         // mov    byte [rcx + rsi], dil
 21382  	LONG $0x01ce8348         // or    rsi, 1
 21383  // Above: r8d = x >> 7 (arithmetic) is 0 for x >= 0 and -1 for x < 0, so (x + r8d) XOR r8d is x or -x respectively.
 21384  LBB3_710: // rax = r9 - 1 - (rsi at LBB3_708); zero means the single element above was the only one pending
 21385  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 21386  	JE   LBB3_923
 21387  
 21388  LBB3_711: // two bytes per iteration until rsi reaches r9: edi = x >> 7 (0 or -1), out = (x + edi) XOR edi
 21389  	LONG $0x3204be0f             // movsx    eax, byte [rdx + rsi]
 21390  	WORD $0xc789                 // mov    edi, eax
 21391  	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
 21392  	WORD $0xf801                 // add    eax, edi
 21393  	WORD $0xf831                 // xor    eax, edi
 21394  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 21395  	LONG $0x3244be0f; BYTE $0x01 // movsx    eax, byte [rdx + rsi + 1]
 21396  	WORD $0xc789                 // mov    edi, eax
 21397  	WORD $0xffc1; BYTE $0x07     // sar    edi, 7
 21398  	WORD $0xf801                 // add    eax, edi
 21399  	WORD $0xf831                 // xor    eax, edi
 21400  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 21401  	LONG $0x02c68348             // add    rsi, 2
 21402  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21403  	JNE  LBB3_711
 21404  	JMP  LBB3_923 // all elements written
 21405  
 21406  LBB3_106: // Dispatch stub for edi 7 and 8; the edi == 8 work itself is done at LBB3_265 / LBB3_420 (both outside this chunk).
 21407  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 21408  	JE   LBB3_206
 21409  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21410  	JNE  LBB3_923 // any other value: go to LBB3_923 (defined outside this chunk)
 21411  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21412  	JLE  LBB3_923 // count in r8d is <= 0 (signed): nothing to process
 21413  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21414  	LONG $0x04f88341         // cmp    r8d, 4
 21415  	JAE  LBB3_265 // 4 or more elements: continue at LBB3_265
 21416  	WORD $0xd231             // xor    edx, edx
 21417  	JMP  LBB3_420 // fewer than 4 elements: rdx was just zeroed (32-bit write zero-extends); continue at LBB3_420
 21418  
 21419  LBB3_111: // edi == 8 case: 64-bit negate, out[i] = 0 - in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21420  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 21421  	JE   LBB3_209 // edi == 7 is handled at LBB3_209 (outside this chunk)
 21422  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21423  	JNE  LBB3_923 // edi is neither 7 nor 8: go to LBB3_923 (outside this chunk; presumably the common exit)
 21424  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21425  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process
 21426  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21427  	LONG $0x04f88341         // cmp    r8d, 4
 21428  	JB   LBB3_115 // fewer than 4 elements: scalar loop
 21429  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21430  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21431  	JBE  LBB3_421 // input range ends at or before the output start: no overlap, take LBB3_421 (outside this chunk; presumably the vectorized path)
 21432  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21433  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21434  	JBE  LBB3_421 // output range ends at or before the input start: no overlap
 21435  
 21436  LBB3_115: // short or overlapping buffers: scalar loop starting at index 0
 21437  	WORD $0xf631 // xor    esi, esi
 21438  
 21439  LBB3_716: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21440  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21441  	WORD $0xf749; BYTE $0xd0 // not    r8
 21442  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21443  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21444  	LONG $0x03e78348         // and    rdi, 3
 21445  	JE   LBB3_718 // r9 is a multiple of 4: no single-element iterations
 21446  
 21447  LBB3_717: // one element per iteration, (r9 & 3) times
 21448  	WORD $0xc031     // xor    eax, eax
 21449  	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
 21450  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
 21451  	LONG $0x01c68348 // add    rsi, 1
 21452  	LONG $0xffc78348 // add    rdi, -1
 21453  	JNE  LBB3_717
 21454  
 21455  LBB3_718: // r8 still holds the value computed at LBB3_716
 21456  	LONG $0x03f88349 // cmp    r8, 3
 21457  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_716, so the loop above covered them
 21458  
 21459  LBB3_719: // four elements per iteration until rsi == r9
 21460  	WORD $0xc031                 // xor    eax, eax
 21461  	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
 21462  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21463  	WORD $0xc031                 // xor    eax, eax
 21464  	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
 21465  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 21466  	WORD $0xc031                 // xor    eax, eax
 21467  	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
 21468  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 21469  	WORD $0xc031                 // xor    eax, eax
 21470  	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
 21471  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 21472  	LONG $0x04c68348             // add    rsi, 4
 21473  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21474  	JNE  LBB3_719
 21475  	JMP  LBB3_923 // loop finished
 21476  
 21477  LBB3_116: // edi == 8 case: 64-bit non-zero flag, out[i] = (in[i] != 0) ? 1 : 0 (loads use rdx as base, stores use rcx, r8d holds the count)
 21478  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 21479  	JE   LBB3_212 // edi == 7 is handled at LBB3_212 (outside this chunk)
 21480  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21481  	JNE  LBB3_923 // edi is neither 7 nor 8: go to LBB3_923 (outside this chunk; presumably the common exit)
 21482  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21483  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process
 21484  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21485  	LONG $0x04f88341         // cmp    r8d, 4
 21486  	JB   LBB3_120 // fewer than 4 elements: scalar loop
 21487  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21488  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21489  	JBE  LBB3_424 // input range ends at or before the output start: no overlap, take LBB3_424 (outside this chunk; presumably the vectorized path)
 21490  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21491  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21492  	JBE  LBB3_424 // output range ends at or before the input start: no overlap
 21493  
 21494  LBB3_120: // short or overlapping buffers: scalar loop starting at index 0
 21495  	WORD $0xf631 // xor    esi, esi
 21496  
 21497  LBB3_724: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21498  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21499  	WORD $0xf749; BYTE $0xd0 // not    r8
 21500  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21501  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21502  	LONG $0x03e78348         // and    rdi, 3
 21503  	JE   LBB3_726 // r9 is a multiple of 4: no single-element iterations
 21504  
 21505  LBB3_725: // one element per iteration, (r9 & 3) times
 21506  	WORD $0xc031                 // xor    eax, eax
 21507  	LONG $0xf23c8348; BYTE $0x00 // cmp    qword [rdx + 8*rsi], 0
 21508  	WORD $0x950f; BYTE $0xd0     // setne    al
 21509  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21510  	LONG $0x01c68348             // add    rsi, 1
 21511  	LONG $0xffc78348             // add    rdi, -1
 21512  	JNE  LBB3_725
 21513  
 21514  LBB3_726: // r8 still holds the value computed at LBB3_724
 21515  	LONG $0x03f88349 // cmp    r8, 3
 21516  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_724, so the loop above covered them
 21517  
 21518  LBB3_727: // four elements per iteration until rsi == r9
 21519  	WORD $0xc031                   // xor    eax, eax
 21520  	LONG $0xf23c8348; BYTE $0x00   // cmp    qword [rdx + 8*rsi], 0
 21521  	WORD $0x950f; BYTE $0xd0       // setne    al
 21522  	LONG $0xf1048948               // mov    qword [rcx + 8*rsi], rax
 21523  	WORD $0xc031                   // xor    eax, eax
 21524  	LONG $0xf27c8348; WORD $0x0008 // cmp    qword [rdx + 8*rsi + 8], 0
 21525  	WORD $0x950f; BYTE $0xd0       // setne    al
 21526  	LONG $0xf1448948; BYTE $0x08   // mov    qword [rcx + 8*rsi + 8], rax
 21527  	WORD $0xc031                   // xor    eax, eax
 21528  	LONG $0xf27c8348; WORD $0x0010 // cmp    qword [rdx + 8*rsi + 16], 0
 21529  	WORD $0x950f; BYTE $0xd0       // setne    al
 21530  	LONG $0xf1448948; BYTE $0x10   // mov    qword [rcx + 8*rsi + 16], rax
 21531  	WORD $0xc031                   // xor    eax, eax
 21532  	LONG $0xf27c8348; WORD $0x0018 // cmp    qword [rdx + 8*rsi + 24], 0
 21533  	WORD $0x950f; BYTE $0xd0       // setne    al
 21534  	LONG $0xf1448948; BYTE $0x18   // mov    qword [rcx + 8*rsi + 24], rax
 21535  	LONG $0x04c68348               // add    rsi, 4
 21536  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 21537  	JNE  LBB3_727
 21538  	JMP  LBB3_923 // loop finished
 21539  
 21540  LBB3_121: // edi == 8 case: 64-bit element copy, out[i] = in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21541  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 21542  	JE   LBB3_215 // edi == 7 is handled at LBB3_215 (outside this chunk)
 21543  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21544  	JNE  LBB3_923 // edi is neither 7 nor 8: go to LBB3_923 (outside this chunk; presumably the common exit)
 21545  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21546  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process
 21547  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21548  	LONG $0x04f88341         // cmp    r8d, 4
 21549  	JB   LBB3_125 // fewer than 4 elements: scalar loop
 21550  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21551  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21552  	JBE  LBB3_427 // input range ends at or before the output start: no overlap, take LBB3_427 (outside this chunk; presumably the vectorized path)
 21553  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21554  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21555  	JBE  LBB3_427 // output range ends at or before the input start: no overlap
 21556  
 21557  LBB3_125: // short or overlapping buffers: scalar loop starting at index 0
 21558  	WORD $0xf631 // xor    esi, esi
 21559  
 21560  LBB3_556: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21561  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21562  	WORD $0xf749; BYTE $0xd0 // not    r8
 21563  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21564  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21565  	LONG $0x03e78348         // and    rdi, 3
 21566  	JE   LBB3_558 // r9 is a multiple of 4: no single-element iterations
 21567  
 21568  LBB3_557: // one element per iteration, (r9 & 3) times
 21569  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
 21570  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
 21571  	LONG $0x01c68348 // add    rsi, 1
 21572  	LONG $0xffc78348 // add    rdi, -1
 21573  	JNE  LBB3_557
 21574  
 21575  LBB3_558: // r8 still holds the value computed at LBB3_556
 21576  	LONG $0x03f88349 // cmp    r8, 3
 21577  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_556, so the loop above covered them
 21578  
 21579  LBB3_559: // four elements per iteration until rsi == r9
 21580  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 21581  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21582  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 21583  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 21584  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
 21585  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 21586  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
 21587  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 21588  	LONG $0x04c68348             // add    rsi, 4
 21589  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21590  	JNE  LBB3_559
 21591  	JMP  LBB3_923 // loop finished
 21592  
 21593  LBB3_126: // edi == 8 case: 64-bit element copy, out[i] = in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21594  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 21595  	JE   LBB3_218 // edi == 7 is handled at LBB3_218 (outside this chunk)
 21596  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 21597  	JNE  LBB3_923 // edi is neither 7 nor 8: go to LBB3_923 (outside this chunk; presumably the common exit)
 21598  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21599  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process
 21600  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21601  	LONG $0x04f88341         // cmp    r8d, 4
 21602  	JB   LBB3_130 // fewer than 4 elements: scalar loop
 21603  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 21604  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21605  	JBE  LBB3_429 // input range ends at or before the output start: no overlap, take LBB3_429 (outside this chunk; presumably the vectorized path)
 21606  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 21607  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21608  	JBE  LBB3_429 // output range ends at or before the input start: no overlap
 21609  
 21610  LBB3_130: // short or overlapping buffers: scalar loop starting at index 0
 21611  	WORD $0xf631 // xor    esi, esi
 21612  
 21613  LBB3_566: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21614  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21615  	WORD $0xf749; BYTE $0xd0 // not    r8
 21616  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21617  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21618  	LONG $0x03e78348         // and    rdi, 3
 21619  	JE   LBB3_568 // r9 is a multiple of 4: no single-element iterations
 21620  
 21621  LBB3_567: // one element per iteration, (r9 & 3) times
 21622  	LONG $0xf2048b48 // mov    rax, qword [rdx + 8*rsi]
 21623  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
 21624  	LONG $0x01c68348 // add    rsi, 1
 21625  	LONG $0xffc78348 // add    rdi, -1
 21626  	JNE  LBB3_567
 21627  
 21628  LBB3_568: // r8 still holds the value computed at LBB3_566
 21629  	LONG $0x03f88349 // cmp    r8, 3
 21630  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_566, so the loop above covered them
 21631  
 21632  LBB3_569: // four elements per iteration until rsi == r9
 21633  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 21634  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 21635  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 21636  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 21637  	LONG $0xf2448b48; BYTE $0x10 // mov    rax, qword [rdx + 8*rsi + 16]
 21638  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 21639  	LONG $0xf2448b48; BYTE $0x18 // mov    rax, qword [rdx + 8*rsi + 24]
 21640  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 21641  	LONG $0x04c68348             // add    rsi, 4
 21642  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21643  	JNE  LBB3_569
 21644  	JMP  LBB3_923 // loop finished
 21645  
 21646  LBB3_131: // Dispatch on the 32-bit count in r8d; every jump target here lies outside this chunk
 21647  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21648  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is presumably the common exit)
 21649  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21650  	LONG $0x10f88341         // cmp    r8d, 16
 21651  	JAE  LBB3_279 // at least 16 elements: continue at LBB3_279
 21652  	WORD $0xd231             // xor    edx, edx
 21653  	JMP  LBB3_437 // fewer than 16 elements: continue at LBB3_437 with edx cleared
 21654  
 21655  LBB3_134: // 16-bit negate, out[i] = 0 - in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21656  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21657  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21658  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21659  	LONG $0x10f88341         // cmp    r8d, 16
 21660  	JB   LBB3_136 // fewer than 16 elements: scalar loop
 21661  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21662  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21663  	JBE  LBB3_438 // input range ends at or before the output start: no overlap, take LBB3_438 (outside this chunk; presumably the vectorized path)
 21664  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21665  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21666  	JBE  LBB3_438 // output range ends at or before the input start: no overlap
 21667  
 21668  LBB3_136: // short or overlapping buffers: scalar loop starting at index 0
 21669  	WORD $0xf631 // xor    esi, esi
 21670  
 21671  LBB3_732: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21672  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21673  	WORD $0xf749; BYTE $0xd0 // not    r8
 21674  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21675  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21676  	LONG $0x03e78348         // and    rdi, 3
 21677  	JE   LBB3_734 // r9 is a multiple of 4: no single-element iterations
 21678  
 21679  LBB3_733: // one element per iteration, (r9 & 3) times
 21680  	WORD $0xc031     // xor    eax, eax
 21681  	LONG $0x72042b66 // sub    ax, word [rdx + 2*rsi]
 21682  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
 21683  	LONG $0x01c68348 // add    rsi, 1
 21684  	LONG $0xffc78348 // add    rdi, -1
 21685  	JNE  LBB3_733
 21686  
 21687  LBB3_734: // r8 still holds the value computed at LBB3_732
 21688  	LONG $0x03f88349 // cmp    r8, 3
 21689  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_732, so the loop above covered them
 21690  
 21691  LBB3_735: // four elements per iteration until rsi == r9
 21692  	WORD $0xc031                 // xor    eax, eax
 21693  	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
 21694  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21695  	WORD $0xc031                 // xor    eax, eax
 21696  	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
 21697  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 21698  	WORD $0xc031                 // xor    eax, eax
 21699  	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
 21700  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
 21701  	WORD $0xc031                 // xor    eax, eax
 21702  	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
 21703  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
 21704  	LONG $0x04c68348             // add    rsi, 4
 21705  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21706  	JNE  LBB3_735
 21707  	JMP  LBB3_923 // loop finished
 21708  
 21709  LBB3_137: // 16-bit negate, out[i] = 0 - in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21710  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21711  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21712  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21713  	LONG $0x10f88341         // cmp    r8d, 16
 21714  	JB   LBB3_139 // fewer than 16 elements: scalar loop
 21715  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21716  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21717  	JBE  LBB3_441 // input range ends at or before the output start: no overlap, take LBB3_441 (outside this chunk; presumably the vectorized path)
 21718  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21719  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21720  	JBE  LBB3_441 // output range ends at or before the input start: no overlap
 21721  
 21722  LBB3_139: // short or overlapping buffers: scalar loop starting at index 0
 21723  	WORD $0xf631 // xor    esi, esi
 21724  
 21725  LBB3_740: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21726  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21727  	WORD $0xf749; BYTE $0xd0 // not    r8
 21728  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21729  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21730  	LONG $0x03e78348         // and    rdi, 3
 21731  	JE   LBB3_742 // r9 is a multiple of 4: no single-element iterations
 21732  
 21733  LBB3_741: // one element per iteration, (r9 & 3) times
 21734  	WORD $0xc031     // xor    eax, eax
 21735  	LONG $0x72042b66 // sub    ax, word [rdx + 2*rsi]
 21736  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
 21737  	LONG $0x01c68348 // add    rsi, 1
 21738  	LONG $0xffc78348 // add    rdi, -1
 21739  	JNE  LBB3_741
 21740  
 21741  LBB3_742: // r8 still holds the value computed at LBB3_740
 21742  	LONG $0x03f88349 // cmp    r8, 3
 21743  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_740, so the loop above covered them
 21744  
 21745  LBB3_743: // four elements per iteration until rsi == r9
 21746  	WORD $0xc031                 // xor    eax, eax
 21747  	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
 21748  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21749  	WORD $0xc031                 // xor    eax, eax
 21750  	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
 21751  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 21752  	WORD $0xc031                 // xor    eax, eax
 21753  	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
 21754  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
 21755  	WORD $0xc031                 // xor    eax, eax
 21756  	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
 21757  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
 21758  	LONG $0x04c68348             // add    rsi, 4
 21759  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21760  	JNE  LBB3_743
 21761  	JMP  LBB3_923 // loop finished
 21762  
 21763  LBB3_140: // 16-bit negate, out[i] = 0 - in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21764  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21765  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21766  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21767  	LONG $0x10f88341         // cmp    r8d, 16
 21768  	JB   LBB3_142 // fewer than 16 elements: scalar loop
 21769  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21770  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21771  	JBE  LBB3_444 // input range ends at or before the output start: no overlap, take LBB3_444 (outside this chunk; presumably the vectorized path)
 21772  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21773  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21774  	JBE  LBB3_444 // output range ends at or before the input start: no overlap
 21775  
 21776  LBB3_142: // short or overlapping buffers: scalar loop starting at index 0
 21777  	WORD $0xf631 // xor    esi, esi
 21778  
 21779  LBB3_748: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21780  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21781  	WORD $0xf749; BYTE $0xd0 // not    r8
 21782  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21783  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21784  	LONG $0x03e78348         // and    rdi, 3
 21785  	JE   LBB3_750 // r9 is a multiple of 4: no single-element iterations
 21786  
 21787  LBB3_749: // one element per iteration, (r9 & 3) times
 21788  	WORD $0xc031     // xor    eax, eax
 21789  	LONG $0x72042b66 // sub    ax, word [rdx + 2*rsi]
 21790  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
 21791  	LONG $0x01c68348 // add    rsi, 1
 21792  	LONG $0xffc78348 // add    rdi, -1
 21793  	JNE  LBB3_749
 21794  
 21795  LBB3_750: // r8 still holds the value computed at LBB3_748
 21796  	LONG $0x03f88349 // cmp    r8, 3
 21797  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_748, so the loop above covered them
 21798  
 21799  LBB3_751: // four elements per iteration until rsi == r9
 21800  	WORD $0xc031                 // xor    eax, eax
 21801  	LONG $0x72042b66             // sub    ax, word [rdx + 2*rsi]
 21802  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21803  	WORD $0xc031                 // xor    eax, eax
 21804  	LONG $0x72442b66; BYTE $0x02 // sub    ax, word [rdx + 2*rsi + 2]
 21805  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 21806  	WORD $0xc031                 // xor    eax, eax
 21807  	LONG $0x72442b66; BYTE $0x04 // sub    ax, word [rdx + 2*rsi + 4]
 21808  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
 21809  	WORD $0xc031                 // xor    eax, eax
 21810  	LONG $0x72442b66; BYTE $0x06 // sub    ax, word [rdx + 2*rsi + 6]
 21811  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
 21812  	LONG $0x04c68348             // add    rsi, 4
 21813  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21814  	JNE  LBB3_751
 21815  	JMP  LBB3_923 // loop finished
 21816  
 21817  LBB3_143: // 16-bit non-zero flag, out[i] = (in[i] != 0) ? 1 : 0 (loads use rdx as base, stores use rcx, r8d holds the count)
 21818  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21819  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21820  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21821  	LONG $0x10f88341         // cmp    r8d, 16
 21822  	JB   LBB3_145 // fewer than 16 elements: scalar loop
 21823  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21824  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21825  	JBE  LBB3_447 // input range ends at or before the output start: no overlap, take LBB3_447 (outside this chunk; presumably the vectorized path)
 21826  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21827  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21828  	JBE  LBB3_447 // output range ends at or before the input start: no overlap
 21829  
 21830  LBB3_145: // short or overlapping buffers: scalar loop starting at index 0
 21831  	WORD $0xf631 // xor    esi, esi
 21832  
 21833  LBB3_756: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21834  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21835  	WORD $0xf749; BYTE $0xd0 // not    r8
 21836  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21837  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21838  	LONG $0x03e78348         // and    rdi, 3
 21839  	JE   LBB3_758 // r9 is a multiple of 4: no single-element iterations
 21840  
 21841  LBB3_757: // one element per iteration, (r9 & 3) times
 21842  	WORD $0xc031                 // xor    eax, eax
 21843  	LONG $0x723c8366; BYTE $0x00 // cmp    word [rdx + 2*rsi], 0
 21844  	WORD $0x950f; BYTE $0xd0     // setne    al
 21845  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21846  	LONG $0x01c68348             // add    rsi, 1
 21847  	LONG $0xffc78348             // add    rdi, -1
 21848  	JNE  LBB3_757
 21849  
 21850  LBB3_758: // r8 still holds the value computed at LBB3_756
 21851  	LONG $0x03f88349 // cmp    r8, 3
 21852  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_756, so the loop above covered them
 21853  
 21854  LBB3_759: // four elements per iteration until rsi == r9
 21855  	WORD $0xc031                   // xor    eax, eax
 21856  	LONG $0x723c8366; BYTE $0x00   // cmp    word [rdx + 2*rsi], 0
 21857  	WORD $0x950f; BYTE $0xd0       // setne    al
 21858  	LONG $0x71048966               // mov    word [rcx + 2*rsi], ax
 21859  	WORD $0xc031                   // xor    eax, eax
 21860  	LONG $0x727c8366; WORD $0x0002 // cmp    word [rdx + 2*rsi + 2], 0
 21861  	WORD $0x950f; BYTE $0xd0       // setne    al
 21862  	LONG $0x71448966; BYTE $0x02   // mov    word [rcx + 2*rsi + 2], ax
 21863  	WORD $0xc031                   // xor    eax, eax
 21864  	LONG $0x727c8366; WORD $0x0004 // cmp    word [rdx + 2*rsi + 4], 0
 21865  	WORD $0x950f; BYTE $0xd0       // setne    al
 21866  	LONG $0x71448966; BYTE $0x04   // mov    word [rcx + 2*rsi + 4], ax
 21867  	WORD $0xc031                   // xor    eax, eax
 21868  	LONG $0x727c8366; WORD $0x0006 // cmp    word [rdx + 2*rsi + 6], 0
 21869  	WORD $0x950f; BYTE $0xd0       // setne    al
 21870  	LONG $0x71448966; BYTE $0x06   // mov    word [rcx + 2*rsi + 6], ax
 21871  	LONG $0x04c68348               // add    rsi, 4
 21872  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 21873  	JNE  LBB3_759
 21874  	JMP  LBB3_923 // loop finished
 21875  
 21876  LBB3_146: // 16-bit signed sign: out[i] = 1 if in[i] > 0, -1 if in[i] < 0, 0 if in[i] == 0 (loads use rdx as base, stores use rcx, r8d holds the count)
 21877  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21878  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21879  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21880  	LONG $0x10f88341         // cmp    r8d, 16
 21881  	JB   LBB3_148 // fewer than 16 elements: scalar loop
 21882  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21883  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21884  	JBE  LBB3_450 // input range ends at or before the output start: no overlap, take LBB3_450 (outside this chunk; presumably the vectorized path)
 21885  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21886  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21887  	JBE  LBB3_450 // output range ends at or before the input start: no overlap
 21888  
 21889  LBB3_148: // short or overlapping buffers: scalar loop starting at index 0
 21890  	WORD $0xf631 // xor    esi, esi
 21891  
 21892  LBB3_764: // scalar loop entry, rsi = first index to process; handles one element first when r9 is odd, using -(x != 0) replaced by 1 only when x > 0
 21893  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 21894  	WORD $0xf748; BYTE $0xd0     // not    rax
 21895  	LONG $0x01c1f641             // test    r9b, 1
 21896  	JE   LBB3_766 // count in r9 is even: skip the single-element step
 21897  	LONG $0x04b70f44; BYTE $0x72 // movzx    r8d, word [rdx + 2*rsi]
 21898  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 21899  	LONG $0xc0854566             // test    r8w, r8w
 21900  	LONG $0xd2950f41             // setne    r10b
 21901  	WORD $0xf741; BYTE $0xda     // neg    r10d
 21902  	LONG $0xc0854566             // test    r8w, r8w
 21903  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 21904  	LONG $0xfa4e0f41             // cmovle    edi, r10d
 21905  	LONG $0x713c8966             // mov    word [rcx + 2*rsi], di
 21906  	LONG $0x01ce8348             // or    rsi, 1
 21907  
 21908  LBB3_766: // rax becomes r9 - 1 - (rsi as it was at LBB3_764); r8d is then loaded with the constant 1 for the cmovg below
 21909  	WORD $0x014c; BYTE $0xc8       // add    rax, r9
 21910  	JE   LBB3_923 // exactly one element was pending, nothing left for the pairwise loop
 21911  	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1
 21912  
 21913  LBB3_768: // two elements per iteration until rsi == r9
 21914  	LONG $0x723cb70f             // movzx    edi, word [rdx + 2*rsi]
 21915  	WORD $0xc031                 // xor    eax, eax
 21916  	WORD $0x8566; BYTE $0xff     // test    di, di
 21917  	WORD $0x950f; BYTE $0xd0     // setne    al
 21918  	WORD $0xd8f7                 // neg    eax
 21919  	WORD $0x8566; BYTE $0xff     // test    di, di
 21920  	LONG $0xc04f0f41             // cmovg    eax, r8d
 21921  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21922  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
 21923  	WORD $0xff31                 // xor    edi, edi
 21924  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 21925  	LONG $0xd7950f40             // setne    dil
 21926  	WORD $0xdff7                 // neg    edi
 21927  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 21928  	LONG $0xf84f0f41             // cmovg    edi, r8d
 21929  	LONG $0x717c8966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], di
 21930  	LONG $0x02c68348             // add    rsi, 2
 21931  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21932  	JNE  LBB3_768
 21933  	JMP  LBB3_923 // loop finished
 21934  
 21935  LBB3_149: // 16-bit element copy, out[i] = in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 21936  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21937  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21938  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21939  	LONG $0x10f88341         // cmp    r8d, 16
 21940  	JB   LBB3_151 // fewer than 16 elements: scalar loop
 21941  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21942  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21943  	JBE  LBB3_453 // input range ends at or before the output start: no overlap, take LBB3_453 (outside this chunk; presumably the vectorized path)
 21944  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21945  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21946  	JBE  LBB3_453 // output range ends at or before the input start: no overlap
 21947  
 21948  LBB3_151: // short or overlapping buffers: scalar loop starting at index 0
 21949  	WORD $0xf631 // xor    esi, esi
 21950  
 21951  LBB3_576: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 21952  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 21953  	WORD $0xf749; BYTE $0xd0 // not    r8
 21954  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 21955  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 21956  	LONG $0x03e78348         // and    rdi, 3
 21957  	JE   LBB3_578 // r9 is a multiple of 4: no single-element iterations
 21958  
 21959  LBB3_577: // one element per iteration, (r9 & 3) times
 21960  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
 21961  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
 21962  	LONG $0x01c68348 // add    rsi, 1
 21963  	LONG $0xffc78348 // add    rdi, -1
 21964  	JNE  LBB3_577
 21965  
 21966  LBB3_578: // r8 still holds the value computed at LBB3_576
 21967  	LONG $0x03f88349 // cmp    r8, 3
 21968  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_576, so the loop above covered them
 21969  
 21970  LBB3_579: // four elements per iteration until rsi == r9
 21971  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
 21972  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 21973  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
 21974  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 21975  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
 21976  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
 21977  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
 21978  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
 21979  	LONG $0x04c68348             // add    rsi, 4
 21980  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 21981  	JNE  LBB3_579
 21982  	JMP  LBB3_923 // loop finished
 21983  
 21984  LBB3_152: // 16-bit absolute value on sign-extended words, out[i] = |in[i]| (loads use rdx as base, stores use rcx, r8d holds the count)
 21985  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 21986  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 21987  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 21988  	LONG $0x08f88341         // cmp    r8d, 8
 21989  	JB   LBB3_154 // fewer than 8 elements: scalar loop
 21990  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 21991  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 21992  	JBE  LBB3_455 // input range ends at or before the output start: no overlap, take LBB3_455 (outside this chunk; presumably the vectorized path)
 21993  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 21994  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 21995  	JBE  LBB3_455 // output range ends at or before the input start: no overlap
 21996  
 21997  LBB3_154: // short or overlapping buffers: scalar loop starting at index 0
 21998  	WORD $0xf631 // xor    esi, esi
 21999  
 22000  LBB3_773: // scalar loop entry, rsi = first index to process; handles one element first when r9 is odd; with m = x >> 15, (x + m) ^ m is the absolute value
 22001  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 22002  	WORD $0xf748; BYTE $0xd0 // not    rax
 22003  	LONG $0x01c1f641         // test    r9b, 1
 22004  	JE   LBB3_775 // count in r9 is even: skip the single-element step
 22005  	LONG $0x723cbf0f         // movsx    edi, word [rdx + 2*rsi]
 22006  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 22007  	LONG $0x0ff8c141         // sar    r8d, 15
 22008  	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
 22009  	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
 22010  	LONG $0x713c8966         // mov    word [rcx + 2*rsi], di
 22011  	LONG $0x01ce8348         // or    rsi, 1
 22012  
 22013  LBB3_775: // rax becomes r9 - 1 - (rsi as it was at LBB3_773)
 22014  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 22015  	JE   LBB3_923 // exactly one element was pending, nothing left for the pairwise loop
 22016  
 22017  LBB3_776: // two elements per iteration until rsi == r9
 22018  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
 22019  	WORD $0xc789                 // mov    edi, eax
 22020  	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
 22021  	WORD $0xf801                 // add    eax, edi
 22022  	WORD $0xf831                 // xor    eax, edi
 22023  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 22024  	LONG $0x7244bf0f; BYTE $0x02 // movsx    eax, word [rdx + 2*rsi + 2]
 22025  	WORD $0xc789                 // mov    edi, eax
 22026  	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
 22027  	WORD $0xf801                 // add    eax, edi
 22028  	WORD $0xf831                 // xor    eax, edi
 22029  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 22030  	LONG $0x02c68348             // add    rsi, 2
 22031  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22032  	JNE  LBB3_776
 22033  	JMP  LBB3_923 // loop finished
 22034  
 22035  LBB3_155: // 16-bit element copy, out[i] = in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 22036  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22037  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 22038  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 22039  	LONG $0x10f88341         // cmp    r8d, 16
 22040  	JB   LBB3_157 // fewer than 16 elements: scalar loop
 22041  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 22042  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22043  	JBE  LBB3_458 // input range ends at or before the output start: no overlap, take LBB3_458 (outside this chunk; presumably the vectorized path)
 22044  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 22045  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22046  	JBE  LBB3_458 // output range ends at or before the input start: no overlap
 22047  
 22048  LBB3_157: // short or overlapping buffers: scalar loop starting at index 0
 22049  	WORD $0xf631 // xor    esi, esi
 22050  
 22051  LBB3_586: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 22052  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22053  	WORD $0xf749; BYTE $0xd0 // not    r8
 22054  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 22055  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22056  	LONG $0x03e78348         // and    rdi, 3
 22057  	JE   LBB3_588 // r9 is a multiple of 4: no single-element iterations
 22058  
 22059  LBB3_587: // one element per iteration, (r9 & 3) times
 22060  	LONG $0x7204b70f // movzx    eax, word [rdx + 2*rsi]
 22061  	LONG $0x71048966 // mov    word [rcx + 2*rsi], ax
 22062  	LONG $0x01c68348 // add    rsi, 1
 22063  	LONG $0xffc78348 // add    rdi, -1
 22064  	JNE  LBB3_587
 22065  
 22066  LBB3_588: // r8 still holds the value computed at LBB3_586
 22067  	LONG $0x03f88349 // cmp    r8, 3
 22068  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_586, so the loop above covered them
 22069  
 22070  LBB3_589: // four elements per iteration until rsi == r9
 22071  	LONG $0x7204b70f             // movzx    eax, word [rdx + 2*rsi]
 22072  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 22073  	LONG $0x7244b70f; BYTE $0x02 // movzx    eax, word [rdx + 2*rsi + 2]
 22074  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 22075  	LONG $0x7244b70f; BYTE $0x04 // movzx    eax, word [rdx + 2*rsi + 4]
 22076  	LONG $0x71448966; BYTE $0x04 // mov    word [rcx + 2*rsi + 4], ax
 22077  	LONG $0x7244b70f; BYTE $0x06 // movzx    eax, word [rdx + 2*rsi + 6]
 22078  	LONG $0x71448966; BYTE $0x06 // mov    word [rcx + 2*rsi + 6], ax
 22079  	LONG $0x04c68348             // add    rsi, 4
 22080  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22081  	JNE  LBB3_589
 22082  	JMP  LBB3_923 // loop finished
 22083  
 22084  LBB3_158: // 16-bit absolute value on sign-extended words, out[i] = |in[i]| (loads use rdx as base, stores use rcx, r8d holds the count)
 22085  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22086  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 22087  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 22088  	LONG $0x08f88341         // cmp    r8d, 8
 22089  	JB   LBB3_160 // fewer than 8 elements: scalar loop
 22090  	LONG $0x4a048d4a         // lea    rax, [rdx + 2*r9]
 22091  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22092  	JBE  LBB3_460 // input range ends at or before the output start: no overlap, take LBB3_460 (outside this chunk; presumably the vectorized path)
 22093  	LONG $0x49048d4a         // lea    rax, [rcx + 2*r9]
 22094  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22095  	JBE  LBB3_460 // output range ends at or before the input start: no overlap
 22096  
 22097  LBB3_160: // short or overlapping buffers: scalar loop starting at index 0
 22098  	WORD $0xf631 // xor    esi, esi
 22099  
 22100  LBB3_781: // scalar loop entry, rsi = first index to process; handles one element first when r9 is odd; with m = x >> 15, (x + m) ^ m is the absolute value
 22101  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 22102  	WORD $0xf748; BYTE $0xd0 // not    rax
 22103  	LONG $0x01c1f641         // test    r9b, 1
 22104  	JE   LBB3_783 // count in r9 is even: skip the single-element step
 22105  	LONG $0x723cbf0f         // movsx    edi, word [rdx + 2*rsi]
 22106  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 22107  	LONG $0x0ff8c141         // sar    r8d, 15
 22108  	WORD $0x0144; BYTE $0xc7 // add    edi, r8d
 22109  	WORD $0x3144; BYTE $0xc7 // xor    edi, r8d
 22110  	LONG $0x713c8966         // mov    word [rcx + 2*rsi], di
 22111  	LONG $0x01ce8348         // or    rsi, 1
 22112  
 22113  LBB3_783: // rax becomes r9 - 1 - (rsi as it was at LBB3_781)
 22114  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 22115  	JE   LBB3_923 // exactly one element was pending, nothing left for the pairwise loop
 22116  
 22117  LBB3_784: // two elements per iteration until rsi == r9
 22118  	LONG $0x7204bf0f             // movsx    eax, word [rdx + 2*rsi]
 22119  	WORD $0xc789                 // mov    edi, eax
 22120  	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
 22121  	WORD $0xf801                 // add    eax, edi
 22122  	WORD $0xf831                 // xor    eax, edi
 22123  	LONG $0x71048966             // mov    word [rcx + 2*rsi], ax
 22124  	LONG $0x7244bf0f; BYTE $0x02 // movsx    eax, word [rdx + 2*rsi + 2]
 22125  	WORD $0xc789                 // mov    edi, eax
 22126  	WORD $0xffc1; BYTE $0x0f     // sar    edi, 15
 22127  	WORD $0xf801                 // add    eax, edi
 22128  	WORD $0xf831                 // xor    eax, edi
 22129  	LONG $0x71448966; BYTE $0x02 // mov    word [rcx + 2*rsi + 2], ax
 22130  	LONG $0x02c68348             // add    rsi, 2
 22131  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22132  	JNE  LBB3_784
 22133  	JMP  LBB3_923 // loop finished
 22134  
 22135  LBB3_161: // 64-bit negate, out[i] = 0 - in[i] (loads use rdx as base, stores use rcx, r8d holds the count)
 22136  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22137  	JLE  LBB3_923 // signed count in r8d <= 0: no elements to process (LBB3_923 is outside this chunk; presumably the common exit)
 22138  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 22139  	LONG $0x04f88341         // cmp    r8d, 4
 22140  	JB   LBB3_163 // fewer than 4 elements: scalar loop
 22141  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9]
 22142  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22143  	JBE  LBB3_463 // input range ends at or before the output start: no overlap, take LBB3_463 (outside this chunk; presumably the vectorized path)
 22144  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9]
 22145  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22146  	JBE  LBB3_463 // output range ends at or before the input start: no overlap
 22147  
 22148  LBB3_163: // short or overlapping buffers: scalar loop starting at index 0
 22149  	WORD $0xf631 // xor    esi, esi
 22150  
 22151  LBB3_789: // scalar loop entry, rsi = first index to process; computes r8 = r9 - 1 - rsi and rdi = r9 & 3
 22152  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22153  	WORD $0xf749; BYTE $0xd0 // not    r8
 22154  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 22155  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22156  	LONG $0x03e78348         // and    rdi, 3
 22157  	JE   LBB3_791 // r9 is a multiple of 4: no single-element iterations
 22158  
 22159  LBB3_790: // one element per iteration, (r9 & 3) times
 22160  	WORD $0xc031     // xor    eax, eax
 22161  	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
 22162  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
 22163  	LONG $0x01c68348 // add    rsi, 1
 22164  	LONG $0xffc78348 // add    rdi, -1
 22165  	JNE  LBB3_790
 22166  
 22167  LBB3_791: // r8 still holds the value computed at LBB3_789
 22168  	LONG $0x03f88349 // cmp    r8, 3
 22169  	JB   LBB3_923 // fewer than 4 elements were pending at LBB3_789, so the loop above covered them
 22170  
 22171  LBB3_792: // four elements per iteration until rsi == r9
 22172  	WORD $0xc031                 // xor    eax, eax
 22173  	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
 22174  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 22175  	WORD $0xc031                 // xor    eax, eax
 22176  	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
 22177  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 22178  	WORD $0xc031                 // xor    eax, eax
 22179  	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
 22180  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 22181  	WORD $0xc031                 // xor    eax, eax
 22182  	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
 22183  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 22184  	LONG $0x04c68348             // add    rsi, 4
 22185  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22186  	JNE  LBB3_792
 22187  	JMP  LBB3_923 // loop finished
 22188  
 22189  LBB3_164: // 32-bit elements: out[i] = in[i] XOR the 16-byte constant at [rbp+96] (presumably a sign-bit mask, i.e. float32 negation; the constant is not visible here). rdx = source, rcx = destination, r8d = element count.
 22190  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22191  	JLE  LBB3_923 // count <= 0: nothing to do
 22192  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22193  	LONG $0x08f88341         // cmp    r8d, 8
 22194  	JB   LBB3_166 // fewer than 8 elements: go straight to the scalar loop
 22195  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9] ; rax = one past the end of the source
 22196  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22197  	JBE  LBB3_466 // no overlap (target is outside this chunk, presumably the vector loop)
 22198  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the end of the destination
 22199  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22200  	JBE  LBB3_466 // no overlap
 22201  
 22202  LBB3_166: // overlapping or short input: scalar path from element 0
 22203  	WORD $0xf631 // xor    esi, esi
 22204  
 22205  LBB3_797: // scalar path; rsi = index of the next element to process
 22206  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 22207  	WORD $0xf748; BYTE $0xd0     // not    rax
 22208  	WORD $0x014c; BYTE $0xc8     // add    rax, r9 ; rax = r9 - rsi - 1
 22209  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 22210  	LONG $0x03e78348             // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22211  	JE   LBB3_800
 22212  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */ ; xmm0 = XOR mask
 22213  
 22214  LBB3_799: // one element per iteration, rdi times
 22215  	LONG $0x0c100ff3; BYTE $0xb2 // movss    xmm1, dword [rdx + 4*rsi]
 22216  	LONG $0xc8570f66             // xorpd    xmm1, xmm0
 22217  	LONG $0x0c110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm1 ; only the low dword is stored
 22218  	LONG $0x01c68348             // add    rsi, 1
 22219  	LONG $0xffc78348             // add    rdi, -1
 22220  	JNE  LBB3_799
 22221  
 22222  LBB3_800: // rax < 3 means fewer than 4 elements were pending on entry to LBB3_797, so nothing is left
 22223  	LONG $0x03f88348             // cmp    rax, 3
 22224  	JB   LBB3_923
 22225  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */ ; reload the mask (the peel loop may have been skipped)
 22226  
 22227  LBB3_802: // four elements per iteration until rsi reaches the count in r9
 22228  	LONG $0x0c100ff3; BYTE $0xb2   // movss    xmm1, dword [rdx + 4*rsi]
 22229  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22230  	LONG $0x0c110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm1
 22231  	LONG $0x4c100ff3; WORD $0x04b2 // movss    xmm1, dword [rdx + 4*rsi + 4]
 22232  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22233  	LONG $0x4c110ff3; WORD $0x04b1 // movss    dword [rcx + 4*rsi + 4], xmm1
 22234  	LONG $0x4c100ff3; WORD $0x08b2 // movss    xmm1, dword [rdx + 4*rsi + 8]
 22235  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22236  	LONG $0x4c110ff3; WORD $0x08b1 // movss    dword [rcx + 4*rsi + 8], xmm1
 22237  	LONG $0x4c100ff3; WORD $0x0cb2 // movss    xmm1, dword [rdx + 4*rsi + 12]
 22238  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22239  	LONG $0x4c110ff3; WORD $0x0cb1 // movss    dword [rcx + 4*rsi + 12], xmm1
 22240  	LONG $0x04c68348               // add    rsi, 4
 22241  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 22242  	JNE  LBB3_802
 22243  	JMP  LBB3_923
 22244  
 22245  LBB3_167: // 64-bit elements: out[i] = 0 - in[i]. rdx = source, rcx = destination, r8d = element count.
 22246  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22247  	JLE  LBB3_923 // count <= 0: nothing to do
 22248  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22249  	LONG $0x04f88341         // cmp    r8d, 4
 22250  	JB   LBB3_169 // fewer than 4 elements: go straight to the scalar loop
 22251  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9] ; rax = one past the end of the source
 22252  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22253  	JBE  LBB3_469 // no overlap (target is outside this chunk, presumably the vector loop)
 22254  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9] ; rax = one past the end of the destination
 22255  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22256  	JBE  LBB3_469 // no overlap
 22257  
 22258  LBB3_169: // overlapping or short input: scalar path from element 0
 22259  	WORD $0xf631 // xor    esi, esi
 22260  
 22261  LBB3_807: // scalar path; rsi = index of the next element to process
 22262  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22263  	WORD $0xf749; BYTE $0xd0 // not    r8
 22264  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
 22265  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22266  	LONG $0x03e78348         // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22267  	JE   LBB3_809
 22268  
 22269  LBB3_808: // one element per iteration, rdi times
 22270  	WORD $0xc031     // xor    eax, eax
 22271  	LONG $0xf2042b48 // sub    rax, qword [rdx + 8*rsi]
 22272  	LONG $0xf1048948 // mov    qword [rcx + 8*rsi], rax
 22273  	LONG $0x01c68348 // add    rsi, 1
 22274  	LONG $0xffc78348 // add    rdi, -1
 22275  	JNE  LBB3_808
 22276  
 22277  LBB3_809: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_807, so nothing is left
 22278  	LONG $0x03f88349 // cmp    r8, 3
 22279  	JB   LBB3_923
 22280  
 22281  LBB3_810: // four elements per iteration until rsi reaches the count in r9
 22282  	WORD $0xc031                 // xor    eax, eax
 22283  	LONG $0xf2042b48             // sub    rax, qword [rdx + 8*rsi]
 22284  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 22285  	WORD $0xc031                 // xor    eax, eax
 22286  	LONG $0xf2442b48; BYTE $0x08 // sub    rax, qword [rdx + 8*rsi + 8]
 22287  	LONG $0xf1448948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rax
 22288  	WORD $0xc031                 // xor    eax, eax
 22289  	LONG $0xf2442b48; BYTE $0x10 // sub    rax, qword [rdx + 8*rsi + 16]
 22290  	LONG $0xf1448948; BYTE $0x10 // mov    qword [rcx + 8*rsi + 16], rax
 22291  	WORD $0xc031                 // xor    eax, eax
 22292  	LONG $0xf2442b48; BYTE $0x18 // sub    rax, qword [rdx + 8*rsi + 24]
 22293  	LONG $0xf1448948; BYTE $0x18 // mov    qword [rcx + 8*rsi + 24], rax
 22294  	LONG $0x04c68348             // add    rsi, 4
 22295  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22296  	JNE  LBB3_810
 22297  	JMP  LBB3_923
 22298  
 22299  LBB3_170: // 32-bit elements: out[i] = in[i] XOR the 16-byte constant at [rbp+96] (presumably a sign-bit mask, i.e. float32 negation; the constant is not visible here). rdx = source, rcx = destination, r8d = element count.
 22300  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22301  	JLE  LBB3_923 // count <= 0: nothing to do
 22302  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22303  	LONG $0x08f88341         // cmp    r8d, 8
 22304  	JB   LBB3_172 // fewer than 8 elements: go straight to the scalar loop
 22305  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9] ; rax = one past the end of the source
 22306  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22307  	JBE  LBB3_472 // no overlap (target is outside this chunk, presumably the vector loop)
 22308  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the end of the destination
 22309  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22310  	JBE  LBB3_472 // no overlap
 22311  
 22312  LBB3_172: // overlapping or short input: scalar path from element 0
 22313  	WORD $0xf631 // xor    esi, esi
 22314  
 22315  LBB3_815: // scalar path; rsi = index of the next element to process
 22316  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 22317  	WORD $0xf748; BYTE $0xd0     // not    rax
 22318  	WORD $0x014c; BYTE $0xc8     // add    rax, r9 ; rax = r9 - rsi - 1
 22319  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 22320  	LONG $0x03e78348             // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22321  	JE   LBB3_818
 22322  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */ ; xmm0 = XOR mask
 22323  
 22324  LBB3_817: // one element per iteration, rdi times
 22325  	LONG $0x0c100ff3; BYTE $0xb2 // movss    xmm1, dword [rdx + 4*rsi]
 22326  	LONG $0xc8570f66             // xorpd    xmm1, xmm0
 22327  	LONG $0x0c110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm1 ; only the low dword is stored
 22328  	LONG $0x01c68348             // add    rsi, 1
 22329  	LONG $0xffc78348             // add    rdi, -1
 22330  	JNE  LBB3_817
 22331  
 22332  LBB3_818: // rax < 3 means fewer than 4 elements were pending on entry to LBB3_815, so nothing is left
 22333  	LONG $0x03f88348             // cmp    rax, 3
 22334  	JB   LBB3_923
 22335  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */ ; reload the mask (the peel loop may have been skipped)
 22336  
 22337  LBB3_820: // four elements per iteration until rsi reaches the count in r9
 22338  	LONG $0x0c100ff3; BYTE $0xb2   // movss    xmm1, dword [rdx + 4*rsi]
 22339  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22340  	LONG $0x0c110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm1
 22341  	LONG $0x4c100ff3; WORD $0x04b2 // movss    xmm1, dword [rdx + 4*rsi + 4]
 22342  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22343  	LONG $0x4c110ff3; WORD $0x04b1 // movss    dword [rcx + 4*rsi + 4], xmm1
 22344  	LONG $0x4c100ff3; WORD $0x08b2 // movss    xmm1, dword [rdx + 4*rsi + 8]
 22345  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22346  	LONG $0x4c110ff3; WORD $0x08b1 // movss    dword [rcx + 4*rsi + 8], xmm1
 22347  	LONG $0x4c100ff3; WORD $0x0cb2 // movss    xmm1, dword [rdx + 4*rsi + 12]
 22348  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 22349  	LONG $0x4c110ff3; WORD $0x0cb1 // movss    dword [rcx + 4*rsi + 12], xmm1
 22350  	LONG $0x04c68348               // add    rsi, 4
 22351  	WORD $0x3949; BYTE $0xf1       // cmp    r9, rsi
 22352  	JNE  LBB3_820
 22353  	JMP  LBB3_923
 22354  
 22355  LBB3_173: // 64-bit signed elements: out[i] = 1 if in[i] > 0, 0 if in[i] == 0, -1 if in[i] < 0. rdx = source, rcx = destination, r8d = element count.
 22356  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22357  	JLE  LBB3_923 // count <= 0: nothing to do
 22358  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22359  	LONG $0x04f88341         // cmp    r8d, 4
 22360  	JB   LBB3_175 // fewer than 4 elements: go straight to the scalar loop
 22361  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9] ; rax = one past the end of the source
 22362  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22363  	JBE  LBB3_475 // no overlap (target is outside this chunk, presumably the vector loop)
 22364  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9] ; rax = one past the end of the destination
 22365  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22366  	JBE  LBB3_475 // no overlap
 22367  
 22368  LBB3_175: // overlapping or short input: scalar path from element 0
 22369  	WORD $0xf631 // xor    esi, esi
 22370  
 22371  LBB3_825: // scalar path; rsi = index of the next element to process
 22372  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 22373  	WORD $0xf748; BYTE $0xd0     // not    rax ; rax = -rsi - 1, completed to r9 - rsi - 1 at LBB3_827
 22374  	LONG $0x01c1f641             // test    r9b, 1
 22375  	JE   LBB3_827 // even count: no single leading element to peel
 22376  	LONG $0xf2048b4c             // mov    r8, qword [rdx + 8*rsi]
 22377  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 22378  	WORD $0x854d; BYTE $0xc0     // test    r8, r8
 22379  	LONG $0xd2950f41             // setne    r10b
 22380  	WORD $0xf749; BYTE $0xda     // neg    r10 ; r10 = 0 when x == 0, otherwise -1
 22381  	WORD $0x854d; BYTE $0xc0     // test    r8, r8
 22382  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 22383  	LONG $0xfa4e0f49             // cmovle    rdi, r10 ; x <= 0 selects r10, otherwise the result stays 1
 22384  	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
 22385  	LONG $0x01ce8348             // or    rsi, 1 ; sets bit 0 of the index (steps past the peeled element when rsi was even)
 22386  
 22387  LBB3_827: // rax becomes r9 - (rsi on entry to LBB3_825) - 1; zero means the peeled element was the last one
 22388  	WORD $0x014c; BYTE $0xc8       // add    rax, r9
 22389  	JE   LBB3_923
 22390  	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1 ; constant +1 used by the cmovg instructions below
 22391  
 22392  LBB3_829: // two elements per iteration until rsi reaches the count in r9
 22393  	LONG $0xf23c8b48             // mov    rdi, qword [rdx + 8*rsi]
 22394  	WORD $0xc031                 // xor    eax, eax
 22395  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 22396  	WORD $0x950f; BYTE $0xd0     // setne    al
 22397  	WORD $0xf748; BYTE $0xd8     // neg    rax ; rax = 0 when x == 0, otherwise -1
 22398  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 22399  	LONG $0xc04f0f49             // cmovg    rax, r8 ; x > 0 selects +1
 22400  	LONG $0xf1048948             // mov    qword [rcx + 8*rsi], rax
 22401  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 22402  	WORD $0xff31                 // xor    edi, edi
 22403  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 22404  	LONG $0xd7950f40             // setne    dil
 22405  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; rdi = 0 when x == 0, otherwise -1
 22406  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 22407  	LONG $0xf84f0f49             // cmovg    rdi, r8 ; x > 0 selects +1
 22408  	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
 22409  	LONG $0x02c68348             // add    rsi, 2
 22410  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22411  	JNE  LBB3_829
 22412  	JMP  LBB3_923
 22413  
 22414  LBB3_176: // float32 elements: out[i] = +0.0 when in[i] compares equal to zero, otherwise +1.0 or -1.0 according to the sign bit of in[i]. rdx = source, rcx = destination, r8d = element count.
 22415  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22416  	JLE  LBB3_923 // count <= 0: nothing to do
 22417  	WORD $0x8944; BYTE $0xc0 // mov    eax, r8d ; rax = zero-extended count (this kernel keeps the count in rax)
 22418  	LONG $0x08f88341         // cmp    r8d, 8
 22419  	JB   LBB3_178 // fewer than 8 elements: go straight to the scalar loop
 22420  	LONG $0x82348d48         // lea    rsi, [rdx + 4*rax] ; rsi = one past the end of the source
 22421  	WORD $0x3948; BYTE $0xce // cmp    rsi, rcx
 22422  	JBE  LBB3_478 // no overlap (target is outside this chunk, presumably the vector loop)
 22423  	LONG $0x81348d48         // lea    rsi, [rcx + 4*rax] ; rsi = one past the end of the destination
 22424  	WORD $0x3948; BYTE $0xd6 // cmp    rsi, rdx
 22425  	JBE  LBB3_478 // no overlap
 22426  
 22427  LBB3_178: // overlapping or short input: scalar path from element 0
 22428  	WORD $0xf631 // xor    esi, esi
 22429  
 22430  LBB3_481: // scalar path; rsi = index of the next element to process
 22431  	WORD $0x8949; BYTE $0xf0     // mov    r8, rsi
 22432  	WORD $0xf749; BYTE $0xd0     // not    r8 ; r8 = -rsi - 1, completed to rax - rsi - 1 at LBB3_483
 22433  	WORD $0x01a8                 // test    al, 1
 22434  	JE   LBB3_483 // even count: no single leading element to peel
 22435  	LONG $0x04100ff3; BYTE $0xb2 // movss    xmm0, dword [rdx + 4*rsi]
 22436  	WORD $0x500f; BYTE $0xf8     // movmskps    edi, xmm0
 22437  	WORD $0xe783; BYTE $0x01     // and    edi, 1 ; edi = sign bit of x
 22438  	WORD $0xdff7                 // neg    edi
 22439  	WORD $0xcf83; BYTE $0x01     // or    edi, 1 ; edi = -1 when the sign bit is set, otherwise +1
 22440  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 22441  	LONG $0xcf2a0ff3             // cvtsi2ss    xmm1, edi ; xmm1 = -1.0 or +1.0
 22442  	WORD $0x570f; BYTE $0xd2     // xorps    xmm2, xmm2
 22443  	LONG $0xd0c20ff3; BYTE $0x00 // cmpeqss    xmm2, xmm0 ; low dword = all ones when 0.0 == x
 22444  	WORD $0x550f; BYTE $0xd1     // andnps    xmm2, xmm1 ; zero the result where x == 0, otherwise keep +/-1.0
 22445  	LONG $0x14110ff3; BYTE $0xb1 // movss    dword [rcx + 4*rsi], xmm2
 22446  	LONG $0x01ce8348             // or    rsi, 1 ; sets bit 0 of the index (steps past the peeled element when rsi was even)
 22447  
 22448  LBB3_483: // r8 becomes rax - (rsi on entry to LBB3_481) - 1; zero means the peeled element was the last one
 22449  	WORD $0x0149; BYTE $0xc0 // add    r8, rax
 22450  	JE   LBB3_923
 22451  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0 ; xmm0 = 0.0, the comparison operand in the loop
 22452  
 22453  LBB3_485: // two elements per iteration until rsi reaches the count in rax
 22454  	LONG $0x0c100ff3; BYTE $0xb2   // movss    xmm1, dword [rdx + 4*rsi]
 22455  	WORD $0x500f; BYTE $0xf9       // movmskps    edi, xmm1
 22456  	WORD $0xe783; BYTE $0x01       // and    edi, 1
 22457  	WORD $0xdff7                   // neg    edi
 22458  	WORD $0xcf83; BYTE $0x01       // or    edi, 1 ; edi = -1 when the sign bit is set, otherwise +1
 22459  	WORD $0x570f; BYTE $0xd2       // xorps    xmm2, xmm2
 22460  	LONG $0xd72a0ff3               // cvtsi2ss    xmm2, edi
 22461  	LONG $0xc8c20ff3; BYTE $0x00   // cmpeqss    xmm1, xmm0 ; low dword = all ones when x == 0.0
 22462  	WORD $0x550f; BYTE $0xca       // andnps    xmm1, xmm2 ; zero where x == 0, otherwise +/-1.0
 22463  	LONG $0x0c110ff3; BYTE $0xb1   // movss    dword [rcx + 4*rsi], xmm1
 22464  	LONG $0x4c100ff3; WORD $0x04b2 // movss    xmm1, dword [rdx + 4*rsi + 4]
 22465  	WORD $0x500f; BYTE $0xf9       // movmskps    edi, xmm1
 22466  	WORD $0xe783; BYTE $0x01       // and    edi, 1
 22467  	WORD $0xdff7                   // neg    edi
 22468  	WORD $0xcf83; BYTE $0x01       // or    edi, 1
 22469  	WORD $0x570f; BYTE $0xd2       // xorps    xmm2, xmm2
 22470  	LONG $0xd72a0ff3               // cvtsi2ss    xmm2, edi
 22471  	LONG $0xc8c20ff3; BYTE $0x00   // cmpeqss    xmm1, xmm0
 22472  	WORD $0x550f; BYTE $0xca       // andnps    xmm1, xmm2
 22473  	LONG $0x4c110ff3; WORD $0x04b1 // movss    dword [rcx + 4*rsi + 4], xmm1
 22474  	LONG $0x02c68348               // add    rsi, 2
 22475  	WORD $0x3948; BYTE $0xf0       // cmp    rax, rsi
 22476  	JNE  LBB3_485
 22477  	JMP  LBB3_923
 22478  
 22479  LBB3_179: // 64-bit signed elements: out[i] = in[i] when in[i] > 0, otherwise 0 - in[i] (absolute value). rdx = source, rcx = destination, r8d = element count.
 22480  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22481  	JLE  LBB3_923 // count <= 0: nothing to do
 22482  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22483  	LONG $0x04f88341         // cmp    r8d, 4
 22484  	JB   LBB3_181 // fewer than 4 elements: go straight to the scalar loop
 22485  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9] ; rax = one past the end of the source
 22486  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22487  	JBE  LBB3_486 // no overlap (target is outside this chunk, presumably the vector loop)
 22488  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9] ; rax = one past the end of the destination
 22489  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22490  	JBE  LBB3_486 // no overlap
 22491  
 22492  LBB3_181: // overlapping or short input: scalar path from element 0
 22493  	WORD $0xf631 // xor    esi, esi
 22494  
 22495  LBB3_834: // scalar path; rsi = index of the next element to process
 22496  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 22497  	WORD $0xf748; BYTE $0xd0 // not    rax ; rax = -rsi - 1, completed to r9 - rsi - 1 at LBB3_836
 22498  	LONG $0x01c1f641         // test    r9b, 1
 22499  	JE   LBB3_836 // even count: no single leading element to peel
 22500  	LONG $0xf2048b4c         // mov    r8, qword [rdx + 8*rsi]
 22501  	WORD $0x894c; BYTE $0xc7 // mov    rdi, r8
 22502  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -x, flags reflect -x
 22503  	LONG $0xf84c0f49         // cmovl    rdi, r8 ; -x negative (x > 0): keep x instead
 22504  	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
 22505  	LONG $0x01ce8348         // or    rsi, 1 ; sets bit 0 of the index (steps past the peeled element when rsi was even)
 22506  
 22507  LBB3_836: // rax becomes r9 - (rsi on entry to LBB3_834) - 1; zero means the peeled element was the last one
 22508  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 22509  	JE   LBB3_923
 22510  
 22511  LBB3_837: // two elements per iteration until rsi reaches the count in r9
 22512  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 22513  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
 22514  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 22515  	LONG $0xf84c0f48             // cmovl    rdi, rax ; keep x when -x is negative
 22516  	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
 22517  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 22518  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
 22519  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 22520  	LONG $0xf84c0f48             // cmovl    rdi, rax
 22521  	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
 22522  	LONG $0x02c68348             // add    rsi, 2
 22523  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22524  	JNE  LBB3_837
 22525  	JMP  LBB3_923
 22526  
 22527  LBB3_182: // 32-bit elements: out[i] = in[i] AND 0x7fffffff, i.e. bit 31 cleared (presumably float32 absolute value). rdx = source, rcx = destination, r8d = element count.
 22528  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22529  	JLE  LBB3_923 // count <= 0: nothing to do
 22530  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22531  	LONG $0x08f88341         // cmp    r8d, 8
 22532  	JB   LBB3_184 // fewer than 8 elements: go straight to the scalar loop
 22533  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9] ; rax = one past the end of the source
 22534  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22535  	JBE  LBB3_489 // no overlap (target is outside this chunk, presumably the vector loop)
 22536  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the end of the destination
 22537  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22538  	JBE  LBB3_489 // no overlap
 22539  
 22540  LBB3_184: // overlapping or short input: scalar path from element 0
 22541  	WORD $0xf631 // xor    esi, esi
 22542  
 22543  LBB3_842: // scalar path; rsi = index of the next element to process
 22544  	WORD $0x8949; BYTE $0xf0       // mov    r8, rsi
 22545  	WORD $0xf749; BYTE $0xd0       // not    r8
 22546  	WORD $0x014d; BYTE $0xc8       // add    r8, r9 ; r8 = r9 - rsi - 1
 22547  	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
 22548  	LONG $0x03e78348               // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22549  	JE   LBB3_845
 22550  	LONG $0xffffba41; WORD $0x7fff // mov    r10d, 2147483647 ; mask 0x7fffffff
 22551  
 22552  LBB3_844: // one element per iteration, rdi times
 22553  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 22554  	WORD $0x2144; BYTE $0xd0 // and    eax, r10d
 22555  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22556  	LONG $0x01c68348         // add    rsi, 1
 22557  	LONG $0xffc78348         // add    rdi, -1
 22558  	JNE  LBB3_844
 22559  
 22560  LBB3_845: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_842, so nothing is left
 22561  	LONG $0x03f88349             // cmp    r8, 3
 22562  	JB   LBB3_923
 22563  	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647 ; mask 0x7fffffff, held in eax for the unrolled loop
 22564  
 22565  LBB3_847: // four elements per iteration until rsi reaches the count in r9
 22566  	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
 22567  	WORD $0xc721             // and    edi, eax
 22568  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 22569  	LONG $0x04b27c8b         // mov    edi, dword [rdx + 4*rsi + 4]
 22570  	WORD $0xc721             // and    edi, eax
 22571  	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
 22572  	LONG $0x08b27c8b         // mov    edi, dword [rdx + 4*rsi + 8]
 22573  	WORD $0xc721             // and    edi, eax
 22574  	LONG $0x08b17c89         // mov    dword [rcx + 4*rsi + 8], edi
 22575  	LONG $0x0cb27c8b         // mov    edi, dword [rdx + 4*rsi + 12]
 22576  	WORD $0xc721             // and    edi, eax
 22577  	LONG $0x0cb17c89         // mov    dword [rcx + 4*rsi + 12], edi
 22578  	LONG $0x04c68348         // add    rsi, 4
 22579  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 22580  	JNE  LBB3_847
 22581  	JMP  LBB3_923
 22582  
 22583  LBB3_185: // 64-bit signed elements: out[i] = in[i] when in[i] > 0, otherwise 0 - in[i] (absolute value). rdx = source, rcx = destination, r8d = element count.
 22584  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22585  	JLE  LBB3_923 // count <= 0: nothing to do
 22586  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22587  	LONG $0x04f88341         // cmp    r8d, 4
 22588  	JB   LBB3_187 // fewer than 4 elements: go straight to the scalar loop
 22589  	LONG $0xca048d4a         // lea    rax, [rdx + 8*r9] ; rax = one past the end of the source
 22590  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22591  	JBE  LBB3_492 // no overlap (target is outside this chunk, presumably the vector loop)
 22592  	LONG $0xc9048d4a         // lea    rax, [rcx + 8*r9] ; rax = one past the end of the destination
 22593  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22594  	JBE  LBB3_492 // no overlap
 22595  
 22596  LBB3_187: // overlapping or short input: scalar path from element 0
 22597  	WORD $0xf631 // xor    esi, esi
 22598  
 22599  LBB3_852: // scalar path; rsi = index of the next element to process
 22600  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 22601  	WORD $0xf748; BYTE $0xd0 // not    rax ; rax = -rsi - 1, completed to r9 - rsi - 1 at LBB3_854
 22602  	LONG $0x01c1f641         // test    r9b, 1
 22603  	JE   LBB3_854 // even count: no single leading element to peel
 22604  	LONG $0xf2048b4c         // mov    r8, qword [rdx + 8*rsi]
 22605  	WORD $0x894c; BYTE $0xc7 // mov    rdi, r8
 22606  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -x, flags reflect -x
 22607  	LONG $0xf84c0f49         // cmovl    rdi, r8 ; -x negative (x > 0): keep x instead
 22608  	LONG $0xf13c8948         // mov    qword [rcx + 8*rsi], rdi
 22609  	LONG $0x01ce8348         // or    rsi, 1 ; sets bit 0 of the index (steps past the peeled element when rsi was even)
 22610  
 22611  LBB3_854: // rax becomes r9 - (rsi on entry to LBB3_852) - 1; zero means the peeled element was the last one
 22612  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 22613  	JE   LBB3_923
 22614  
 22615  LBB3_855: // two elements per iteration until rsi reaches the count in r9
 22616  	LONG $0xf2048b48             // mov    rax, qword [rdx + 8*rsi]
 22617  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
 22618  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 22619  	LONG $0xf84c0f48             // cmovl    rdi, rax ; keep x when -x is negative
 22620  	LONG $0xf13c8948             // mov    qword [rcx + 8*rsi], rdi
 22621  	LONG $0xf2448b48; BYTE $0x08 // mov    rax, qword [rdx + 8*rsi + 8]
 22622  	WORD $0x8948; BYTE $0xc7     // mov    rdi, rax
 22623  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 22624  	LONG $0xf84c0f48             // cmovl    rdi, rax
 22625  	LONG $0xf17c8948; BYTE $0x08 // mov    qword [rcx + 8*rsi + 8], rdi
 22626  	LONG $0x02c68348             // add    rsi, 2
 22627  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22628  	JNE  LBB3_855
 22629  	JMP  LBB3_923
 22630  
 22631  LBB3_188: // 32-bit elements: out[i] = in[i] AND 0x7fffffff, i.e. bit 31 cleared (presumably float32 absolute value). rdx = source, rcx = destination, r8d = element count.
 22632  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22633  	JLE  LBB3_923 // count <= 0: nothing to do
 22634  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22635  	LONG $0x08f88341         // cmp    r8d, 8
 22636  	JB   LBB3_190 // fewer than 8 elements: go straight to the scalar loop
 22637  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9] ; rax = one past the end of the source
 22638  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22639  	JBE  LBB3_495 // no overlap (target is outside this chunk, presumably the vector loop)
 22640  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9] ; rax = one past the end of the destination
 22641  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22642  	JBE  LBB3_495 // no overlap
 22643  
 22644  LBB3_190: // overlapping or short input: scalar path from element 0
 22645  	WORD $0xf631 // xor    esi, esi
 22646  
 22647  LBB3_860: // scalar path; rsi = index of the next element to process
 22648  	WORD $0x8949; BYTE $0xf0       // mov    r8, rsi
 22649  	WORD $0xf749; BYTE $0xd0       // not    r8
 22650  	WORD $0x014d; BYTE $0xc8       // add    r8, r9 ; r8 = r9 - rsi - 1
 22651  	WORD $0x894c; BYTE $0xcf       // mov    rdi, r9
 22652  	LONG $0x03e78348               // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22653  	JE   LBB3_863
 22654  	LONG $0xffffba41; WORD $0x7fff // mov    r10d, 2147483647 ; mask 0x7fffffff
 22655  
 22656  LBB3_862: // one element per iteration, rdi times
 22657  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 22658  	WORD $0x2144; BYTE $0xd0 // and    eax, r10d
 22659  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22660  	LONG $0x01c68348         // add    rsi, 1
 22661  	LONG $0xffc78348         // add    rdi, -1
 22662  	JNE  LBB3_862
 22663  
 22664  LBB3_863: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_860, so nothing is left
 22665  	LONG $0x03f88349             // cmp    r8, 3
 22666  	JB   LBB3_923
 22667  	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647 ; mask 0x7fffffff, held in eax for the unrolled loop
 22668  
 22669  LBB3_865: // four elements per iteration until rsi reaches the count in r9
 22670  	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
 22671  	WORD $0xc721             // and    edi, eax
 22672  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 22673  	LONG $0x04b27c8b         // mov    edi, dword [rdx + 4*rsi + 4]
 22674  	WORD $0xc721             // and    edi, eax
 22675  	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
 22676  	LONG $0x08b27c8b         // mov    edi, dword [rdx + 4*rsi + 8]
 22677  	WORD $0xc721             // and    edi, eax
 22678  	LONG $0x08b17c89         // mov    dword [rcx + 4*rsi + 8], edi
 22679  	LONG $0x0cb27c8b         // mov    edi, dword [rdx + 4*rsi + 12]
 22680  	WORD $0xc721             // and    edi, eax
 22681  	LONG $0x0cb17c89         // mov    dword [rcx + 4*rsi + 12], edi
 22682  	LONG $0x04c68348         // add    rsi, 4
 22683  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 22684  	JNE  LBB3_865
 22685  	JMP  LBB3_923
 22686  
 22687  LBB3_191: // dispatch stub: picks a path by element count in r8d; both targets lie outside this chunk.
 22688  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22689  	JLE  LBB3_923 // count <= 0: nothing to do
 22690  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22691  	LONG $0x20f88341         // cmp    r8d, 32
 22692  	JAE  LBB3_338 // 32 or more elements (presumably the vector path, TODO confirm)
 22693  	WORD $0xd231             // xor    edx, edx ; edx = 0 before LBB3_504 (presumably the start index of a scalar loop, TODO confirm)
 22694  	JMP  LBB3_504
 22695  
 22696  LBB3_194: // 8-bit elements: out[i] = 0 - in[i]. rdx = source, rcx = destination, r8d = element count.
 22697  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22698  	JLE  LBB3_923 // count <= 0: nothing to do
 22699  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22700  	LONG $0x20f88341         // cmp    r8d, 32
 22701  	JB   LBB3_196 // fewer than 32 elements: go straight to the scalar loop
 22702  	LONG $0x0a048d4a         // lea    rax, [rdx + r9] ; rax = one past the end of the source
 22703  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22704  	JBE  LBB3_505 // no overlap (target is outside this chunk, presumably the vector loop)
 22705  	LONG $0x09048d4a         // lea    rax, [rcx + r9] ; rax = one past the end of the destination
 22706  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22707  	JBE  LBB3_505 // no overlap
 22708  
 22709  LBB3_196: // overlapping or short input: scalar path from element 0
 22710  	WORD $0xf631 // xor    esi, esi
 22711  
 22712  LBB3_870: // scalar path; rsi = index of the next element to process
 22713  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22714  	WORD $0xf749; BYTE $0xd0 // not    r8
 22715  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
 22716  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22717  	LONG $0x03e78348         // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22718  	JE   LBB3_872
 22719  
 22720  LBB3_871: // one element per iteration, rdi times
 22721  	LONG $0x14b60f44; BYTE $0x32 // movzx    r10d, byte [rdx + rsi]
 22722  	WORD $0xc031                 // xor    eax, eax
 22723  	WORD $0x2844; BYTE $0xd0     // sub    al, r10b
 22724  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 22725  	LONG $0x01c68348             // add    rsi, 1
 22726  	LONG $0xffc78348             // add    rdi, -1
 22727  	JNE  LBB3_871
 22728  
 22729  LBB3_872: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_870, so nothing is left
 22730  	LONG $0x03f88349 // cmp    r8, 3
 22731  	JB   LBB3_923
 22732  
 22733  LBB3_873: // four elements per iteration until rsi reaches the count in r9
 22734  	WORD $0xc031                 // xor    eax, eax
 22735  	WORD $0x042a; BYTE $0x32     // sub    al, byte [rdx + rsi]
 22736  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 22737  	WORD $0xc031                 // xor    eax, eax
 22738  	LONG $0x0132442a             // sub    al, byte [rdx + rsi + 1]
 22739  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 22740  	WORD $0xc031                 // xor    eax, eax
 22741  	LONG $0x0232442a             // sub    al, byte [rdx + rsi + 2]
 22742  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
 22743  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3] ; fourth element goes through edi, same 0 - x result
 22744  	WORD $0xff31                 // xor    edi, edi
 22745  	WORD $0x2840; BYTE $0xc7     // sub    dil, al
 22746  	LONG $0x317c8840; BYTE $0x03 // mov    byte [rcx + rsi + 3], dil
 22747  	LONG $0x04c68348             // add    rsi, 4
 22748  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22749  	JNE  LBB3_873
 22750  	JMP  LBB3_923
 22751  
 22752  LBB3_197: // 8-bit elements: out[i] = 1 when in[i] != 0, otherwise 0. rdx = source, rcx = destination, r8d = element count.
 22753  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22754  	JLE  LBB3_923 // count <= 0: nothing to do
 22755  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22756  	LONG $0x20f88341         // cmp    r8d, 32
 22757  	JB   LBB3_199 // fewer than 32 elements: go straight to the scalar loop
 22758  	LONG $0x0a048d4a         // lea    rax, [rdx + r9] ; rax = one past the end of the source
 22759  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22760  	JBE  LBB3_508 // no overlap (target is outside this chunk, presumably the vector loop)
 22761  	LONG $0x09048d4a         // lea    rax, [rcx + r9] ; rax = one past the end of the destination
 22762  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22763  	JBE  LBB3_508 // no overlap
 22764  
 22765  LBB3_199: // overlapping or short input: scalar path from element 0
 22766  	WORD $0xf631 // xor    esi, esi
 22767  
 22768  LBB3_878: // scalar path; rsi = index of the next element to process
 22769  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 22770  	WORD $0xf748; BYTE $0xd0 // not    rax
 22771  	WORD $0x014c; BYTE $0xc8 // add    rax, r9 ; rax = r9 - rsi - 1
 22772  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22773  	LONG $0x03e78348         // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22774  	JE   LBB3_880
 22775  
 22776  LBB3_879: // one element per iteration, rdi times
 22777  	LONG $0x00323c80 // cmp    byte [rdx + rsi], 0
 22778  	LONG $0x3114950f // setne    byte [rcx + rsi]
 22779  	LONG $0x01c68348 // add    rsi, 1
 22780  	LONG $0xffc78348 // add    rdi, -1
 22781  	JNE  LBB3_879
 22782  
 22783  LBB3_880: // rax < 3 means fewer than 4 elements were pending on entry to LBB3_878, so nothing is left
 22784  	LONG $0x03f88348 // cmp    rax, 3
 22785  	JB   LBB3_923
 22786  
 22787  LBB3_881: // four elements per iteration until rsi reaches the count in r9
 22788  	LONG $0x00323c80             // cmp    byte [rdx + rsi], 0
 22789  	LONG $0x3114950f             // setne    byte [rcx + rsi]
 22790  	LONG $0x01327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 1], 0
 22791  	LONG $0x3154950f; BYTE $0x01 // setne    byte [rcx + rsi + 1]
 22792  	LONG $0x02327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 2], 0
 22793  	LONG $0x3154950f; BYTE $0x02 // setne    byte [rcx + rsi + 2]
 22794  	LONG $0x03327c80; BYTE $0x00 // cmp    byte [rdx + rsi + 3], 0
 22795  	LONG $0x3154950f; BYTE $0x03 // setne    byte [rcx + rsi + 3]
 22796  	LONG $0x04c68348             // add    rsi, 4
 22797  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22798  	JNE  LBB3_881
 22799  	JMP  LBB3_923
 22800  
 22801  LBB3_200: // 8-bit elements: out[i] = in[i] (plain byte copy). rdx = source, rcx = destination, r8d = element count.
 22802  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22803  	JLE  LBB3_923 // count <= 0: nothing to do
 22804  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22805  	LONG $0x20f88341         // cmp    r8d, 32
 22806  	JB   LBB3_202 // fewer than 32 elements: go straight to the scalar loop
 22807  	LONG $0x0a048d4a         // lea    rax, [rdx + r9] ; rax = one past the end of the source
 22808  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22809  	JBE  LBB3_511 // no overlap (target is outside this chunk, presumably the vector loop)
 22810  	LONG $0x09048d4a         // lea    rax, [rcx + r9] ; rax = one past the end of the destination
 22811  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22812  	JBE  LBB3_511 // no overlap
 22813  
 22814  LBB3_202: // overlapping or short input: scalar path from element 0
 22815  	WORD $0xf631 // xor    esi, esi
 22816  
 22817  LBB3_596: // scalar path; rsi = index of the next element to process
 22818  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22819  	WORD $0xf749; BYTE $0xd0 // not    r8
 22820  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
 22821  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22822  	LONG $0x03e78348         // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22823  	JE   LBB3_598
 22824  
 22825  LBB3_597: // one element per iteration, rdi times
 22826  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
 22827  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
 22828  	LONG $0x01c68348         // add    rsi, 1
 22829  	LONG $0xffc78348         // add    rdi, -1
 22830  	JNE  LBB3_597
 22831  
 22832  LBB3_598: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_596, so nothing is left
 22833  	LONG $0x03f88349 // cmp    r8, 3
 22834  	JB   LBB3_923
 22835  
 22836  LBB3_599: // four elements per iteration until rsi reaches the count in r9
 22837  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
 22838  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 22839  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
 22840  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 22841  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
 22842  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
 22843  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
 22844  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
 22845  	LONG $0x04c68348             // add    rsi, 4
 22846  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22847  	JNE  LBB3_599
 22848  	JMP  LBB3_923
 22849  
 22850  LBB3_203: // 8-bit elements: out[i] = in[i] (plain byte copy). rdx = source, rcx = destination, r8d = element count.
 22851  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22852  	JLE  LBB3_923 // count <= 0: nothing to do
 22853  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d ; r9 = zero-extended count
 22854  	LONG $0x20f88341         // cmp    r8d, 32
 22855  	JB   LBB3_205 // fewer than 32 elements: go straight to the scalar loop
 22856  	LONG $0x0a048d4a         // lea    rax, [rdx + r9] ; rax = one past the end of the source
 22857  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22858  	JBE  LBB3_513 // no overlap (target is outside this chunk, presumably the vector loop)
 22859  	LONG $0x09048d4a         // lea    rax, [rcx + r9] ; rax = one past the end of the destination
 22860  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22861  	JBE  LBB3_513 // no overlap
 22862  
 22863  LBB3_205: // overlapping or short input: scalar path from element 0
 22864  	WORD $0xf631 // xor    esi, esi
 22865  
 22866  LBB3_606: // scalar path; rsi = index of the next element to process
 22867  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22868  	WORD $0xf749; BYTE $0xd0 // not    r8
 22869  	WORD $0x014d; BYTE $0xc8 // add    r8, r9 ; r8 = r9 - rsi - 1
 22870  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22871  	LONG $0x03e78348         // and    rdi, 3 ; rdi = count mod 4 = iterations of the one-at-a-time loop
 22872  	JE   LBB3_608
 22873  
 22874  LBB3_607: // one element per iteration, rdi times
 22875  	LONG $0x3204b60f         // movzx    eax, byte [rdx + rsi]
 22876  	WORD $0x0488; BYTE $0x31 // mov    byte [rcx + rsi], al
 22877  	LONG $0x01c68348         // add    rsi, 1
 22878  	LONG $0xffc78348         // add    rdi, -1
 22879  	JNE  LBB3_607
 22880  
 22881  LBB3_608: // r8 < 3 means fewer than 4 elements were pending on entry to LBB3_606, so nothing is left
 22882  	LONG $0x03f88349 // cmp    r8, 3
 22883  	JB   LBB3_923
 22884  
 22885  LBB3_609: // four elements per iteration until rsi reaches the count in r9
 22886  	LONG $0x3204b60f             // movzx    eax, byte [rdx + rsi]
 22887  	WORD $0x0488; BYTE $0x31     // mov    byte [rcx + rsi], al
 22888  	LONG $0x3244b60f; BYTE $0x01 // movzx    eax, byte [rdx + rsi + 1]
 22889  	LONG $0x01314488             // mov    byte [rcx + rsi + 1], al
 22890  	LONG $0x3244b60f; BYTE $0x02 // movzx    eax, byte [rdx + rsi + 2]
 22891  	LONG $0x02314488             // mov    byte [rcx + rsi + 2], al
 22892  	LONG $0x3244b60f; BYTE $0x03 // movzx    eax, byte [rdx + rsi + 3]
 22893  	LONG $0x03314488             // mov    byte [rcx + rsi + 3], al
 22894  	LONG $0x04c68348             // add    rsi, 4
 22895  	WORD $0x3949; BYTE $0xf1     // cmp    r9, rsi
 22896  	JNE  LBB3_609
 22897  	JMP  LBB3_923
 22898  
 22899  LBB3_206: // dword negate kernel: stores 0 - dword [rdx + 4*i] to [rcx + 4*i] for i in [0, r8d); this region is the scalar path
 22900  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22901  	JLE  LBB3_923 // count <= 0: nothing to process (LBB3_923 is outside this view, presumably the common exit)
 22902  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 22903  	LONG $0x08f88341         // cmp    r8d, 8
 22904  	JB   LBB3_208 // fewer than 8 elements: go straight to the scalar loop
 22905  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 22906  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22907  	JBE  LBB3_515 // source range ends at or before the destination start: no overlap, take LBB3_515 (outside this view)
 22908  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 22909  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22910  	JBE  LBB3_515 // destination range ends at or before the source start: no overlap
 22911  
 22912  LBB3_208: // reached when count < 8 or the two ranges overlap; start the scalar loop at index 0
 22913  	WORD $0xf631 // xor    esi, esi
 22914  
 22915  LBB3_886: // r8 = r9 - rsi - 1 (remaining elements minus one); rdi = r9 & 3 = iterations of the peel loop
 22916  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22917  	WORD $0xf749; BYTE $0xd0 // not    r8
 22918  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 22919  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22920  	LONG $0x03e78348         // and    rdi, 3
 22921  	JE   LBB3_888
 22922  
 22923  LBB3_887: // peel loop: negate one dword per iteration, rdi iterations, advancing rsi
 22924  	WORD $0xc031             // xor    eax, eax
 22925  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 22926  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22927  	LONG $0x01c68348         // add    rsi, 1
 22928  	LONG $0xffc78348         // add    rdi, -1
 22929  	JNE  LBB3_887
 22930  
 22931  LBB3_888: // r8 < 3 means fewer than 4 elements remained at LBB3_886, so the 4x loop below is skipped
 22932  	LONG $0x03f88349 // cmp    r8, 3
 22933  	JB   LBB3_923
 22934  
 22935  LBB3_889: // main scalar loop, unrolled 4x: four dwords negated per iteration until rsi == r9
 22936  	WORD $0xc031             // xor    eax, eax
 22937  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 22938  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22939  	WORD $0xc031             // xor    eax, eax
 22940  	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
 22941  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
 22942  	WORD $0xc031             // xor    eax, eax
 22943  	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
 22944  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
 22945  	WORD $0xc031             // xor    eax, eax
 22946  	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
 22947  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
 22948  	LONG $0x04c68348         // add    rsi, 4
 22949  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 22950  	JNE  LBB3_889
 22951  	JMP  LBB3_923
 22952  
 22953  LBB3_209: // dword negate kernel (same instruction sequence as LBB3_206): [rcx + 4*i] = 0 - [rdx + 4*i] for i in [0, r8d)
 22954  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 22955  	JLE  LBB3_923 // count <= 0: nothing to process (LBB3_923 is outside this view, presumably the common exit)
 22956  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 22957  	LONG $0x08f88341         // cmp    r8d, 8
 22958  	JB   LBB3_211 // fewer than 8 elements: go straight to the scalar loop
 22959  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 22960  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 22961  	JBE  LBB3_518 // source range ends at or before the destination start: no overlap, take LBB3_518 (outside this view)
 22962  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 22963  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 22964  	JBE  LBB3_518 // destination range ends at or before the source start: no overlap
 22965  
 22966  LBB3_211: // reached when count < 8 or the two ranges overlap; start the scalar loop at index 0
 22967  	WORD $0xf631 // xor    esi, esi
 22968  
 22969  LBB3_894: // r8 = r9 - rsi - 1 (remaining elements minus one); rdi = r9 & 3 = iterations of the peel loop
 22970  	WORD $0x8949; BYTE $0xf0 // mov    r8, rsi
 22971  	WORD $0xf749; BYTE $0xd0 // not    r8
 22972  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
 22973  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 22974  	LONG $0x03e78348         // and    rdi, 3
 22975  	JE   LBB3_896
 22976  
 22977  LBB3_895: // peel loop: negate one dword per iteration, rdi iterations, advancing rsi
 22978  	WORD $0xc031             // xor    eax, eax
 22979  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 22980  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22981  	LONG $0x01c68348         // add    rsi, 1
 22982  	LONG $0xffc78348         // add    rdi, -1
 22983  	JNE  LBB3_895
 22984  
 22985  LBB3_896: // r8 < 3 means fewer than 4 elements remained at LBB3_894, so the 4x loop below is skipped
 22986  	LONG $0x03f88349 // cmp    r8, 3
 22987  	JB   LBB3_923
 22988  
 22989  LBB3_897: // main scalar loop, unrolled 4x: four dwords negated per iteration until rsi == r9
 22990  	WORD $0xc031             // xor    eax, eax
 22991  	WORD $0x042b; BYTE $0xb2 // sub    eax, dword [rdx + 4*rsi]
 22992  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 22993  	WORD $0xc031             // xor    eax, eax
 22994  	LONG $0x04b2442b         // sub    eax, dword [rdx + 4*rsi + 4]
 22995  	LONG $0x04b14489         // mov    dword [rcx + 4*rsi + 4], eax
 22996  	WORD $0xc031             // xor    eax, eax
 22997  	LONG $0x08b2442b         // sub    eax, dword [rdx + 4*rsi + 8]
 22998  	LONG $0x08b14489         // mov    dword [rcx + 4*rsi + 8], eax
 22999  	WORD $0xc031             // xor    eax, eax
 23000  	LONG $0x0cb2442b         // sub    eax, dword [rdx + 4*rsi + 12]
 23001  	LONG $0x0cb14489         // mov    dword [rcx + 4*rsi + 12], eax
 23002  	LONG $0x04c68348         // add    rsi, 4
 23003  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 23004  	JNE  LBB3_897
 23005  	JMP  LBB3_923
 23006  
 23007  LBB3_212: // signed dword sign kernel: [rcx + 4*i] = 1 if x > 0, -1 if x < 0, 0 if x == 0, where x = [rdx + 4*i], i in [0, r8d)
 23008  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 23009  	JLE  LBB3_923 // count <= 0: nothing to process (LBB3_923 is outside this view, presumably the common exit)
 23010  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 23011  	LONG $0x08f88341         // cmp    r8d, 8
 23012  	JB   LBB3_214 // fewer than 8 elements: go straight to the scalar loop
 23013  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 23014  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 23015  	JBE  LBB3_521 // source range ends at or before the destination start: no overlap, take LBB3_521 (outside this view)
 23016  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 23017  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 23018  	JBE  LBB3_521 // destination range ends at or before the source start: no overlap
 23019  
 23020  LBB3_214: // reached when count < 8 or the two ranges overlap; start the scalar loop at index 0
 23021  	WORD $0xf631 // xor    esi, esi
 23022  
 23023  LBB3_902: // rax = ~rsi; if r9 is odd, process a single element first so the 2x loop below sees an even remainder
 23024  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 23025  	WORD $0xf748; BYTE $0xd0     // not    rax
 23026  	LONG $0x01c1f641             // test    r9b, 1
 23027  	JE   LBB3_904
 23028  	LONG $0xb2048b44             // mov    r8d, dword [rdx + 4*rsi]
 23029  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 23030  	WORD $0x8545; BYTE $0xc0     // test    r8d, r8d
 23031  	LONG $0xd2950f41             // setne    r10b
 23032  	WORD $0xf741; BYTE $0xda     // neg    r10d
 23033  	WORD $0x8545; BYTE $0xc0     // test    r8d, r8d
 23034  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 23035  	LONG $0xfa4e0f41             // cmovle    edi, r10d
 23036  	WORD $0x3c89; BYTE $0xb1     // mov    dword [rcx + 4*rsi], edi
 23037  	LONG $0x01ce8348             // or    rsi, 1
 23038  
 23039  LBB3_904: // rax = r9 - (rsi at LBB3_902) - 1; zero means at most the single element above remained
 23040  	WORD $0x014c; BYTE $0xc8       // add    rax, r9
 23041  	JE   LBB3_923
 23042  	LONG $0x0001b841; WORD $0x0000 // mov    r8d, 1
 23043  
 23044  LBB3_906: // main scalar loop, unrolled 2x: value = -(x != 0), replaced by r8d (= 1) when x > 0; runs until rsi == r9
 23045  	WORD $0x3c8b; BYTE $0xb2 // mov    edi, dword [rdx + 4*rsi]
 23046  	WORD $0xc031             // xor    eax, eax
 23047  	WORD $0xff85             // test    edi, edi
 23048  	WORD $0x950f; BYTE $0xd0 // setne    al
 23049  	WORD $0xd8f7             // neg    eax
 23050  	WORD $0xff85             // test    edi, edi
 23051  	LONG $0xc04f0f41         // cmovg    eax, r8d
 23052  	WORD $0x0489; BYTE $0xb1 // mov    dword [rcx + 4*rsi], eax
 23053  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
 23054  	WORD $0xff31             // xor    edi, edi
 23055  	WORD $0xc085             // test    eax, eax
 23056  	LONG $0xd7950f40         // setne    dil
 23057  	WORD $0xdff7             // neg    edi
 23058  	WORD $0xc085             // test    eax, eax
 23059  	LONG $0xf84f0f41         // cmovg    edi, r8d
 23060  	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
 23061  	LONG $0x02c68348         // add    rsi, 2
 23062  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 23063  	JNE  LBB3_906
 23064  	JMP  LBB3_923
 23065  
 23066  LBB3_215: // signed dword absolute-value kernel: [rcx + 4*i] = x if -x is negative, else -x, where x = [rdx + 4*i], i in [0, r8d)
 23067  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 23068  	JLE  LBB3_923 // count <= 0: nothing to process (LBB3_923 is outside this view, presumably the common exit)
 23069  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 23070  	LONG $0x08f88341         // cmp    r8d, 8
 23071  	JB   LBB3_217 // fewer than 8 elements: go straight to the scalar loop
 23072  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 23073  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 23074  	JBE  LBB3_524 // source range ends at or before the destination start: no overlap, take LBB3_524 (outside this view)
 23075  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 23076  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 23077  	JBE  LBB3_524 // destination range ends at or before the source start: no overlap
 23078  
 23079  LBB3_217: // reached when count < 8 or the two ranges overlap; start the scalar loop at index 0
 23080  	WORD $0xf631 // xor    esi, esi
 23081  
 23082  LBB3_911: // rax = ~rsi; if r9 is odd, process a single element first so the 2x loop below sees an even remainder
 23083  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 23084  	WORD $0xf748; BYTE $0xd0 // not    rax
 23085  	LONG $0x01c1f641         // test    r9b, 1
 23086  	JE   LBB3_913
 23087  	LONG $0xb2048b44         // mov    r8d, dword [rdx + 4*rsi]
 23088  	WORD $0x8944; BYTE $0xc7 // mov    edi, r8d
 23089  	WORD $0xdff7             // neg    edi
 23090  	LONG $0xf84c0f41         // cmovl    edi, r8d
 23091  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 23092  	LONG $0x01ce8348         // or    rsi, 1
 23093  
 23094  LBB3_913: // rax = r9 - (rsi at LBB3_911) - 1; zero means at most the single element above remained
 23095  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 23096  	JE   LBB3_923
 23097  
 23098  LBB3_914: // main scalar loop, unrolled 2x: edi = -x, then cmovl (using the flags set by neg) restores x when -x is negative
 23099  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 23100  	WORD $0xc789             // mov    edi, eax
 23101  	WORD $0xdff7             // neg    edi
 23102  	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
 23103  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 23104  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
 23105  	WORD $0xc789             // mov    edi, eax
 23106  	WORD $0xdff7             // neg    edi
 23107  	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
 23108  	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
 23109  	LONG $0x02c68348         // add    rsi, 2
 23110  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 23111  	JNE  LBB3_914
 23112  	JMP  LBB3_923
 23113  
 23114  LBB3_218: // signed dword absolute-value kernel (same instruction sequence as LBB3_215), i in [0, r8d), input rdx, output rcx
 23115  	WORD $0x8545; BYTE $0xc0 // test    r8d, r8d
 23116  	JLE  LBB3_923 // count <= 0: nothing to process (LBB3_923 is outside this view, presumably the common exit)
 23117  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
 23118  	LONG $0x08f88341         // cmp    r8d, 8
 23119  	JB   LBB3_220 // fewer than 8 elements: go straight to the scalar loop
 23120  	LONG $0x8a048d4a         // lea    rax, [rdx + 4*r9]
 23121  	WORD $0x3948; BYTE $0xc8 // cmp    rax, rcx
 23122  	JBE  LBB3_527 // source range ends at or before the destination start: no overlap, take LBB3_527 (outside this view)
 23123  	LONG $0x89048d4a         // lea    rax, [rcx + 4*r9]
 23124  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 23125  	JBE  LBB3_527 // destination range ends at or before the source start: no overlap
 23126  
 23127  LBB3_220: // reached when count < 8 or the two ranges overlap; start the scalar loop at index 0
 23128  	WORD $0xf631 // xor    esi, esi
 23129  
 23130  LBB3_919: // rax = ~rsi; if r9 is odd, process a single element first so the 2x loop below sees an even remainder
 23131  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
 23132  	WORD $0xf748; BYTE $0xd0 // not    rax
 23133  	LONG $0x01c1f641         // test    r9b, 1
 23134  	JE   LBB3_921
 23135  	LONG $0xb2048b44         // mov    r8d, dword [rdx + 4*rsi]
 23136  	WORD $0x8944; BYTE $0xc7 // mov    edi, r8d
 23137  	WORD $0xdff7             // neg    edi
 23138  	LONG $0xf84c0f41         // cmovl    edi, r8d
 23139  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 23140  	LONG $0x01ce8348         // or    rsi, 1
 23141  
 23142  LBB3_921: // rax = r9 - (rsi at LBB3_919) - 1; zero means at most the single element above remained
 23143  	WORD $0x014c; BYTE $0xc8 // add    rax, r9
 23144  	JE   LBB3_923
 23145  
 23146  LBB3_922: // main scalar loop, unrolled 2x: edi = -x, then cmovl (using the flags set by neg) restores x when -x is negative
 23147  	WORD $0x048b; BYTE $0xb2 // mov    eax, dword [rdx + 4*rsi]
 23148  	WORD $0xc789             // mov    edi, eax
 23149  	WORD $0xdff7             // neg    edi
 23150  	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
 23151  	WORD $0x3c89; BYTE $0xb1 // mov    dword [rcx + 4*rsi], edi
 23152  	LONG $0x04b2448b         // mov    eax, dword [rdx + 4*rsi + 4]
 23153  	WORD $0xc789             // mov    edi, eax
 23154  	WORD $0xdff7             // neg    edi
 23155  	WORD $0x4c0f; BYTE $0xf8 // cmovl    edi, eax
 23156  	LONG $0x04b17c89         // mov    dword [rcx + 4*rsi + 4], edi
 23157  	LONG $0x02c68348         // add    rsi, 2
 23158  	WORD $0x3949; BYTE $0xf1 // cmp    r9, rsi
 23159  	JNE  LBB3_922
 23160  	JMP  LBB3_923
 23161  
 23162  LBB3_221: // loop setup: edx = r9d rounded down to a multiple of 8; rdi = (rdx - 8)/8 + 1 groups of 8; esi = rdi & 7
 23163  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 23164  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
 23165  	LONG $0xf8428d48         // lea    rax, [rdx - 8]
 23166  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23167  	LONG $0x03efc148         // shr    rdi, 3
 23168  	LONG $0x01c78348         // add    rdi, 1
 23169  	WORD $0xfe89             // mov    esi, edi
 23170  	WORD $0xe683; BYTE $0x07 // and    esi, 7
 23171  	LONG $0x38f88348         // cmp    rax, 56
 23172  	JAE  LBB3_367 // rdx - 8 >= 56, i.e. at least 8 groups: take LBB3_367 (outside this view, presumably the 8x-unrolled loop)
 23173  	WORD $0xc031             // xor    eax, eax
 23174  	JMP  LBB3_369 // fewer than 8 groups: continue at LBB3_369 (outside this view) with rax = 0
 23175  
 23176  LBB3_265: // loop setup: edx = r9d rounded down to a multiple of 4; rdi = (rdx - 4)/4 + 1 groups of 4; esi = rdi & 7
 23177  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 23178  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 23179  	LONG $0xfc428d48         // lea    rax, [rdx - 4]
 23180  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23181  	LONG $0x02efc148         // shr    rdi, 2
 23182  	LONG $0x01c78348         // add    rdi, 1
 23183  	WORD $0xfe89             // mov    esi, edi
 23184  	WORD $0xe683; BYTE $0x07 // and    esi, 7
 23185  	LONG $0x1cf88348         // cmp    rax, 28
 23186  	JAE  LBB3_414 // rdx - 4 >= 28, i.e. at least 8 groups: take LBB3_414 (outside this view, presumably the 8x-unrolled loop)
 23187  	WORD $0xc031             // xor    eax, eax
 23188  	JMP  LBB3_416 // fewer than 8 groups: continue at LBB3_416 (outside this view) with rax = 0
 23189  
 23190  LBB3_279: // loop setup: edx = r9d rounded down to a multiple of 16; rdi = (rdx - 16)/16 + 1 groups of 16; esi = rdi & 7
 23191  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 23192  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
 23193  	LONG $0xf0428d48         // lea    rax, [rdx - 16]
 23194  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23195  	LONG $0x04efc148         // shr    rdi, 4
 23196  	LONG $0x01c78348         // add    rdi, 1
 23197  	WORD $0xfe89             // mov    esi, edi
 23198  	WORD $0xe683; BYTE $0x07 // and    esi, 7
 23199  	LONG $0x70f88348         // cmp    rax, 112
 23200  	JAE  LBB3_431 // rdx - 16 >= 112, i.e. at least 8 groups: take LBB3_431 (outside this view, presumably the 8x-unrolled loop)
 23201  	WORD $0xc031             // xor    eax, eax
 23202  	JMP  LBB3_433 // fewer than 8 groups: continue at LBB3_433 (outside this view) with rax = 0
 23203  
 23204  LBB3_338: // loop setup: edx = r9d rounded down to a multiple of 32; rdi = (rdx - 32)/32 + 1 groups of 32; esi = rdi & 7
 23205  	WORD $0x8944; BYTE $0xca       // mov    edx, r9d
 23206  	WORD $0xe283; BYTE $0xe0       // and    edx, -32
 23207  	LONG $0xe0428d48               // lea    rax, [rdx - 32]
 23208  	WORD $0x8948; BYTE $0xc7       // mov    rdi, rax
 23209  	LONG $0x05efc148               // shr    rdi, 5
 23210  	LONG $0x01c78348               // add    rdi, 1
 23211  	WORD $0xfe89                   // mov    esi, edi
 23212  	WORD $0xe683; BYTE $0x07       // and    esi, 7
 23213  	LONG $0x00e03d48; WORD $0x0000 // cmp    rax, 224
 23214  	JAE  LBB3_498 // rdx - 32 >= 224, i.e. at least 8 groups: take LBB3_498 (outside this view, presumably the 8x-unrolled loop)
 23215  	WORD $0xc031                   // xor    eax, eax
 23216  	JMP  LBB3_500 // fewer than 8 groups: continue at LBB3_500 (outside this view) with rax = 0
 23217  
 23218  LBB3_374: // SSE dword negate setup: esi = r9d rounded down to a multiple of 8; r8 = (rsi - 8)/8 + 1 groups of 8 dwords
 23219  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23220  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 23221  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 23222  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23223  	LONG $0x03e8c149         // shr    r8, 3
 23224  	LONG $0x01c08349         // add    r8, 1
 23225  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23226  	JE   LBB3_610 // rsi == 8 (exactly one group): skip the paired loop, continue at LBB3_610 (outside this view)
 23227  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23228  	LONG $0xfee08348         // and    rax, -2
 23229  	WORD $0xf748; BYTE $0xd8 // neg    rax
 23230  	WORD $0xff31             // xor    edi, edi
 23231  
 23232  LBB3_376: // paired loop: rax = -(r8 & ~1) counts up by 2 to zero; each pass negates 16 dwords (zero minus input via psubd)
 23233  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 23234  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 23235  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23236  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 23237  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23238  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 23239  	LONG $0x147f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm2
 23240  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
 23241  	LONG $0x446f0ff3; WORD $0x20ba // movdqu    xmm0, oword [rdx + 4*rdi + 32]
 23242  	LONG $0x4c6f0ff3; WORD $0x30ba // movdqu    xmm1, oword [rdx + 4*rdi + 48]
 23243  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23244  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 23245  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23246  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 23247  	LONG $0x547f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm2
 23248  	LONG $0x447f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm0
 23249  	LONG $0x10c78348               // add    rdi, 16
 23250  	LONG $0x02c08348               // add    rax, 2
 23251  	JNE  LBB3_376
 23252  	JMP  LBB3_611
 23253  
 23254  LBB3_377: // SSE dword setup: esi = r9d rounded down to a multiple of 8; r8 = (rsi - 8)/8 + 1 groups of 8 dwords
 23255  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23256  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 23257  	LONG $0xf8468d48             // lea    rax, [rsi - 8]
 23258  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23259  	LONG $0x03e8c149             // shr    r8, 3
 23260  	LONG $0x01c08349             // add    r8, 1
 23261  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23262  	JE   LBB3_618 // rsi == 8 (exactly one group): skip the paired loop, continue at LBB3_618 (outside this view)
 23263  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23264  	LONG $0xfee08348             // and    rax, -2
 23265  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23266  	WORD $0xff31                 // xor    edi, edi
 23267  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 23268  	LONG $0x4d6f0f66; BYTE $0x20 // movdqa    xmm1, oword 32[rbp] /* [rip + .LCPI3_3] */
 23269  
 23270  LBB3_379: // paired loop, 16 dwords per pass: lane = 0 where input == 0, else the xmm1 lane (constant at 32[rbp]; presumably 1 per lane - table not visible here, TODO confirm)
 23271  	LONG $0x146f0ff3; BYTE $0xba   // movdqu    xmm2, oword [rdx + 4*rdi]
 23272  	LONG $0x5c6f0ff3; WORD $0x10ba // movdqu    xmm3, oword [rdx + 4*rdi + 16]
 23273  	LONG $0xd0760f66               // pcmpeqd    xmm2, xmm0
 23274  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 23275  	LONG $0xd8760f66               // pcmpeqd    xmm3, xmm0
 23276  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 23277  	LONG $0x147f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm2
 23278  	LONG $0x5c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm3
 23279  	LONG $0x546f0ff3; WORD $0x20ba // movdqu    xmm2, oword [rdx + 4*rdi + 32]
 23280  	LONG $0x5c6f0ff3; WORD $0x30ba // movdqu    xmm3, oword [rdx + 4*rdi + 48]
 23281  	LONG $0xd0760f66               // pcmpeqd    xmm2, xmm0
 23282  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 23283  	LONG $0xd8760f66               // pcmpeqd    xmm3, xmm0
 23284  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 23285  	LONG $0x547f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm2
 23286  	LONG $0x5c7f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm3
 23287  	LONG $0x10c78348               // add    rdi, 16
 23288  	LONG $0x02c08348               // add    rax, 2
 23289  	JNE  LBB3_379
 23290  	JMP  LBB3_619
 23291  
 23292  LBB3_380: // loop setup: esi = r9d rounded down to a multiple of 8; rdi = (rsi - 8)/8 + 1 groups of 8; r8d = rdi & 3
 23293  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23294  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 23295  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 23296  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23297  	LONG $0x03efc148         // shr    rdi, 3
 23298  	LONG $0x01c78348         // add    rdi, 1
 23299  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 23300  	LONG $0x03e08341         // and    r8d, 3
 23301  	LONG $0x18f88348         // cmp    rax, 24
 23302  	JAE  LBB3_530 // rsi - 8 >= 24, i.e. at least 4 groups: take LBB3_530 (outside this view, presumably the 4x-unrolled loop)
 23303  	WORD $0xc031             // xor    eax, eax
 23304  	JMP  LBB3_532 // fewer than 4 groups: continue at LBB3_532 (outside this view) with rax = 0
 23305  
 23306  LBB3_382: // loop setup (same shape as LBB3_380): esi = r9d rounded down to a multiple of 8; rdi = groups of 8; r8d = rdi & 3
 23307  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23308  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 23309  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 23310  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23311  	LONG $0x03efc148         // shr    rdi, 3
 23312  	LONG $0x01c78348         // add    rdi, 1
 23313  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 23314  	LONG $0x03e08341         // and    r8d, 3
 23315  	LONG $0x18f88348         // cmp    rax, 24
 23316  	JAE  LBB3_540 // rsi - 8 >= 24, i.e. at least 4 groups: take LBB3_540 (outside this view, presumably the 4x-unrolled loop)
 23317  	WORD $0xc031             // xor    eax, eax
 23318  	JMP  LBB3_542 // fewer than 4 groups: continue at LBB3_542 (outside this view) with rax = 0
 23319  
 23320  LBB3_384: // SSE qword setup: esi = r9d rounded down to a multiple of 4; r8 = (rsi - 4)/4 + 1 groups of 4 qwords
 23321  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23322  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23323  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23324  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23325  	LONG $0x02e8c149             // shr    r8, 2
 23326  	LONG $0x01c08349             // add    r8, 1
 23327  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23328  	JE   LBB3_626 // rsi == 4 (exactly one group): skip the paired loop, continue at LBB3_626 (outside this view)
 23329  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23330  	LONG $0xfee08348             // and    rax, -2
 23331  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23332  	WORD $0xff31                 // xor    edi, edi
 23333  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 23334  
 23335  LBB3_386: // paired loop, 8 qwords per pass: each 16-byte vector is XORed with xmm0 (constant at 0[rbp]; presumably the float64 sign-bit mask, i.e. negation - TODO confirm against the constant table)
 23336  	LONG $0x0c100f66; BYTE $0xfa   // movupd    xmm1, oword [rdx + 8*rdi]
 23337  	LONG $0x54100f66; WORD $0x10fa // movupd    xmm2, oword [rdx + 8*rdi + 16]
 23338  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 23339  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 23340  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 23341  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 23342  	LONG $0x4c100f66; WORD $0x20fa // movupd    xmm1, oword [rdx + 8*rdi + 32]
 23343  	LONG $0x54100f66; WORD $0x30fa // movupd    xmm2, oword [rdx + 8*rdi + 48]
 23344  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 23345  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 23346  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 23347  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 23348  	LONG $0x08c78348               // add    rdi, 8
 23349  	LONG $0x02c08348               // add    rax, 2
 23350  	JNE  LBB3_386
 23351  	JMP  LBB3_627
 23352  
 23353  LBB3_387: // SSE qword setup (same shape as LBB3_384): esi = r9d rounded down to a multiple of 4; r8 = groups of 4 qwords
 23354  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23355  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23356  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23357  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23358  	LONG $0x02e8c149             // shr    r8, 2
 23359  	LONG $0x01c08349             // add    r8, 1
 23360  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23361  	JE   LBB3_636 // rsi == 4 (exactly one group): skip the paired loop, continue at LBB3_636 (outside this view)
 23362  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23363  	LONG $0xfee08348             // and    rax, -2
 23364  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23365  	WORD $0xff31                 // xor    edi, edi
 23366  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI3_0] */
 23367  
 23368  LBB3_389: // paired loop, 8 qwords per pass: each 16-byte vector is XORed with xmm0 (constant at 0[rbp]; presumably the float64 sign-bit mask - TODO confirm against the constant table)
 23369  	LONG $0x0c100f66; BYTE $0xfa   // movupd    xmm1, oword [rdx + 8*rdi]
 23370  	LONG $0x54100f66; WORD $0x10fa // movupd    xmm2, oword [rdx + 8*rdi + 16]
 23371  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 23372  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 23373  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 23374  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 23375  	LONG $0x4c100f66; WORD $0x20fa // movupd    xmm1, oword [rdx + 8*rdi + 32]
 23376  	LONG $0x54100f66; WORD $0x30fa // movupd    xmm2, oword [rdx + 8*rdi + 48]
 23377  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 23378  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 23379  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 23380  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 23381  	LONG $0x08c78348               // add    rdi, 8
 23382  	LONG $0x02c08348               // add    rax, 2
 23383  	JNE  LBB3_389
 23384  	JMP  LBB3_637
 23385  
 23386  LBB3_390: // SSE double setup: esi = r9d rounded down to a multiple of 4; r8 = (rsi - 4)/4 + 1 groups of 4 doubles
 23387  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23388  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23389  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23390  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23391  	LONG $0x02e8c149             // shr    r8, 2
 23392  	LONG $0x01c08349             // add    r8, 1
 23393  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23394  	JE   LBB3_646 // rsi == 4 (exactly one group): skip the paired loop, continue at LBB3_646 (outside this view)
 23395  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23396  	LONG $0xfee08348             // and    rax, -2
 23397  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23398  	WORD $0xff31                 // xor    edi, edi
 23399  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 23400  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI3_0] */
 23401  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI3_1] */
 23402  
 23403  LBB3_392: // paired loop, 8 doubles per pass: result = ((x & xmm1) | xmm2) where x != 0.0 (cmpneqpd), else all-zero bits; presumably xmm1 = sign mask and xmm2 = 1.0, giving sign(x) - TODO confirm constants
 23404  	LONG $0x1c100f66; BYTE $0xfa   // movupd    xmm3, oword [rdx + 8*rdi]
 23405  	LONG $0x64100f66; WORD $0x10fa // movupd    xmm4, oword [rdx + 8*rdi + 16]
 23406  	LONG $0xeb280f66               // movapd    xmm5, xmm3
 23407  	LONG $0xe9540f66               // andpd    xmm5, xmm1
 23408  	LONG $0xea560f66               // orpd    xmm5, xmm2
 23409  	LONG $0xf4280f66               // movapd    xmm6, xmm4
 23410  	LONG $0xf1540f66               // andpd    xmm6, xmm1
 23411  	LONG $0xf2560f66               // orpd    xmm6, xmm2
 23412  	LONG $0xd8c20f66; BYTE $0x04   // cmpneqpd    xmm3, xmm0
 23413  	LONG $0xdd540f66               // andpd    xmm3, xmm5
 23414  	LONG $0xe0c20f66; BYTE $0x04   // cmpneqpd    xmm4, xmm0
 23415  	LONG $0xe6540f66               // andpd    xmm4, xmm6
 23416  	LONG $0x1c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm3
 23417  	LONG $0x64110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm4
 23418  	LONG $0x5c100f66; WORD $0x20fa // movupd    xmm3, oword [rdx + 8*rdi + 32]
 23419  	LONG $0x64100f66; WORD $0x30fa // movupd    xmm4, oword [rdx + 8*rdi + 48]
 23420  	LONG $0xeb280f66               // movapd    xmm5, xmm3
 23421  	LONG $0xe9540f66               // andpd    xmm5, xmm1
 23422  	LONG $0xea560f66               // orpd    xmm5, xmm2
 23423  	LONG $0xf4280f66               // movapd    xmm6, xmm4
 23424  	LONG $0xf1540f66               // andpd    xmm6, xmm1
 23425  	LONG $0xf2560f66               // orpd    xmm6, xmm2
 23426  	LONG $0xd8c20f66; BYTE $0x04   // cmpneqpd    xmm3, xmm0
 23427  	LONG $0xdd540f66               // andpd    xmm3, xmm5
 23428  	LONG $0xe0c20f66; BYTE $0x04   // cmpneqpd    xmm4, xmm0
 23429  	LONG $0xe6540f66               // andpd    xmm4, xmm6
 23430  	LONG $0x5c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm3
 23431  	LONG $0x64110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm4
 23432  	LONG $0x08c78348               // add    rdi, 8
 23433  	LONG $0x02c08348               // add    rax, 2
 23434  	JNE  LBB3_392
 23435  	JMP  LBB3_647
 23436  
 23437  LBB3_393: // SSE qword setup: esi = r9d rounded down to a multiple of 4; r8 = (rsi - 4)/4 + 1 groups of 4 qwords
 23438  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23439  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23440  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23441  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23442  	LONG $0x02e8c149             // shr    r8, 2
 23443  	LONG $0x01c08349             // add    r8, 1
 23444  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23445  	JE   LBB3_655 // rsi == 4 (exactly one group): skip the paired loop, continue at LBB3_655 (outside this view)
 23446  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23447  	LONG $0xfee08348             // and    rax, -2
 23448  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23449  	WORD $0xff31                 // xor    edi, edi
 23450  	LONG $0x45280f66; BYTE $0x70 // movapd    xmm0, oword 112[rbp] /* [rip + .LCPI3_8] */
 23451  
 23452  LBB3_395: // paired loop, 8 qwords per pass: each 16-byte vector is ANDed with xmm0 (constant at 112[rbp]; presumably the mask clearing the float64 sign bit, i.e. fabs - TODO confirm)
 23453  	LONG $0x0c100f66; BYTE $0xfa   // movupd    xmm1, oword [rdx + 8*rdi]
 23454  	LONG $0x54100f66; WORD $0x10fa // movupd    xmm2, oword [rdx + 8*rdi + 16]
 23455  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 23456  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 23457  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 23458  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 23459  	LONG $0x4c100f66; WORD $0x20fa // movupd    xmm1, oword [rdx + 8*rdi + 32]
 23460  	LONG $0x54100f66; WORD $0x30fa // movupd    xmm2, oword [rdx + 8*rdi + 48]
 23461  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 23462  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 23463  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 23464  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 23465  	LONG $0x08c78348               // add    rdi, 8
 23466  	LONG $0x02c08348               // add    rax, 2
 23467  	JNE  LBB3_395
 23468  	JMP  LBB3_656
 23469  
 23470  LBB3_396: // SSE qword setup (same shape as LBB3_393): esi = r9d rounded down to a multiple of 4; r8 = groups of 4 qwords
 23471  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23472  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23473  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23474  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23475  	LONG $0x02e8c149             // shr    r8, 2
 23476  	LONG $0x01c08349             // add    r8, 1
 23477  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23478  	JE   LBB3_663 // rsi == 4 (exactly one group): skip the paired loop, continue at LBB3_663 (outside this view)
 23479  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23480  	LONG $0xfee08348             // and    rax, -2
 23481  	WORD $0xf748; BYTE $0xd8     // neg    rax
 23482  	WORD $0xff31                 // xor    edi, edi
 23483  	LONG $0x45280f66; BYTE $0x70 // movapd    xmm0, oword 112[rbp] /* [rip + .LCPI3_8] */
 23484  
 23485  LBB3_398: // paired loop, 8 qwords per pass: each 16-byte vector is ANDed with xmm0 (constant at 112[rbp]; presumably the mask clearing the float64 sign bit - TODO confirm)
 23486  	LONG $0x0c100f66; BYTE $0xfa   // movupd    xmm1, oword [rdx + 8*rdi]
 23487  	LONG $0x54100f66; WORD $0x10fa // movupd    xmm2, oword [rdx + 8*rdi + 16]
 23488  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 23489  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 23490  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 23491  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 23492  	LONG $0x4c100f66; WORD $0x20fa // movupd    xmm1, oword [rdx + 8*rdi + 32]
 23493  	LONG $0x54100f66; WORD $0x30fa // movupd    xmm2, oword [rdx + 8*rdi + 48]
 23494  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 23495  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 23496  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 23497  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 23498  	LONG $0x08c78348               // add    rdi, 8
 23499  	LONG $0x02c08348               // add    rax, 2
 23500  	JNE  LBB3_398
 23501  	JMP  LBB3_664
 23502  
 23503  LBB3_399: // SSE byte negate setup: esi = r9d rounded down to a multiple of 32; r8 = (rsi - 32)/32 + 1 groups of 32 bytes
 23504  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23505  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 23506  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 23507  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23508  	LONG $0x05e8c149         // shr    r8, 5
 23509  	LONG $0x01c08349         // add    r8, 1
 23510  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23511  	JE   LBB3_671 // rsi == 32 (exactly one group): skip the paired loop, continue at LBB3_671 (outside this view)
 23512  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23513  	LONG $0xfee08348         // and    rax, -2
 23514  	WORD $0xf748; BYTE $0xd8 // neg    rax
 23515  	WORD $0xff31             // xor    edi, edi
 23516  
 23517  LBB3_401: // paired loop: rax = -(r8 & ~1) counts up by 2 to zero; each pass negates 64 bytes (zero minus input via psubb)
 23518  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 23519  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 23520  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23521  	LONG $0xd0f80f66               // psubb    xmm2, xmm0
 23522  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23523  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 23524  	LONG $0x147f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm2
 23525  	LONG $0x447f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm0
 23526  	LONG $0x446f0ff3; WORD $0x203a // movdqu    xmm0, oword [rdx + rdi + 32]
 23527  	LONG $0x4c6f0ff3; WORD $0x303a // movdqu    xmm1, oword [rdx + rdi + 48]
 23528  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23529  	LONG $0xd0f80f66               // psubb    xmm2, xmm0
 23530  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23531  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 23532  	LONG $0x547f0ff3; WORD $0x2039 // movdqu    oword [rcx + rdi + 32], xmm2
 23533  	LONG $0x447f0ff3; WORD $0x3039 // movdqu    oword [rcx + rdi + 48], xmm0
 23534  	LONG $0x40c78348               // add    rdi, 64
 23535  	LONG $0x02c08348               // add    rax, 2
 23536  	JNE  LBB3_401
 23537  	JMP  LBB3_672
 23538  
 23539  LBB3_402: // setup for the loop at LBB3_404: rsi = r9d & -32, r8 = ((rsi - 32) >> 5) + 1 (= rsi / 32 when rsi >= 32); r9d is presumably the element count - confirm
 23540  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23541  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 23542  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 23543  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23544  	LONG $0x05e8c149         // shr    r8, 5
 23545  	LONG $0x01c08349         // add    r8, 1
 23546  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23547  	JE   LBB3_679 // rsi == 32 (a single group): skip the paired loop; LBB3_679 is outside this chunk
 23548  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23549  	LONG $0xfee08348         // and    rax, -2
 23550  	WORD $0xf748; BYTE $0xd8 // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23551  	WORD $0xff31             // xor    edi, edi
 23552  
 23553  LBB3_404: // 2x-unrolled loop: per byte, [rcx + rdi] = 0 - [rdx + rdi] (pxor zero + psubb, wrapping); 64 bytes per pass
 23554  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 23555  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 23556  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23557  	LONG $0xd0f80f66               // psubb    xmm2, xmm0
 23558  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23559  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 23560  	LONG $0x147f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm2
 23561  	LONG $0x447f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm0
 23562  	LONG $0x446f0ff3; WORD $0x203a // movdqu    xmm0, oword [rdx + rdi + 32]
 23563  	LONG $0x4c6f0ff3; WORD $0x303a // movdqu    xmm1, oword [rdx + rdi + 48]
 23564  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23565  	LONG $0xd0f80f66               // psubb    xmm2, xmm0
 23566  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23567  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 23568  	LONG $0x547f0ff3; WORD $0x2039 // movdqu    oword [rcx + rdi + 32], xmm2
 23569  	LONG $0x447f0ff3; WORD $0x3039 // movdqu    oword [rcx + rdi + 48], xmm0
 23570  	LONG $0x40c78348               // add    rdi, 64
 23571  	LONG $0x02c08348               // add    rax, 2
 23572  	JNE  LBB3_404 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23573  	JMP  LBB3_680 // loop finished; LBB3_680 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23574  
 23575  LBB3_405: // setup for the loop at LBB3_407: rsi = r9d & -32, r8 = ((rsi - 32) >> 5) + 1; also preloads xmm2 = 0, xmm3 = all ones, xmm4 = constant at 80[rbp]
 23576  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23577  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 23578  	LONG $0xe0468d48             // lea    rax, [rsi - 32]
 23579  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23580  	LONG $0x05e8c149             // shr    r8, 5
 23581  	LONG $0x01c08349             // add    r8, 1
 23582  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23583  	JE   LBB3_687 // rsi == 32 (a single group): skip the paired loop; LBB3_687 is outside this chunk
 23584  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23585  	LONG $0xfee08348             // and    rax, -2
 23586  	WORD $0xf748; BYTE $0xd8     // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23587  	WORD $0xff31                 // xor    edi, edi
 23588  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 23589  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3
 23590  	LONG $0x656f0f66; BYTE $0x50 // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI3_6] */
 23591  
 23592  LBB3_407: // 2x-unrolled loop, 64 bytes per pass: per byte x, result = (xmm4 > x, signed) ? (x != 0 ? 0xFF : 0) : xmm4; if xmm4 holds byte 1s (presumably - confirm the constant) this is sign(x)
 23593  	LONG $0x2c6f0ff3; BYTE $0x3a   // movdqu    xmm5, oword [rdx + rdi]
 23594  	LONG $0x746f0ff3; WORD $0x103a // movdqu    xmm6, oword [rdx + rdi + 16]
 23595  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 23596  	LONG $0xc5640f66               // pcmpgtb    xmm0, xmm5
 23597  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 23598  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 23599  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 23600  	LONG $0xce640f66               // pcmpgtb    xmm1, xmm6
 23601  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 23602  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 23603  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 23604  	LONG $0x10380f66; BYTE $0xfd   // pblendvb    xmm7, xmm5, xmm0
 23605  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 23606  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 23607  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0
 23608  	LONG $0x3c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm7
 23609  	LONG $0x6c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm5
 23610  	LONG $0x6c6f0ff3; WORD $0x203a // movdqu    xmm5, oword [rdx + rdi + 32]
 23611  	LONG $0x746f0ff3; WORD $0x303a // movdqu    xmm6, oword [rdx + rdi + 48]
 23612  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 23613  	LONG $0xc5640f66               // pcmpgtb    xmm0, xmm5
 23614  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 23615  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 23616  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 23617  	LONG $0xce640f66               // pcmpgtb    xmm1, xmm6
 23618  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 23619  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 23620  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 23621  	LONG $0x10380f66; BYTE $0xfd   // pblendvb    xmm7, xmm5, xmm0
 23622  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 23623  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 23624  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0
 23625  	LONG $0x7c7f0ff3; WORD $0x2039 // movdqu    oword [rcx + rdi + 32], xmm7
 23626  	LONG $0x6c7f0ff3; WORD $0x3039 // movdqu    oword [rcx + rdi + 48], xmm5
 23627  	LONG $0x40c78348               // add    rdi, 64
 23628  	LONG $0x02c08348               // add    rax, 2
 23629  	JNE  LBB3_407 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23630  	JMP  LBB3_688 // loop finished; LBB3_688 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23631  
 23632  LBB3_408: // setup for the loop at LBB3_410: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1 (= rsi / 16 when rsi >= 16); preloads xmm8 with the constant at 144[rbp]
 23633  	WORD $0x8944; BYTE $0xce             // mov    esi, r9d
 23634  	WORD $0xe683; BYTE $0xf0             // and    esi, -16
 23635  	LONG $0xf0468d48                     // lea    rax, [rsi - 16]
 23636  	WORD $0x8949; BYTE $0xc0             // mov    r8, rax
 23637  	LONG $0x04e8c149                     // shr    r8, 4
 23638  	LONG $0x01c08349                     // add    r8, 1
 23639  	WORD $0x8548; BYTE $0xc0             // test    rax, rax
 23640  	JE   LBB3_696 // rsi == 16 (a single group): skip the paired loop; LBB3_696 is outside this chunk
 23641  	WORD $0x894c; BYTE $0xc0             // mov    rax, r8
 23642  	LONG $0xfee08348                     // and    rax, -2
 23643  	WORD $0xf748; BYTE $0xd8             // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23644  	WORD $0xff31                         // xor    edi, edi
 23645  	QUAD $0x000090856f0f4466; BYTE $0x00 // movdqa    xmm8, oword 144[rbp] /* [rip + .LCPI3_10] */
 23646  
 23647  LBB3_410: // 2x-unrolled loop, 32 bytes per pass: sign-extend bytes to dwords, compute (x + s) ^ s with s = x >> 7 (0 or -1, so this is |x|), AND with xmm8 (presumably a low-byte mask - confirm), pack back to bytes
 23648  	LONG $0x21380f66; WORD $0x3a64; BYTE $0x0c // pmovsxbd    xmm4, dword [rdx + rdi + 12]
 23649  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbd    xmm1, dword [rdx + rdi + 8]
 23650  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x04 // pmovsxbd    xmm3, dword [rdx + rdi + 4]
 23651  	LONG $0x21380f66; WORD $0x3a14             // pmovsxbd    xmm2, dword [rdx + rdi]
 23652  	LONG $0xea6f0f66                           // movdqa    xmm5, xmm2
 23653  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 23654  	LONG $0xf36f0f66                           // movdqa    xmm6, xmm3
 23655  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 23656  	LONG $0xf96f0f66                           // movdqa    xmm7, xmm1
 23657  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 23658  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 23659  	LONG $0xe0720f66; BYTE $0x07               // psrad    xmm0, 7
 23660  	LONG $0xe0fe0f66                           // paddd    xmm4, xmm0
 23661  	LONG $0xcffe0f66                           // paddd    xmm1, xmm7
 23662  	LONG $0xdefe0f66                           // paddd    xmm3, xmm6
 23663  	LONG $0xd5fe0f66                           // paddd    xmm2, xmm5
 23664  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 23665  	LONG $0xdeef0f66                           // pxor    xmm3, xmm6
 23666  	LONG $0xcfef0f66                           // pxor    xmm1, xmm7
 23667  	LONG $0xe0ef0f66                           // pxor    xmm4, xmm0
 23668  	LONG $0xdb0f4166; BYTE $0xe0               // pand    xmm4, xmm8
 23669  	LONG $0xdb0f4166; BYTE $0xc8               // pand    xmm1, xmm8
 23670  	LONG $0x2b380f66; BYTE $0xcc               // packusdw    xmm1, xmm4
 23671  	LONG $0xdb0f4166; BYTE $0xd8               // pand    xmm3, xmm8
 23672  	LONG $0xdb0f4166; BYTE $0xd0               // pand    xmm2, xmm8
 23673  	LONG $0x2b380f66; BYTE $0xd3               // packusdw    xmm2, xmm3
 23674  	LONG $0xd1670f66                           // packuswb    xmm2, xmm1
 23675  	LONG $0x147f0ff3; BYTE $0x39               // movdqu    oword [rcx + rdi], xmm2
 23676  	LONG $0x21380f66; WORD $0x3a64; BYTE $0x1c // pmovsxbd    xmm4, dword [rdx + rdi + 28]
 23677  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x18 // pmovsxbd    xmm1, dword [rdx + rdi + 24]
 23678  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x14 // pmovsxbd    xmm3, dword [rdx + rdi + 20]
 23679  	LONG $0x21380f66; WORD $0x3a54; BYTE $0x10 // pmovsxbd    xmm2, dword [rdx + rdi + 16]
 23680  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 23681  	LONG $0xe0720f66; BYTE $0x07               // psrad    xmm0, 7
 23682  	LONG $0xeb6f0f66                           // movdqa    xmm5, xmm3
 23683  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 23684  	LONG $0xf16f0f66                           // movdqa    xmm6, xmm1
 23685  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 23686  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 23687  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 23688  	LONG $0xe7fe0f66                           // paddd    xmm4, xmm7
 23689  	LONG $0xcefe0f66                           // paddd    xmm1, xmm6
 23690  	LONG $0xddfe0f66                           // paddd    xmm3, xmm5
 23691  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 23692  	LONG $0xd0ef0f66                           // pxor    xmm2, xmm0
 23693  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 23694  	LONG $0xceef0f66                           // pxor    xmm1, xmm6
 23695  	LONG $0xe7ef0f66                           // pxor    xmm4, xmm7
 23696  	LONG $0xdb0f4166; BYTE $0xe0               // pand    xmm4, xmm8
 23697  	LONG $0xdb0f4166; BYTE $0xc8               // pand    xmm1, xmm8
 23698  	LONG $0x2b380f66; BYTE $0xcc               // packusdw    xmm1, xmm4
 23699  	LONG $0xdb0f4166; BYTE $0xd8               // pand    xmm3, xmm8
 23700  	LONG $0xdb0f4166; BYTE $0xd0               // pand    xmm2, xmm8
 23701  	LONG $0x2b380f66; BYTE $0xd3               // packusdw    xmm2, xmm3
 23702  	LONG $0xd1670f66                           // packuswb    xmm2, xmm1
 23703  	LONG $0x547f0ff3; WORD $0x1039             // movdqu    oword [rcx + rdi + 16], xmm2
 23704  	LONG $0x20c78348                           // add    rdi, 32
 23705  	LONG $0x02c08348                           // add    rax, 2
 23706  	JNE  LBB3_410 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23707  	JMP  LBB3_697 // loop finished; LBB3_697 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23708  
 23709  LBB3_411: // setup for the loop at LBB3_413: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1 (= rsi / 16 when rsi >= 16); preloads xmm8 with the constant at 144[rbp]
 23710  	WORD $0x8944; BYTE $0xce             // mov    esi, r9d
 23711  	WORD $0xe683; BYTE $0xf0             // and    esi, -16
 23712  	LONG $0xf0468d48                     // lea    rax, [rsi - 16]
 23713  	WORD $0x8949; BYTE $0xc0             // mov    r8, rax
 23714  	LONG $0x04e8c149                     // shr    r8, 4
 23715  	LONG $0x01c08349                     // add    r8, 1
 23716  	WORD $0x8548; BYTE $0xc0             // test    rax, rax
 23717  	JE   LBB3_704 // rsi == 16 (a single group): skip the paired loop; LBB3_704 is outside this chunk
 23718  	WORD $0x894c; BYTE $0xc0             // mov    rax, r8
 23719  	LONG $0xfee08348                     // and    rax, -2
 23720  	WORD $0xf748; BYTE $0xd8             // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23721  	WORD $0xff31                         // xor    edi, edi
 23722  	QUAD $0x000090856f0f4466; BYTE $0x00 // movdqa    xmm8, oword 144[rbp] /* [rip + .LCPI3_10] */
 23723  
 23724  LBB3_413: // 2x-unrolled loop, 32 bytes per pass: sign-extend bytes to dwords, compute (x + s) ^ s with s = x >> 7 (0 or -1, so this is |x|), AND with xmm8 (presumably a low-byte mask - confirm), pack back to bytes
 23725  	LONG $0x21380f66; WORD $0x3a64; BYTE $0x0c // pmovsxbd    xmm4, dword [rdx + rdi + 12]
 23726  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x08 // pmovsxbd    xmm1, dword [rdx + rdi + 8]
 23727  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x04 // pmovsxbd    xmm3, dword [rdx + rdi + 4]
 23728  	LONG $0x21380f66; WORD $0x3a14             // pmovsxbd    xmm2, dword [rdx + rdi]
 23729  	LONG $0xea6f0f66                           // movdqa    xmm5, xmm2
 23730  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 23731  	LONG $0xf36f0f66                           // movdqa    xmm6, xmm3
 23732  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 23733  	LONG $0xf96f0f66                           // movdqa    xmm7, xmm1
 23734  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 23735  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 23736  	LONG $0xe0720f66; BYTE $0x07               // psrad    xmm0, 7
 23737  	LONG $0xe0fe0f66                           // paddd    xmm4, xmm0
 23738  	LONG $0xcffe0f66                           // paddd    xmm1, xmm7
 23739  	LONG $0xdefe0f66                           // paddd    xmm3, xmm6
 23740  	LONG $0xd5fe0f66                           // paddd    xmm2, xmm5
 23741  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 23742  	LONG $0xdeef0f66                           // pxor    xmm3, xmm6
 23743  	LONG $0xcfef0f66                           // pxor    xmm1, xmm7
 23744  	LONG $0xe0ef0f66                           // pxor    xmm4, xmm0
 23745  	LONG $0xdb0f4166; BYTE $0xe0               // pand    xmm4, xmm8
 23746  	LONG $0xdb0f4166; BYTE $0xc8               // pand    xmm1, xmm8
 23747  	LONG $0x2b380f66; BYTE $0xcc               // packusdw    xmm1, xmm4
 23748  	LONG $0xdb0f4166; BYTE $0xd8               // pand    xmm3, xmm8
 23749  	LONG $0xdb0f4166; BYTE $0xd0               // pand    xmm2, xmm8
 23750  	LONG $0x2b380f66; BYTE $0xd3               // packusdw    xmm2, xmm3
 23751  	LONG $0xd1670f66                           // packuswb    xmm2, xmm1
 23752  	LONG $0x147f0ff3; BYTE $0x39               // movdqu    oword [rcx + rdi], xmm2
 23753  	LONG $0x21380f66; WORD $0x3a64; BYTE $0x1c // pmovsxbd    xmm4, dword [rdx + rdi + 28]
 23754  	LONG $0x21380f66; WORD $0x3a4c; BYTE $0x18 // pmovsxbd    xmm1, dword [rdx + rdi + 24]
 23755  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x14 // pmovsxbd    xmm3, dword [rdx + rdi + 20]
 23756  	LONG $0x21380f66; WORD $0x3a54; BYTE $0x10 // pmovsxbd    xmm2, dword [rdx + rdi + 16]
 23757  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 23758  	LONG $0xe0720f66; BYTE $0x07               // psrad    xmm0, 7
 23759  	LONG $0xeb6f0f66                           // movdqa    xmm5, xmm3
 23760  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 23761  	LONG $0xf16f0f66                           // movdqa    xmm6, xmm1
 23762  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 23763  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 23764  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 23765  	LONG $0xe7fe0f66                           // paddd    xmm4, xmm7
 23766  	LONG $0xcefe0f66                           // paddd    xmm1, xmm6
 23767  	LONG $0xddfe0f66                           // paddd    xmm3, xmm5
 23768  	LONG $0xd0fe0f66                           // paddd    xmm2, xmm0
 23769  	LONG $0xd0ef0f66                           // pxor    xmm2, xmm0
 23770  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 23771  	LONG $0xceef0f66                           // pxor    xmm1, xmm6
 23772  	LONG $0xe7ef0f66                           // pxor    xmm4, xmm7
 23773  	LONG $0xdb0f4166; BYTE $0xe0               // pand    xmm4, xmm8
 23774  	LONG $0xdb0f4166; BYTE $0xc8               // pand    xmm1, xmm8
 23775  	LONG $0x2b380f66; BYTE $0xcc               // packusdw    xmm1, xmm4
 23776  	LONG $0xdb0f4166; BYTE $0xd8               // pand    xmm3, xmm8
 23777  	LONG $0xdb0f4166; BYTE $0xd0               // pand    xmm2, xmm8
 23778  	LONG $0x2b380f66; BYTE $0xd3               // packusdw    xmm2, xmm3
 23779  	LONG $0xd1670f66                           // packuswb    xmm2, xmm1
 23780  	LONG $0x547f0ff3; WORD $0x1039             // movdqu    oword [rcx + rdi + 16], xmm2
 23781  	LONG $0x20c78348                           // add    rdi, 32
 23782  	LONG $0x02c08348                           // add    rax, 2
 23783  	JNE  LBB3_413 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23784  	JMP  LBB3_705 // loop finished; LBB3_705 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23785  
 23786  LBB3_421: // setup for the loop at LBB3_423: rsi = r9d & -4, r8 = ((rsi - 4) >> 2) + 1 (= rsi / 4 when rsi >= 4); r9d is presumably the element count - confirm
 23787  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23788  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 23789  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 23790  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23791  	LONG $0x02e8c149         // shr    r8, 2
 23792  	LONG $0x01c08349         // add    r8, 1
 23793  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23794  	JE   LBB3_712 // rsi == 4 (a single group): skip the paired loop; LBB3_712 is outside this chunk
 23795  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23796  	LONG $0xfee08348         // and    rax, -2
 23797  	WORD $0xf748; BYTE $0xd8 // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23798  	WORD $0xff31             // xor    edi, edi
 23799  
 23800  LBB3_423: // 2x-unrolled loop: per qword, [rcx + 8*rdi] = 0 - [rdx + 8*rdi] (pxor zero + psubq, wrapping); rdi is an element index, 8 qwords per pass
 23801  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 23802  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 23803  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23804  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0
 23805  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23806  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 23807  	LONG $0x147f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm2
 23808  	LONG $0x447f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm0
 23809  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
 23810  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
 23811  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23812  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0
 23813  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23814  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 23815  	LONG $0x547f0ff3; WORD $0x20f9 // movdqu    oword [rcx + 8*rdi + 32], xmm2
 23816  	LONG $0x447f0ff3; WORD $0x30f9 // movdqu    oword [rcx + 8*rdi + 48], xmm0
 23817  	LONG $0x08c78348               // add    rdi, 8
 23818  	LONG $0x02c08348               // add    rax, 2
 23819  	JNE  LBB3_423 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23820  	JMP  LBB3_713 // loop finished; LBB3_713 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23821  
 23822  LBB3_424: // setup for the loop at LBB3_426: rsi = r9d & -4, r8 = ((rsi - 4) >> 2) + 1; also preloads xmm0 = 0 and xmm1 = constant at 48[rbp]
 23823  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23824  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 23825  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 23826  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 23827  	LONG $0x02e8c149             // shr    r8, 2
 23828  	LONG $0x01c08349             // add    r8, 1
 23829  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 23830  	JE   LBB3_720 // rsi == 4 (a single group): skip the paired loop; LBB3_720 is outside this chunk
 23831  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 23832  	LONG $0xfee08348             // and    rax, -2
 23833  	WORD $0xf748; BYTE $0xd8     // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23834  	WORD $0xff31                 // xor    edi, edi
 23835  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 23836  	LONG $0x4d6f0f66; BYTE $0x30 // movdqa    xmm1, oword 48[rbp] /* [rip + .LCPI3_4] */
 23837  
 23838  LBB3_426: // 2x-unrolled loop, 8 qwords per pass: per qword x, result = (x == 0) ? 0 : xmm1 (pcmpeqq against zero, then pandn with the constant; presumably qword 1s - confirm)
 23839  	LONG $0x146f0ff3; BYTE $0xfa   // movdqu    xmm2, oword [rdx + 8*rdi]
 23840  	LONG $0x5c6f0ff3; WORD $0x10fa // movdqu    xmm3, oword [rdx + 8*rdi + 16]
 23841  	LONG $0x29380f66; BYTE $0xd0   // pcmpeqq    xmm2, xmm0
 23842  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 23843  	LONG $0x29380f66; BYTE $0xd8   // pcmpeqq    xmm3, xmm0
 23844  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 23845  	LONG $0x147f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm2
 23846  	LONG $0x5c7f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm3
 23847  	LONG $0x546f0ff3; WORD $0x20fa // movdqu    xmm2, oword [rdx + 8*rdi + 32]
 23848  	LONG $0x5c6f0ff3; WORD $0x30fa // movdqu    xmm3, oword [rdx + 8*rdi + 48]
 23849  	LONG $0x29380f66; BYTE $0xd0   // pcmpeqq    xmm2, xmm0
 23850  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 23851  	LONG $0x29380f66; BYTE $0xd8   // pcmpeqq    xmm3, xmm0
 23852  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 23853  	LONG $0x547f0ff3; WORD $0x20f9 // movdqu    oword [rcx + 8*rdi + 32], xmm2
 23854  	LONG $0x5c7f0ff3; WORD $0x30f9 // movdqu    oword [rcx + 8*rdi + 48], xmm3
 23855  	LONG $0x08c78348               // add    rdi, 8
 23856  	LONG $0x02c08348               // add    rax, 2
 23857  	JNE  LBB3_426 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23858  	JMP  LBB3_721 // loop finished; LBB3_721 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23859  
 23860  LBB3_427: // group-count setup: rsi = r9d & -4, rdi = ((rsi - 4) >> 2) + 1 groups of 4, r8d = rdi & 3 (presumably the remainder for a 4x-unrolled loop outside this chunk - confirm)
 23861  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23862  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 23863  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 23864  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23865  	LONG $0x02efc148         // shr    rdi, 2
 23866  	LONG $0x01c78348         // add    rdi, 1
 23867  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 23868  	LONG $0x03e08341         // and    r8d, 3
 23869  	LONG $0x0cf88348         // cmp    rax, 12
 23870  	JAE  LBB3_550 // unsigned compare: rsi - 4 >= 12, i.e. at least 4 groups; LBB3_550 is outside this chunk
 23871  	WORD $0xc031             // xor    eax, eax
 23872  	JMP  LBB3_552 // fewer than 4 groups: continue at LBB3_552 (outside this chunk) with rax = 0
 23873  
 23874  LBB3_429: // group-count setup: rsi = r9d & -4, rdi = ((rsi - 4) >> 2) + 1 groups of 4, r8d = rdi & 3 (presumably the remainder for a 4x-unrolled loop outside this chunk - confirm)
 23875  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23876  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 23877  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 23878  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 23879  	LONG $0x02efc148         // shr    rdi, 2
 23880  	LONG $0x01c78348         // add    rdi, 1
 23881  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 23882  	LONG $0x03e08341         // and    r8d, 3
 23883  	LONG $0x0cf88348         // cmp    rax, 12
 23884  	JAE  LBB3_560 // unsigned compare: rsi - 4 >= 12, i.e. at least 4 groups; LBB3_560 is outside this chunk
 23885  	WORD $0xc031             // xor    eax, eax
 23886  	JMP  LBB3_562 // fewer than 4 groups: continue at LBB3_562 (outside this chunk) with rax = 0
 23887  
 23888  LBB3_438: // setup for the loop at LBB3_440: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1 (= rsi / 16 when rsi >= 16); r9d is presumably the element count - confirm
 23889  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23890  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 23891  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 23892  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23893  	LONG $0x04e8c149         // shr    r8, 4
 23894  	LONG $0x01c08349         // add    r8, 1
 23895  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23896  	JE   LBB3_728 // rsi == 16 (a single group): skip the paired loop; LBB3_728 is outside this chunk
 23897  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23898  	LONG $0xfee08348         // and    rax, -2
 23899  	WORD $0xf748; BYTE $0xd8 // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23900  	WORD $0xff31             // xor    edi, edi
 23901  
 23902  LBB3_440: // 2x-unrolled loop: per 16-bit word, [rcx + 2*rdi] = 0 - [rdx + 2*rdi] (pxor zero + psubw, wrapping); rdi is an element index, 32 words per pass
 23903  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 23904  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 23905  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23906  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23907  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23908  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23909  	LONG $0x147f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm2
 23910  	LONG $0x447f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm0
 23911  	LONG $0x446f0ff3; WORD $0x207a // movdqu    xmm0, oword [rdx + 2*rdi + 32]
 23912  	LONG $0x4c6f0ff3; WORD $0x307a // movdqu    xmm1, oword [rdx + 2*rdi + 48]
 23913  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23914  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23915  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23916  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23917  	LONG $0x547f0ff3; WORD $0x2079 // movdqu    oword [rcx + 2*rdi + 32], xmm2
 23918  	LONG $0x447f0ff3; WORD $0x3079 // movdqu    oword [rcx + 2*rdi + 48], xmm0
 23919  	LONG $0x20c78348               // add    rdi, 32
 23920  	LONG $0x02c08348               // add    rax, 2
 23921  	JNE  LBB3_440 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23922  	JMP  LBB3_729 // loop finished; LBB3_729 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23923  
 23924  LBB3_441: // setup for the loop at LBB3_443: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1 (= rsi / 16 when rsi >= 16); r9d is presumably the element count - confirm
 23925  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23926  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 23927  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 23928  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23929  	LONG $0x04e8c149         // shr    r8, 4
 23930  	LONG $0x01c08349         // add    r8, 1
 23931  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23932  	JE   LBB3_736 // rsi == 16 (a single group): skip the paired loop; LBB3_736 is outside this chunk
 23933  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23934  	LONG $0xfee08348         // and    rax, -2
 23935  	WORD $0xf748; BYTE $0xd8 // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23936  	WORD $0xff31             // xor    edi, edi
 23937  
 23938  LBB3_443: // 2x-unrolled loop: per 16-bit word, [rcx + 2*rdi] = 0 - [rdx + 2*rdi] (pxor zero + psubw, wrapping); rdi is an element index, 32 words per pass
 23939  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 23940  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 23941  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23942  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23943  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23944  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23945  	LONG $0x147f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm2
 23946  	LONG $0x447f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm0
 23947  	LONG $0x446f0ff3; WORD $0x207a // movdqu    xmm0, oword [rdx + 2*rdi + 32]
 23948  	LONG $0x4c6f0ff3; WORD $0x307a // movdqu    xmm1, oword [rdx + 2*rdi + 48]
 23949  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23950  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23951  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23952  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23953  	LONG $0x547f0ff3; WORD $0x2079 // movdqu    oword [rcx + 2*rdi + 32], xmm2
 23954  	LONG $0x447f0ff3; WORD $0x3079 // movdqu    oword [rcx + 2*rdi + 48], xmm0
 23955  	LONG $0x20c78348               // add    rdi, 32
 23956  	LONG $0x02c08348               // add    rax, 2
 23957  	JNE  LBB3_443 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23958  	JMP  LBB3_737 // loop finished; LBB3_737 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23959  
 23960  LBB3_444: // setup for the loop at LBB3_446: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1 (= rsi / 16 when rsi >= 16); r9d is presumably the element count - confirm
 23961  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 23962  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 23963  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 23964  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 23965  	LONG $0x04e8c149         // shr    r8, 4
 23966  	LONG $0x01c08349         // add    r8, 1
 23967  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 23968  	JE   LBB3_744 // rsi == 16 (a single group): skip the paired loop; LBB3_744 is outside this chunk
 23969  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 23970  	LONG $0xfee08348         // and    rax, -2
 23971  	WORD $0xf748; BYTE $0xd8 // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 23972  	WORD $0xff31             // xor    edi, edi
 23973  
 23974  LBB3_446: // 2x-unrolled loop: per 16-bit word, [rcx + 2*rdi] = 0 - [rdx + 2*rdi] (pxor zero + psubw, wrapping); rdi is an element index, 32 words per pass
 23975  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 23976  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 23977  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23978  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23979  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23980  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23981  	LONG $0x147f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm2
 23982  	LONG $0x447f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm0
 23983  	LONG $0x446f0ff3; WORD $0x207a // movdqu    xmm0, oword [rdx + 2*rdi + 32]
 23984  	LONG $0x4c6f0ff3; WORD $0x307a // movdqu    xmm1, oword [rdx + 2*rdi + 48]
 23985  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 23986  	LONG $0xd0f90f66               // psubw    xmm2, xmm0
 23987  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 23988  	LONG $0xc1f90f66               // psubw    xmm0, xmm1
 23989  	LONG $0x547f0ff3; WORD $0x2079 // movdqu    oword [rcx + 2*rdi + 32], xmm2
 23990  	LONG $0x447f0ff3; WORD $0x3079 // movdqu    oword [rcx + 2*rdi + 48], xmm0
 23991  	LONG $0x20c78348               // add    rdi, 32
 23992  	LONG $0x02c08348               // add    rax, 2
 23993  	JNE  LBB3_446 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 23994  	JMP  LBB3_745 // loop finished; LBB3_745 is outside this chunk (presumably handles the leftover group/tail - confirm)
 23995  
 23996  LBB3_447: // setup for the loop at LBB3_449: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1; also preloads xmm0 = 0 and xmm1 = constant at 64[rbp]
 23997  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 23998  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 23999  	LONG $0xf0468d48             // lea    rax, [rsi - 16]
 24000  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24001  	LONG $0x04e8c149             // shr    r8, 4
 24002  	LONG $0x01c08349             // add    r8, 1
 24003  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24004  	JE   LBB3_752 // rsi == 16 (a single group): skip the paired loop; LBB3_752 is outside this chunk
 24005  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24006  	LONG $0xfee08348             // and    rax, -2
 24007  	WORD $0xf748; BYTE $0xd8     // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 24008  	WORD $0xff31                 // xor    edi, edi
 24009  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 24010  	LONG $0x4d6f0f66; BYTE $0x40 // movdqa    xmm1, oword 64[rbp] /* [rip + .LCPI3_5] */
 24011  
 24012  LBB3_449: // 2x-unrolled loop, 32 words per pass: per 16-bit word x, result = (x == 0) ? 0 : xmm1 (pcmpeqw against zero, then pandn with the constant; presumably word 1s - confirm)
 24013  	LONG $0x146f0ff3; BYTE $0x7a   // movdqu    xmm2, oword [rdx + 2*rdi]
 24014  	LONG $0x5c6f0ff3; WORD $0x107a // movdqu    xmm3, oword [rdx + 2*rdi + 16]
 24015  	LONG $0xd0750f66               // pcmpeqw    xmm2, xmm0
 24016  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 24017  	LONG $0xd8750f66               // pcmpeqw    xmm3, xmm0
 24018  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 24019  	LONG $0x147f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm2
 24020  	LONG $0x5c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm3
 24021  	LONG $0x546f0ff3; WORD $0x207a // movdqu    xmm2, oword [rdx + 2*rdi + 32]
 24022  	LONG $0x5c6f0ff3; WORD $0x307a // movdqu    xmm3, oword [rdx + 2*rdi + 48]
 24023  	LONG $0xd0750f66               // pcmpeqw    xmm2, xmm0
 24024  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 24025  	LONG $0xd8750f66               // pcmpeqw    xmm3, xmm0
 24026  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 24027  	LONG $0x547f0ff3; WORD $0x2079 // movdqu    oword [rcx + 2*rdi + 32], xmm2
 24028  	LONG $0x5c7f0ff3; WORD $0x3079 // movdqu    oword [rcx + 2*rdi + 48], xmm3
 24029  	LONG $0x20c78348               // add    rdi, 32
 24030  	LONG $0x02c08348               // add    rax, 2
 24031  	JNE  LBB3_449 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 24032  	JMP  LBB3_753 // loop finished; LBB3_753 is outside this chunk (presumably handles the leftover group/tail - confirm)
 24033  
 24034  LBB3_450: // setup for the loop at LBB3_452: rsi = r9d & -16, r8 = ((rsi - 16) >> 4) + 1; also preloads xmm2 = 0, xmm3 = all ones, xmm4 = constant at 64[rbp]
 24035  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24036  	WORD $0xe683; BYTE $0xf0     // and    esi, -16
 24037  	LONG $0xf0468d48             // lea    rax, [rsi - 16]
 24038  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24039  	LONG $0x04e8c149             // shr    r8, 4
 24040  	LONG $0x01c08349             // add    r8, 1
 24041  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24042  	JE   LBB3_760 // rsi == 16 (a single group): skip the paired loop; LBB3_760 is outside this chunk
 24043  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24044  	LONG $0xfee08348             // and    rax, -2
 24045  	WORD $0xf748; BYTE $0xd8     // neg    rax  (rax = -(r8 & -2): negative counter, the loop adds 2 per pass until it reaches 0)
 24046  	WORD $0xff31                 // xor    edi, edi
 24047  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 24048  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3
 24049  	LONG $0x656f0f66; BYTE $0x40 // movdqa    xmm4, oword 64[rbp] /* [rip + .LCPI3_5] */
 24050  
 24051  LBB3_452: // 2x-unrolled loop, 32 words per pass: per 16-bit word x, result = (xmm4 > x, signed) ? (x != 0 ? 0xFFFF : 0) : xmm4; if xmm4 holds word 1s (presumably - confirm the constant) this is sign(x)
 24052  	LONG $0x2c6f0ff3; BYTE $0x7a   // movdqu    xmm5, oword [rdx + 2*rdi]
 24053  	LONG $0x746f0ff3; WORD $0x107a // movdqu    xmm6, oword [rdx + 2*rdi + 16]
 24054  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24055  	LONG $0xc5650f66               // pcmpgtw    xmm0, xmm5
 24056  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 24057  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 24058  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24059  	LONG $0xce650f66               // pcmpgtw    xmm1, xmm6
 24060  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 24061  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24062  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24063  	LONG $0x10380f66; BYTE $0xfd   // pblendvb    xmm7, xmm5, xmm0
 24064  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24065  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24066  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0
 24067  	LONG $0x3c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm7
 24068  	LONG $0x6c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm5
 24069  	LONG $0x6c6f0ff3; WORD $0x207a // movdqu    xmm5, oword [rdx + 2*rdi + 32]
 24070  	LONG $0x746f0ff3; WORD $0x307a // movdqu    xmm6, oword [rdx + 2*rdi + 48]
 24071  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24072  	LONG $0xc5650f66               // pcmpgtw    xmm0, xmm5
 24073  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 24074  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 24075  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24076  	LONG $0xce650f66               // pcmpgtw    xmm1, xmm6
 24077  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 24078  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24079  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24080  	LONG $0x10380f66; BYTE $0xfd   // pblendvb    xmm7, xmm5, xmm0
 24081  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24082  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24083  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0
 24084  	LONG $0x7c7f0ff3; WORD $0x2079 // movdqu    oword [rcx + 2*rdi + 32], xmm7
 24085  	LONG $0x6c7f0ff3; WORD $0x3079 // movdqu    oword [rcx + 2*rdi + 48], xmm5
 24086  	LONG $0x20c78348               // add    rdi, 32
 24087  	LONG $0x02c08348               // add    rax, 2
 24088  	JNE  LBB3_452 // rax starts at -(r8 & -2); repeat until the add brings it to zero
 24089  	JMP  LBB3_761 // loop finished; LBB3_761 is outside this chunk (presumably handles the leftover group/tail - confirm)
 24090  
 24091  LBB3_453:
        // Preamble for a loop that works in groups of 16 elements:
        //   esi = r9d rounded down to a multiple of 16
        //   rax = esi - 16
        //   rdi = (rax >> 4) + 1, i.e. the number of 16-element groups
        //   r8d = group count modulo 4
        // When rax >= 48 (at least four groups) control goes to LBB3_570;
        // otherwise rax is cleared and control goes to LBB3_572.
        // NOTE(review): r9d is presumably the element count, and
        // LBB3_570/LBB3_572 presumably a 4x-unrolled loop and its remainder
        // handling -- none of these are visible here, confirm.
 24092  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24093  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 24094  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 24095  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 24096  	LONG $0x04efc148         // shr    rdi, 4
 24097  	LONG $0x01c78348         // add    rdi, 1
 24098  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 24099  	LONG $0x03e08341         // and    r8d, 3
 24100  	LONG $0x30f88348         // cmp    rax, 48
 24101  	JAE  LBB3_570
 24102  	WORD $0xc031             // xor    eax, eax
 24103  	JMP  LBB3_572
 24104  
 24105  LBB3_455:
        // Preamble plus 2x-unrolled vector loop computing the absolute value of
        // signed 16-bit lanes, reading from [rdx + 2*rdi] and writing to
        // [rcx + 2*rdi].
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        //   xmm0 = all zero (used to clear the upper word of each dword)
        // NOTE(review): r9d is presumably the element count; LBB3_769 and
        // LBB3_770 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24106  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24107  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24108  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24109  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24110  	LONG $0x03e8c149         // shr    r8, 3
 24111  	LONG $0x01c08349         // add    r8, 1
 24112  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24113  	JE   LBB3_769 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24114  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24115  	LONG $0xfee08348         // and    rax, -2
 24116  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24117  	WORD $0xff31             // xor    edi, edi
 24118  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 24119  
 24120  LBB3_457:
        // One pass handles 16 words. Each half sign-extends 2x4 words to
        // dwords, forms m = x >> 15 (all ones for negative x), computes
        // (x + m) ^ m = |x|, clears the upper word of every dword, and packs
        // the eight dwords back into eight words.
 24121  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
 24122  	LONG $0x23380f66; WORD $0x7a14             // pmovsxwd    xmm2, qword [rdx + 2*rdi]
 24123  	LONG $0xda6f0f66                           // movdqa    xmm3, xmm2
 24124  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15  (sign mask m)
 24125  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 24126  	LONG $0xe4720f66; BYTE $0x0f               // psrad    xmm4, 15
 24127  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 24128  	LONG $0xd3fe0f66                           // paddd    xmm2, xmm3
 24129  	LONG $0xd3ef0f66                           // pxor    xmm2, xmm3  ((x + m) ^ m)
 24130  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 24131  	LONG $0x0e3a0f66; WORD $0xaac8             // pblendw    xmm1, xmm0, 170  (0xAA: zero the odd words)
 24132  	LONG $0x0e3a0f66; WORD $0xaad0             // pblendw    xmm2, xmm0, 170
 24133  	LONG $0x2b380f66; BYTE $0xd1               // packusdw    xmm2, xmm1
 24134  	LONG $0x147f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm2
 24135  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x18 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 24]
 24136  	LONG $0x23380f66; WORD $0x7a54; BYTE $0x10 // pmovsxwd    xmm2, qword [rdx + 2*rdi + 16]
 24137  	LONG $0xda6f0f66                           // movdqa    xmm3, xmm2
 24138  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15
 24139  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 24140  	LONG $0xe4720f66; BYTE $0x0f               // psrad    xmm4, 15
 24141  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 24142  	LONG $0xd3fe0f66                           // paddd    xmm2, xmm3
 24143  	LONG $0xd3ef0f66                           // pxor    xmm2, xmm3
 24144  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 24145  	LONG $0x0e3a0f66; WORD $0xaac8             // pblendw    xmm1, xmm0, 170
 24146  	LONG $0x0e3a0f66; WORD $0xaad0             // pblendw    xmm2, xmm0, 170
 24147  	LONG $0x2b380f66; BYTE $0xd1               // packusdw    xmm2, xmm1
 24148  	LONG $0x547f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm2
 24149  	LONG $0x10c78348                           // add    rdi, 16
 24150  	LONG $0x02c08348                           // add    rax, 2  (sets ZF for the JNE below)
 24151  	JNE  LBB3_457
 24152  	JMP  LBB3_770
 24153  
 24154  LBB3_458:
        // Preamble for a loop that works in groups of 16 elements:
        //   esi = r9d rounded down to a multiple of 16
        //   rax = esi - 16
        //   rdi = (rax >> 4) + 1, i.e. the number of 16-element groups
        //   r8d = group count modulo 4
        // When rax >= 48 (at least four groups) control goes to LBB3_580;
        // otherwise rax is cleared and control goes to LBB3_582.
        // NOTE(review): r9d is presumably the element count, and
        // LBB3_580/LBB3_582 presumably a 4x-unrolled loop and its remainder
        // handling -- none of these are visible here, confirm.
 24155  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24156  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 24157  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 24158  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 24159  	LONG $0x04efc148         // shr    rdi, 4
 24160  	LONG $0x01c78348         // add    rdi, 1
 24161  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 24162  	LONG $0x03e08341         // and    r8d, 3
 24163  	LONG $0x30f88348         // cmp    rax, 48
 24164  	JAE  LBB3_580
 24165  	WORD $0xc031             // xor    eax, eax
 24166  	JMP  LBB3_582
 24167  
 24168  LBB3_460:
        // Preamble plus 2x-unrolled vector loop computing the absolute value of
        // signed 16-bit lanes, reading from [rdx + 2*rdi] and writing to
        // [rcx + 2*rdi]. The instruction sequence is the same as the
        // LBB3_455/LBB3_457 pair; only the exit labels differ.
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        //   xmm0 = all zero (used to clear the upper word of each dword)
        // NOTE(review): r9d is presumably the element count; LBB3_777 and
        // LBB3_778 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24169  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24170  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24171  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24172  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24173  	LONG $0x03e8c149         // shr    r8, 3
 24174  	LONG $0x01c08349         // add    r8, 1
 24175  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24176  	JE   LBB3_777 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24177  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24178  	LONG $0xfee08348         // and    rax, -2
 24179  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24180  	WORD $0xff31             // xor    edi, edi
 24181  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 24182  
 24183  LBB3_462:
        // One pass handles 16 words: sign-extend to dwords, m = x >> 15,
        // (x + m) ^ m = |x|, clear the upper word of each dword, pack back.
 24184  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x08 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 8]
 24185  	LONG $0x23380f66; WORD $0x7a14             // pmovsxwd    xmm2, qword [rdx + 2*rdi]
 24186  	LONG $0xda6f0f66                           // movdqa    xmm3, xmm2
 24187  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15  (sign mask m)
 24188  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 24189  	LONG $0xe4720f66; BYTE $0x0f               // psrad    xmm4, 15
 24190  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 24191  	LONG $0xd3fe0f66                           // paddd    xmm2, xmm3
 24192  	LONG $0xd3ef0f66                           // pxor    xmm2, xmm3  ((x + m) ^ m)
 24193  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 24194  	LONG $0x0e3a0f66; WORD $0xaac8             // pblendw    xmm1, xmm0, 170  (0xAA: zero the odd words)
 24195  	LONG $0x0e3a0f66; WORD $0xaad0             // pblendw    xmm2, xmm0, 170
 24196  	LONG $0x2b380f66; BYTE $0xd1               // packusdw    xmm2, xmm1
 24197  	LONG $0x147f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm2
 24198  	LONG $0x23380f66; WORD $0x7a4c; BYTE $0x18 // pmovsxwd    xmm1, qword [rdx + 2*rdi + 24]
 24199  	LONG $0x23380f66; WORD $0x7a54; BYTE $0x10 // pmovsxwd    xmm2, qword [rdx + 2*rdi + 16]
 24200  	LONG $0xda6f0f66                           // movdqa    xmm3, xmm2
 24201  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15
 24202  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 24203  	LONG $0xe4720f66; BYTE $0x0f               // psrad    xmm4, 15
 24204  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 24205  	LONG $0xd3fe0f66                           // paddd    xmm2, xmm3
 24206  	LONG $0xd3ef0f66                           // pxor    xmm2, xmm3
 24207  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 24208  	LONG $0x0e3a0f66; WORD $0xaac8             // pblendw    xmm1, xmm0, 170
 24209  	LONG $0x0e3a0f66; WORD $0xaad0             // pblendw    xmm2, xmm0, 170
 24210  	LONG $0x2b380f66; BYTE $0xd1               // packusdw    xmm2, xmm1
 24211  	LONG $0x547f0ff3; WORD $0x1079             // movdqu    oword [rcx + 2*rdi + 16], xmm2
 24212  	LONG $0x10c78348                           // add    rdi, 16
 24213  	LONG $0x02c08348                           // add    rax, 2  (sets ZF for the JNE below)
 24214  	JNE  LBB3_462
 24215  	JMP  LBB3_778
 24216  
 24217  LBB3_463:
        // Preamble plus 2x-unrolled vector loop that negates 64-bit lanes
        // (0 - x via psubq), reading from [rdx + 8*rdi] and writing to
        // [rcx + 8*rdi].
        //   esi = r9d rounded down to a multiple of 4
        //   r8  = ((esi - 4) >> 2) + 1, the number of 4-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): r9d is presumably the element count; LBB3_785 and
        // LBB3_786 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24218  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24219  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 24220  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 24221  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24222  	LONG $0x02e8c149         // shr    r8, 2
 24223  	LONG $0x01c08349         // add    r8, 1
 24224  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24225  	JE   LBB3_785 // esi == 4: a single group, so the 2x-unrolled loop is skipped
 24226  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24227  	LONG $0xfee08348         // and    rax, -2
 24228  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24229  	WORD $0xff31             // xor    edi, edi
 24230  
 24231  LBB3_465:
        // One pass handles 8 quadwords (four 16-byte vectors).
 24232  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 24233  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 24234  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24235  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0  (0 - x)
 24236  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24237  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 24238  	LONG $0x147f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm2
 24239  	LONG $0x447f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm0
 24240  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
 24241  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
 24242  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24243  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0
 24244  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24245  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 24246  	LONG $0x547f0ff3; WORD $0x20f9 // movdqu    oword [rcx + 8*rdi + 32], xmm2
 24247  	LONG $0x447f0ff3; WORD $0x30f9 // movdqu    oword [rcx + 8*rdi + 48], xmm0
 24248  	LONG $0x08c78348               // add    rdi, 8
 24249  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24250  	JNE  LBB3_465
 24251  	JMP  LBB3_786
 24252  
 24253  LBB3_466:
        // Preamble plus 2x-unrolled vector loop that XORs 32-bit elements
        // (index scale 4) with the 16-byte constant loaded from 96[rbp],
        // reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi].
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): the constant is presumably a per-lane sign-bit mask,
        // which would make this a float32 negation -- the constant table is
        // not visible here, confirm. r9d is presumably the element count;
        // LBB3_793 and LBB3_794 are not visible here either.
 24254  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24255  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 24256  	LONG $0xf8468d48             // lea    rax, [rsi - 8]
 24257  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24258  	LONG $0x03e8c149             // shr    r8, 3
 24259  	LONG $0x01c08349             // add    r8, 1
 24260  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24261  	JE   LBB3_793 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24262  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24263  	LONG $0xfee08348             // and    rax, -2
 24264  	WORD $0xf748; BYTE $0xd8     // neg    rax
 24265  	WORD $0xff31                 // xor    edi, edi
 24266  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */
 24267  
 24268  LBB3_468:
        // One pass handles 16 elements (four 16-byte vectors).
 24269  	LONG $0x0c100f66; BYTE $0xba   // movupd    xmm1, oword [rdx + 4*rdi]
 24270  	LONG $0x54100f66; WORD $0x10ba // movupd    xmm2, oword [rdx + 4*rdi + 16]
 24271  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 24272  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 24273  	LONG $0x0c110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm1
 24274  	LONG $0x54110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm2
 24275  	LONG $0x4c100f66; WORD $0x20ba // movupd    xmm1, oword [rdx + 4*rdi + 32]
 24276  	LONG $0x54100f66; WORD $0x30ba // movupd    xmm2, oword [rdx + 4*rdi + 48]
 24277  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 24278  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 24279  	LONG $0x4c110f66; WORD $0x20b9 // movupd    oword [rcx + 4*rdi + 32], xmm1
 24280  	LONG $0x54110f66; WORD $0x30b9 // movupd    oword [rcx + 4*rdi + 48], xmm2
 24281  	LONG $0x10c78348               // add    rdi, 16
 24282  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24283  	JNE  LBB3_468
 24284  	JMP  LBB3_794
 24285  
 24286  LBB3_469:
        // Preamble plus 2x-unrolled vector loop that negates 64-bit lanes
        // (0 - x via psubq), reading from [rdx + 8*rdi] and writing to
        // [rcx + 8*rdi]. The instruction sequence is the same as the
        // LBB3_463/LBB3_465 pair; only the exit labels differ.
        //   esi = r9d rounded down to a multiple of 4
        //   r8  = ((esi - 4) >> 2) + 1, the number of 4-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): r9d is presumably the element count; LBB3_803 and
        // LBB3_804 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24287  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24288  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 24289  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 24290  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24291  	LONG $0x02e8c149         // shr    r8, 2
 24292  	LONG $0x01c08349         // add    r8, 1
 24293  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24294  	JE   LBB3_803 // esi == 4: a single group, so the 2x-unrolled loop is skipped
 24295  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24296  	LONG $0xfee08348         // and    rax, -2
 24297  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24298  	WORD $0xff31             // xor    edi, edi
 24299  
 24300  LBB3_471:
        // One pass handles 8 quadwords (four 16-byte vectors).
 24301  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 24302  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 24303  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24304  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0  (0 - x)
 24305  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24306  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 24307  	LONG $0x147f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm2
 24308  	LONG $0x447f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm0
 24309  	LONG $0x446f0ff3; WORD $0x20fa // movdqu    xmm0, oword [rdx + 8*rdi + 32]
 24310  	LONG $0x4c6f0ff3; WORD $0x30fa // movdqu    xmm1, oword [rdx + 8*rdi + 48]
 24311  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24312  	LONG $0xd0fb0f66               // psubq    xmm2, xmm0
 24313  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24314  	LONG $0xc1fb0f66               // psubq    xmm0, xmm1
 24315  	LONG $0x547f0ff3; WORD $0x20f9 // movdqu    oword [rcx + 8*rdi + 32], xmm2
 24316  	LONG $0x447f0ff3; WORD $0x30f9 // movdqu    oword [rcx + 8*rdi + 48], xmm0
 24317  	LONG $0x08c78348               // add    rdi, 8
 24318  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24319  	JNE  LBB3_471
 24320  	JMP  LBB3_804
 24321  
 24322  LBB3_472:
        // Preamble plus 2x-unrolled vector loop that XORs 32-bit elements
        // (index scale 4) with the 16-byte constant loaded from 96[rbp],
        // reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi]. The
        // instruction sequence is the same as the LBB3_466/LBB3_468 pair; only
        // the exit labels differ.
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): the constant is presumably a per-lane sign-bit mask,
        // which would make this a float32 negation -- the constant table is
        // not visible here, confirm. r9d is presumably the element count;
        // LBB3_811 and LBB3_812 are not visible here either.
 24323  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24324  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 24325  	LONG $0xf8468d48             // lea    rax, [rsi - 8]
 24326  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24327  	LONG $0x03e8c149             // shr    r8, 3
 24328  	LONG $0x01c08349             // add    r8, 1
 24329  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24330  	JE   LBB3_811 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24331  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24332  	LONG $0xfee08348             // and    rax, -2
 24333  	WORD $0xf748; BYTE $0xd8     // neg    rax
 24334  	WORD $0xff31                 // xor    edi, edi
 24335  	LONG $0x45280f66; BYTE $0x60 // movapd    xmm0, oword 96[rbp] /* [rip + .LCPI3_7] */
 24336  
 24337  LBB3_474:
        // One pass handles 16 elements (four 16-byte vectors).
 24338  	LONG $0x0c100f66; BYTE $0xba   // movupd    xmm1, oword [rdx + 4*rdi]
 24339  	LONG $0x54100f66; WORD $0x10ba // movupd    xmm2, oword [rdx + 4*rdi + 16]
 24340  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 24341  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 24342  	LONG $0x0c110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm1
 24343  	LONG $0x54110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm2
 24344  	LONG $0x4c100f66; WORD $0x20ba // movupd    xmm1, oword [rdx + 4*rdi + 32]
 24345  	LONG $0x54100f66; WORD $0x30ba // movupd    xmm2, oword [rdx + 4*rdi + 48]
 24346  	LONG $0xc8570f66               // xorpd    xmm1, xmm0
 24347  	LONG $0xd0570f66               // xorpd    xmm2, xmm0
 24348  	LONG $0x4c110f66; WORD $0x20b9 // movupd    oword [rcx + 4*rdi + 32], xmm1
 24349  	LONG $0x54110f66; WORD $0x30b9 // movupd    oword [rcx + 4*rdi + 48], xmm2
 24350  	LONG $0x10c78348               // add    rdi, 16
 24351  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24352  	JNE  LBB3_474
 24353  	JMP  LBB3_812
 24354  
 24355  LBB3_475:
        // Preamble plus 2x-unrolled vector loop over 64-bit lanes, reading from
        // [rdx + 8*rdi] and writing to [rcx + 8*rdi]. For every lane x the
        // stored value is
        //     (xmm4 > x, signed) ? (x != 0 ? all-ones : 0) : xmm4
        //   esi = r9d rounded down to a multiple of 4
        //   r8  = ((esi - 4) >> 2) + 1, the number of 4-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        //   xmm2 = all zero, xmm3 = all ones, xmm4 = constant from 48[rbp]
        // blendvpd uses xmm0 as its implicit mask and selects per 64-bit lane
        // on the mask's sign bit.
        // NOTE(review): the lanes of xmm4 are presumably 1, which would make
        // this a signed 64-bit signum (-1/0/1) -- the constant table is not
        // visible here, confirm. r9d is presumably the element count;
        // LBB3_821 and LBB3_822 are not visible here either.
 24356  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24357  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 24358  	LONG $0xfc468d48             // lea    rax, [rsi - 4]
 24359  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24360  	LONG $0x02e8c149             // shr    r8, 2
 24361  	LONG $0x01c08349             // add    r8, 1
 24362  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24363  	JE   LBB3_821 // esi == 4: a single group, so the 2x-unrolled loop is skipped
 24364  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24365  	LONG $0xfee08348             // and    rax, -2
 24366  	WORD $0xf748; BYTE $0xd8     // neg    rax
 24367  	WORD $0xff31                 // xor    edi, edi
 24368  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 24369  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3  (all ones)
 24370  	LONG $0x656f0f66; BYTE $0x30 // movdqa    xmm4, oword 48[rbp] /* [rip + .LCPI3_4] */
 24371  
 24372  LBB3_477:
        // One pass handles 8 quadwords (four 16-byte vectors).
 24373  	LONG $0x2c6f0ff3; BYTE $0xfa   // movdqu    xmm5, oword [rdx + 8*rdi]
 24374  	LONG $0x746f0ff3; WORD $0x10fa // movdqu    xmm6, oword [rdx + 8*rdi + 16]
 24375  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24376  	LONG $0x37380f66; BYTE $0xc5   // pcmpgtq    xmm0, xmm5  (mask: xmm4 > x)
 24377  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2  (mask: x == 0)
 24378  	LONG $0xebef0f66               // pxor    xmm5, xmm3  (invert: all ones where x != 0)
 24379  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24380  	LONG $0x37380f66; BYTE $0xce   // pcmpgtq    xmm1, xmm6
 24381  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 24382  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24383  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24384  	LONG $0x15380f66; BYTE $0xfd   // blendvpd    xmm7, xmm5, xmm0  (take xmm5 where mask set, else keep xmm4)
 24385  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24386  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  (second mask must be in xmm0 for blendvpd)
 24387  	LONG $0x15380f66; BYTE $0xee   // blendvpd    xmm5, xmm6, xmm0
 24388  	LONG $0x3c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm7
 24389  	LONG $0x6c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm5
 24390  	LONG $0x6c6f0ff3; WORD $0x20fa // movdqu    xmm5, oword [rdx + 8*rdi + 32]
 24391  	LONG $0x746f0ff3; WORD $0x30fa // movdqu    xmm6, oword [rdx + 8*rdi + 48]
 24392  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24393  	LONG $0x37380f66; BYTE $0xc5   // pcmpgtq    xmm0, xmm5
 24394  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2
 24395  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 24396  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24397  	LONG $0x37380f66; BYTE $0xce   // pcmpgtq    xmm1, xmm6
 24398  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 24399  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24400  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24401  	LONG $0x15380f66; BYTE $0xfd   // blendvpd    xmm7, xmm5, xmm0
 24402  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24403  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24404  	LONG $0x15380f66; BYTE $0xee   // blendvpd    xmm5, xmm6, xmm0
 24405  	LONG $0x7c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm7
 24406  	LONG $0x6c110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm5
 24407  	LONG $0x08c78348               // add    rdi, 8
 24408  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24409  	JNE  LBB3_477
 24410  	JMP  LBB3_822
 24411  
 24412  LBB3_478:
        // Preamble plus vector loop over 32-bit lanes, reading from
        // [rdx + 4*rdi] and writing to [rcx + 4*rdi], 8 elements per pass.
        //   esi = eax rounded down to a multiple of 8 (loop bound)
        //   rdi = element index, starting at 0
        //   xmm0 = all zero, xmm1 = constant from 32[rbp]
        // Per lane: t = (x >> 31, arithmetic) | xmm1 is converted from int32
        // to float32, then ANDed with the mask (x != 0.0), where x is compared
        // as a float32. Lanes equal to zero therefore produce all-zero bits.
        // Unlike the neighbouring loops, this one counts rdi up to rsi instead
        // of using a negative counter.
        // NOTE(review): xmm1 presumably holds integer 1 per lane, so t is +1
        // or -1 and the result is a float32 signum -- the constant table is
        // not visible here, confirm. eax presumably holds the element count on
        // entry; LBB3_923 and LBB3_481 are not visible here.
 24413  	WORD $0xc689                 // mov    esi, eax
 24414  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 24415  	WORD $0xff31                 // xor    edi, edi
 24416  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
 24417  	LONG $0x4d6f0f66; BYTE $0x20 // movdqa    xmm1, oword 32[rbp] /* [rip + .LCPI3_3] */
 24418  
 24419  LBB3_479:
 24420  	LONG $0x146f0ff3; BYTE $0xba   // movdqu    xmm2, oword [rdx + 4*rdi]
 24421  	LONG $0x5c6f0ff3; WORD $0x10ba // movdqu    xmm3, oword [rdx + 4*rdi + 16]
 24422  	LONG $0xe26f0f66               // movdqa    xmm4, xmm2
 24423  	LONG $0xe4720f66; BYTE $0x1f   // psrad    xmm4, 31  (all ones where the sign bit is set)
 24424  	LONG $0xe1eb0f66               // por    xmm4, xmm1
 24425  	LONG $0xeb6f0f66               // movdqa    xmm5, xmm3
 24426  	LONG $0xe5720f66; BYTE $0x1f   // psrad    xmm5, 31
 24427  	LONG $0xe9eb0f66               // por    xmm5, xmm1
 24428  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4  (int32 -> float32)
 24429  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 24430  	LONG $0x04d0c20f               // cmpneqps    xmm2, xmm0  (mask: x != 0.0)
 24431  	WORD $0x540f; BYTE $0xd4       // andps    xmm2, xmm4  (zero the lanes where x == 0)
 24432  	LONG $0x04d8c20f               // cmpneqps    xmm3, xmm0
 24433  	WORD $0x540f; BYTE $0xdd       // andps    xmm3, xmm5
 24434  	LONG $0xb914110f               // movups    oword [rcx + 4*rdi], xmm2
 24435  	LONG $0xb95c110f; BYTE $0x10   // movups    oword [rcx + 4*rdi + 16], xmm3
 24436  	LONG $0x08c78348               // add    rdi, 8
 24437  	WORD $0x3948; BYTE $0xfe       // cmp    rsi, rdi
 24438  	JNE  LBB3_479
 24439  	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax  (was the count already a multiple of 8?)
 24440  	JE   LBB3_923
 24441  	JMP  LBB3_481
 24442  
 24443  LBB3_486:
        // Preamble plus 2x-unrolled vector loop computing the absolute value of
        // signed 64-bit lanes, reading from [rdx + 8*rdi] and writing to
        // [rcx + 8*rdi]. Each lane x is replaced by (0 - x) when its sign bit
        // is set: blendvpd uses xmm0 (a copy of x) as its implicit mask and
        // selects on the sign bit of each 64-bit lane.
        //   esi = r9d rounded down to a multiple of 4
        //   r8  = ((esi - 4) >> 2) + 1, the number of 4-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): r9d is presumably the element count; LBB3_830 and
        // LBB3_831 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24444  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24445  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 24446  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 24447  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24448  	LONG $0x02e8c149         // shr    r8, 2
 24449  	LONG $0x01c08349         // add    r8, 1
 24450  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24451  	JE   LBB3_830 // esi == 4: a single group, so the 2x-unrolled loop is skipped
 24452  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24453  	LONG $0xfee08348         // and    rax, -2
 24454  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24455  	WORD $0xff31             // xor    edi, edi
 24456  
 24457  LBB3_488:
        // One pass handles 8 quadwords (four 16-byte vectors).
 24458  	LONG $0x0c6f0ff3; BYTE $0xfa   // movdqu    xmm1, oword [rdx + 8*rdi]
 24459  	LONG $0x546f0ff3; WORD $0x10fa // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 24460  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24461  	LONG $0xd9fb0f66               // psubq    xmm3, xmm1  (0 - x)
 24462  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  (x itself is the blend mask)
 24463  	LONG $0x15380f66; BYTE $0xcb   // blendvpd    xmm1, xmm3, xmm0  (take -x where x is negative)
 24464  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24465  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 24466  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 24467  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 24468  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 24469  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 24470  	LONG $0x4c6f0ff3; WORD $0x20fa // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 24471  	LONG $0x546f0ff3; WORD $0x30fa // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 24472  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24473  	LONG $0xd9fb0f66               // psubq    xmm3, xmm1
 24474  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24475  	LONG $0x15380f66; BYTE $0xcb   // blendvpd    xmm1, xmm3, xmm0
 24476  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24477  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 24478  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 24479  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 24480  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 24481  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 24482  	LONG $0x08c78348               // add    rdi, 8
 24483  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24484  	JNE  LBB3_488
 24485  	JMP  LBB3_831
 24486  
 24487  LBB3_489:
        // Preamble plus 2x-unrolled vector loop that ANDs 32-bit elements
        // (index scale 4) with the 16-byte constant loaded from 128[rbp],
        // reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi].
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): the constant is presumably a per-lane mask that clears
        // the sign bit, which would make this a float32 absolute value -- the
        // constant table is not visible here, confirm. r9d is presumably the
        // element count; LBB3_838 and LBB3_839 are not visible here either.
 24488  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24489  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24490  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24491  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24492  	LONG $0x03e8c149         // shr    r8, 3
 24493  	LONG $0x01c08349         // add    r8, 1
 24494  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24495  	JE   LBB3_838 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24496  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24497  	LONG $0xfee08348         // and    rax, -2
 24498  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24499  	WORD $0xff31             // xor    edi, edi
 24500  	QUAD $0x0000008085280f66 // movapd    xmm0, oword 128[rbp] /* [rip + .LCPI3_9] */
 24501  
 24502  LBB3_491:
        // One pass handles 16 elements (four 16-byte vectors).
 24503  	LONG $0x0c100f66; BYTE $0xba   // movupd    xmm1, oword [rdx + 4*rdi]
 24504  	LONG $0x54100f66; WORD $0x10ba // movupd    xmm2, oword [rdx + 4*rdi + 16]
 24505  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 24506  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 24507  	LONG $0x0c110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm1
 24508  	LONG $0x54110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm2
 24509  	LONG $0x4c100f66; WORD $0x20ba // movupd    xmm1, oword [rdx + 4*rdi + 32]
 24510  	LONG $0x54100f66; WORD $0x30ba // movupd    xmm2, oword [rdx + 4*rdi + 48]
 24511  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 24512  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 24513  	LONG $0x4c110f66; WORD $0x20b9 // movupd    oword [rcx + 4*rdi + 32], xmm1
 24514  	LONG $0x54110f66; WORD $0x30b9 // movupd    oword [rcx + 4*rdi + 48], xmm2
 24515  	LONG $0x10c78348               // add    rdi, 16
 24516  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24517  	JNE  LBB3_491
 24518  	JMP  LBB3_839
 24519  
 24520  LBB3_492:
        // Preamble plus 2x-unrolled vector loop computing the absolute value of
        // signed 64-bit lanes, reading from [rdx + 8*rdi] and writing to
        // [rcx + 8*rdi]. The instruction sequence is the same as the
        // LBB3_486/LBB3_488 pair; only the exit labels differ. Each lane x is
        // replaced by (0 - x) when its sign bit is set: blendvpd uses xmm0
        // (a copy of x) as its implicit mask.
        //   esi = r9d rounded down to a multiple of 4
        //   r8  = ((esi - 4) >> 2) + 1, the number of 4-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): r9d is presumably the element count; LBB3_848 and
        // LBB3_849 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24521  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24522  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 24523  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 24524  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24525  	LONG $0x02e8c149         // shr    r8, 2
 24526  	LONG $0x01c08349         // add    r8, 1
 24527  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24528  	JE   LBB3_848 // esi == 4: a single group, so the 2x-unrolled loop is skipped
 24529  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24530  	LONG $0xfee08348         // and    rax, -2
 24531  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24532  	WORD $0xff31             // xor    edi, edi
 24533  
 24534  LBB3_494:
        // One pass handles 8 quadwords (four 16-byte vectors).
 24535  	LONG $0x0c6f0ff3; BYTE $0xfa   // movdqu    xmm1, oword [rdx + 8*rdi]
 24536  	LONG $0x546f0ff3; WORD $0x10fa // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 24537  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24538  	LONG $0xd9fb0f66               // psubq    xmm3, xmm1  (0 - x)
 24539  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  (x itself is the blend mask)
 24540  	LONG $0x15380f66; BYTE $0xcb   // blendvpd    xmm1, xmm3, xmm0  (take -x where x is negative)
 24541  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24542  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 24543  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 24544  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 24545  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 24546  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 24547  	LONG $0x4c6f0ff3; WORD $0x20fa // movdqu    xmm1, oword [rdx + 8*rdi + 32]
 24548  	LONG $0x546f0ff3; WORD $0x30fa // movdqu    xmm2, oword [rdx + 8*rdi + 48]
 24549  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24550  	LONG $0xd9fb0f66               // psubq    xmm3, xmm1
 24551  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24552  	LONG $0x15380f66; BYTE $0xcb   // blendvpd    xmm1, xmm3, xmm0
 24553  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 24554  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 24555  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 24556  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 24557  	LONG $0x4c110f66; WORD $0x20f9 // movupd    oword [rcx + 8*rdi + 32], xmm1
 24558  	LONG $0x54110f66; WORD $0x30f9 // movupd    oword [rcx + 8*rdi + 48], xmm2
 24559  	LONG $0x08c78348               // add    rdi, 8
 24560  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24561  	JNE  LBB3_494
 24562  	JMP  LBB3_849
 24563  
 24564  LBB3_495:
        // Preamble plus 2x-unrolled vector loop that ANDs 32-bit elements
        // (index scale 4) with the 16-byte constant loaded from 128[rbp],
        // reading from [rdx + 4*rdi] and writing to [rcx + 4*rdi]. The
        // instruction sequence is the same as the LBB3_489/LBB3_491 pair; only
        // the exit labels differ.
        //   esi = r9d rounded down to a multiple of 8
        //   r8  = ((esi - 8) >> 3) + 1, the number of 8-element groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = element index, starting at 0
        // NOTE(review): the constant is presumably a per-lane mask that clears
        // the sign bit, which would make this a float32 absolute value -- the
        // constant table is not visible here, confirm. r9d is presumably the
        // element count; LBB3_856 and LBB3_857 are not visible here either.
 24565  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24566  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24567  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24568  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24569  	LONG $0x03e8c149         // shr    r8, 3
 24570  	LONG $0x01c08349         // add    r8, 1
 24571  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24572  	JE   LBB3_856 // esi == 8: a single group, so the 2x-unrolled loop is skipped
 24573  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24574  	LONG $0xfee08348         // and    rax, -2
 24575  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24576  	WORD $0xff31             // xor    edi, edi
 24577  	QUAD $0x0000008085280f66 // movapd    xmm0, oword 128[rbp] /* [rip + .LCPI3_9] */
 24578  
 24579  LBB3_497:
        // One pass handles 16 elements (four 16-byte vectors).
 24580  	LONG $0x0c100f66; BYTE $0xba   // movupd    xmm1, oword [rdx + 4*rdi]
 24581  	LONG $0x54100f66; WORD $0x10ba // movupd    xmm2, oword [rdx + 4*rdi + 16]
 24582  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 24583  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 24584  	LONG $0x0c110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm1
 24585  	LONG $0x54110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm2
 24586  	LONG $0x4c100f66; WORD $0x20ba // movupd    xmm1, oword [rdx + 4*rdi + 32]
 24587  	LONG $0x54100f66; WORD $0x30ba // movupd    xmm2, oword [rdx + 4*rdi + 48]
 24588  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 24589  	LONG $0xd0540f66               // andpd    xmm2, xmm0
 24590  	LONG $0x4c110f66; WORD $0x20b9 // movupd    oword [rcx + 4*rdi + 32], xmm1
 24591  	LONG $0x54110f66; WORD $0x30b9 // movupd    oword [rcx + 4*rdi + 48], xmm2
 24592  	LONG $0x10c78348               // add    rdi, 16
 24593  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24594  	JNE  LBB3_497
 24595  	JMP  LBB3_857
 24596  
 24597  LBB3_505:
        // Preamble plus 2x-unrolled vector loop that negates byte lanes
        // (0 - x via psubb), reading from [rdx + rdi] and writing to
        // [rcx + rdi].
        //   esi = r9d rounded down to a multiple of 32
        //   r8  = ((esi - 32) >> 5) + 1, the number of 32-byte groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = byte index, starting at 0
        // NOTE(review): r9d is presumably the element count; LBB3_866 and
        // LBB3_867 are not visible here and presumably handle the leftover
        // group and tail -- confirm.
 24598  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24599  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 24600  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 24601  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24602  	LONG $0x05e8c149         // shr    r8, 5
 24603  	LONG $0x01c08349         // add    r8, 1
 24604  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24605  	JE   LBB3_866 // esi == 32: a single group, so the 2x-unrolled loop is skipped
 24606  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24607  	LONG $0xfee08348         // and    rax, -2
 24608  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24609  	WORD $0xff31             // xor    edi, edi
 24610  
 24611  LBB3_507:
        // One pass handles 64 bytes (four 16-byte vectors).
 24612  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 24613  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 24614  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24615  	LONG $0xd0f80f66               // psubb    xmm2, xmm0  (0 - x)
 24616  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24617  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 24618  	LONG $0x147f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm2
 24619  	LONG $0x447f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm0
 24620  	LONG $0x446f0ff3; WORD $0x203a // movdqu    xmm0, oword [rdx + rdi + 32]
 24621  	LONG $0x4c6f0ff3; WORD $0x303a // movdqu    xmm1, oword [rdx + rdi + 48]
 24622  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24623  	LONG $0xd0f80f66               // psubb    xmm2, xmm0
 24624  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24625  	LONG $0xc1f80f66               // psubb    xmm0, xmm1
 24626  	LONG $0x547f0ff3; WORD $0x2039 // movdqu    oword [rcx + rdi + 32], xmm2
 24627  	LONG $0x447f0ff3; WORD $0x3039 // movdqu    oword [rcx + rdi + 48], xmm0
 24628  	LONG $0x40c78348               // add    rdi, 64
 24629  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24630  	JNE  LBB3_507
 24631  	JMP  LBB3_867
 24632  
 24633  LBB3_508:
        // Preamble plus 2x-unrolled vector loop over byte lanes, reading from
        // [rdx + rdi] and writing to [rcx + rdi]. For every byte x the stored
        // value is (x == 0) ? 0 : xmm1: pcmpeqb builds the "x == 0" mask and
        // pandn computes (~mask) & xmm1.
        //   esi = r9d rounded down to a multiple of 32
        //   r8  = ((esi - 32) >> 5) + 1, the number of 32-byte groups
        //   rax = -(r8 rounded down to an even number); the loop adds 2 per
        //         pass and stops at zero
        //   rdi = byte index, starting at 0
        //   xmm0 = all zero, xmm1 = constant from 80[rbp]
        // NOTE(review): xmm1 presumably holds 1 in every byte, which would
        // make this a signum of unsigned 8-bit values (0/1) -- the constant
        // table is not visible here, confirm. r9d is presumably the element
        // count; LBB3_874 and LBB3_875 are not visible here either.
 24634  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24635  	WORD $0xe683; BYTE $0xe0     // and    esi, -32
 24636  	LONG $0xe0468d48             // lea    rax, [rsi - 32]
 24637  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24638  	LONG $0x05e8c149             // shr    r8, 5
 24639  	LONG $0x01c08349             // add    r8, 1
 24640  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24641  	JE   LBB3_874 // esi == 32: a single group, so the 2x-unrolled loop is skipped
 24642  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24643  	LONG $0xfee08348             // and    rax, -2
 24644  	WORD $0xf748; BYTE $0xd8     // neg    rax
 24645  	WORD $0xff31                 // xor    edi, edi
 24646  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 24647  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI3_6] */
 24648  
 24649  LBB3_510:
        // One pass handles 64 bytes (four 16-byte vectors).
 24650  	LONG $0x146f0ff3; BYTE $0x3a   // movdqu    xmm2, oword [rdx + rdi]
 24651  	LONG $0x5c6f0ff3; WORD $0x103a // movdqu    xmm3, oword [rdx + rdi + 16]
 24652  	LONG $0xd0740f66               // pcmpeqb    xmm2, xmm0  (mask: x == 0)
 24653  	LONG $0xd1df0f66               // pandn    xmm2, xmm1  ((~mask) & xmm1)
 24654  	LONG $0xd8740f66               // pcmpeqb    xmm3, xmm0
 24655  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 24656  	LONG $0x147f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm2
 24657  	LONG $0x5c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm3
 24658  	LONG $0x546f0ff3; WORD $0x203a // movdqu    xmm2, oword [rdx + rdi + 32]
 24659  	LONG $0x5c6f0ff3; WORD $0x303a // movdqu    xmm3, oword [rdx + rdi + 48]
 24660  	LONG $0xd0740f66               // pcmpeqb    xmm2, xmm0
 24661  	LONG $0xd1df0f66               // pandn    xmm2, xmm1
 24662  	LONG $0xd8740f66               // pcmpeqb    xmm3, xmm0
 24663  	LONG $0xd9df0f66               // pandn    xmm3, xmm1
 24664  	LONG $0x547f0ff3; WORD $0x2039 // movdqu    oword [rcx + rdi + 32], xmm2
 24665  	LONG $0x5c7f0ff3; WORD $0x3039 // movdqu    oword [rcx + rdi + 48], xmm3
 24666  	LONG $0x40c78348               // add    rdi, 64
 24667  	LONG $0x02c08348               // add    rax, 2  (sets ZF for the JNE below)
 24668  	JNE  LBB3_510
 24669  	JMP  LBB3_875
 24670  
 24671  LBB3_511:
        // Loop-count setup and dispatch.
        //   esi = r9d & -32            (r9d rounded down to a multiple of 32)
        //   rdi = ((rsi - 32) >> 5) + 1
        //   r8d = edi & 3
        // If rsi - 32 >= 96 (unsigned compare) control goes to LBB3_590;
        // otherwise eax is cleared and control goes to LBB3_592.
 24672  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24673  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 24674  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 24675  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 24676  	LONG $0x05efc148         // shr    rdi, 5
 24677  	LONG $0x01c78348         // add    rdi, 1
 24678  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 24679  	LONG $0x03e08341         // and    r8d, 3
 24680  	LONG $0x60f88348         // cmp    rax, 96
 24681  	JAE  LBB3_590
 24682  	WORD $0xc031             // xor    eax, eax
 24683  	JMP  LBB3_592
 24684  
 24685  LBB3_513:
        // Loop-count setup and dispatch (same arithmetic as LBB3_511, with
        // different branch targets).
        //   esi = r9d & -32            (r9d rounded down to a multiple of 32)
        //   rdi = ((rsi - 32) >> 5) + 1
        //   r8d = edi & 3
        // If rsi - 32 >= 96 (unsigned compare) control goes to LBB3_600;
        // otherwise eax is cleared and control goes to LBB3_602.
 24686  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24687  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 24688  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 24689  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 24690  	LONG $0x05efc148         // shr    rdi, 5
 24691  	LONG $0x01c78348         // add    rdi, 1
 24692  	WORD $0x8941; BYTE $0xf8 // mov    r8d, edi
 24693  	LONG $0x03e08341         // and    r8d, 3
 24694  	LONG $0x60f88348         // cmp    rax, 96
 24695  	JAE  LBB3_600
 24696  	WORD $0xc031             // xor    eax, eax
 24697  	JMP  LBB3_602
 24698  
 24699  LBB3_515:
        // Setup for the dword negation loop at LBB3_517.
        //   esi = r9d & -8             (r9d rounded down to a multiple of 8)
        //   r8  = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero the loop is bypassed via LBB3_882. Otherwise
        // rax = -(r8 & -2) becomes the loop counter and rdi = 0 the element
        // index.
 24700  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24701  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24702  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24703  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24704  	LONG $0x03e8c149         // shr    r8, 3
 24705  	LONG $0x01c08349         // add    r8, 1
 24706  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24707  	JE   LBB3_882
 24708  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24709  	LONG $0xfee08348         // and    rax, -2
 24710  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24711  	WORD $0xff31             // xor    edi, edi
 24712  
 24713  LBB3_517:
        // Each pass loads 16 dwords from [rdx + 4*rdi], computes 0 - x per
        // dword (pxor to zero, then psubd) and stores them at [rcx + 4*rdi].
        // rdi advances by 16 elements and rax by 2 until rax reaches zero,
        // then control continues at LBB3_883.
 24714  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 24715  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 24716  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24717  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 24718  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24719  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 24720  	LONG $0x147f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm2
 24721  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
 24722  	LONG $0x446f0ff3; WORD $0x20ba // movdqu    xmm0, oword [rdx + 4*rdi + 32]
 24723  	LONG $0x4c6f0ff3; WORD $0x30ba // movdqu    xmm1, oword [rdx + 4*rdi + 48]
 24724  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24725  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 24726  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24727  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 24728  	LONG $0x547f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm2
 24729  	LONG $0x447f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm0
 24730  	LONG $0x10c78348               // add    rdi, 16
 24731  	LONG $0x02c08348               // add    rax, 2
 24732  	JNE  LBB3_517
 24733  	JMP  LBB3_883
 24734  
 24735  LBB3_518:
        // Setup for the dword negation loop at LBB3_520 (same instruction
        // sequence as LBB3_515/LBB3_517, with different branch targets).
        //   esi = r9d & -8             (r9d rounded down to a multiple of 8)
        //   r8  = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero the loop is bypassed via LBB3_890. Otherwise
        // rax = -(r8 & -2) becomes the loop counter and rdi = 0 the element
        // index.
 24736  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24737  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24738  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24739  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24740  	LONG $0x03e8c149         // shr    r8, 3
 24741  	LONG $0x01c08349         // add    r8, 1
 24742  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24743  	JE   LBB3_890
 24744  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24745  	LONG $0xfee08348         // and    rax, -2
 24746  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24747  	WORD $0xff31             // xor    edi, edi
 24748  
 24749  LBB3_520:
        // Each pass loads 16 dwords from [rdx + 4*rdi], computes 0 - x per
        // dword (pxor to zero, then psubd) and stores them at [rcx + 4*rdi].
        // rdi advances by 16 elements and rax by 2 until rax reaches zero,
        // then control continues at LBB3_891.
 24750  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 24751  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 24752  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24753  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 24754  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24755  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 24756  	LONG $0x147f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm2
 24757  	LONG $0x447f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm0
 24758  	LONG $0x446f0ff3; WORD $0x20ba // movdqu    xmm0, oword [rdx + 4*rdi + 32]
 24759  	LONG $0x4c6f0ff3; WORD $0x30ba // movdqu    xmm1, oword [rdx + 4*rdi + 48]
 24760  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 24761  	LONG $0xd0fa0f66               // psubd    xmm2, xmm0
 24762  	LONG $0xc0ef0f66               // pxor    xmm0, xmm0
 24763  	LONG $0xc1fa0f66               // psubd    xmm0, xmm1
 24764  	LONG $0x547f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm2
 24765  	LONG $0x447f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm0
 24766  	LONG $0x10c78348               // add    rdi, 16
 24767  	LONG $0x02c08348               // add    rax, 2
 24768  	JNE  LBB3_520
 24769  	JMP  LBB3_891
 24770  
 24771  LBB3_521:
        // Setup for the dword compare/blend loop at LBB3_523.
        //   esi = r9d & -8             (r9d rounded down to a multiple of 8)
        //   r8  = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero the loop is bypassed via LBB3_898. Otherwise
        // rax = -(r8 & -2) becomes the loop counter and rdi = 0 the element
        // index. Loop-invariant registers: xmm2 = 0, xmm3 = all ones
        // (pcmpeqd of a register with itself), xmm4 = constant loaded from
        // 32[rbp] (.LCPI3_3 in the generated comment).
 24772  	WORD $0x8944; BYTE $0xce     // mov    esi, r9d
 24773  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 24774  	LONG $0xf8468d48             // lea    rax, [rsi - 8]
 24775  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
 24776  	LONG $0x03e8c149             // shr    r8, 3
 24777  	LONG $0x01c08349             // add    r8, 1
 24778  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 24779  	JE   LBB3_898
 24780  	WORD $0x894c; BYTE $0xc0     // mov    rax, r8
 24781  	LONG $0xfee08348             // and    rax, -2
 24782  	WORD $0xf748; BYTE $0xd8     // neg    rax
 24783  	WORD $0xff31                 // xor    edi, edi
 24784  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 24785  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3
 24786  	LONG $0x656f0f66; BYTE $0x20 // movdqa    xmm4, oword 32[rbp] /* [rip + .LCPI3_3] */
 24787  
 24788  LBB3_523:
        // For every dword x loaded from [rdx + 4*rdi]:
        //   mask = (xmm4 > x)          signed compare (pcmpgtd), kept in xmm0/xmm1
        //   nz   = ~(x == 0)           all ones where x is non-zero, else 0
        //   out  = mask ? nz : xmm4    blendvps selects on the mask held in xmm0,
        //                              which is why xmm1 is copied into xmm0
        //                              before the second blend of each pair.
        // NOTE(review): if .LCPI3_3 holds 1 in every dword this evaluates to
        // -1 / 0 / +1 by the sign of x - confirm against the constant table.
        // 16 dwords are processed per pass; rdi advances by 16 elements and
        // rax by 2 until rax reaches zero, then control continues at LBB3_899.
 24789  	LONG $0x2c6f0ff3; BYTE $0xba   // movdqu    xmm5, oword [rdx + 4*rdi]
 24790  	LONG $0x746f0ff3; WORD $0x10ba // movdqu    xmm6, oword [rdx + 4*rdi + 16]
 24791  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24792  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5
 24793  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 24794  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 24795  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24796  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 24797  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 24798  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24799  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24800  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0
 24801  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24802  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24803  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 24804  	LONG $0xb93c110f               // movups    oword [rcx + 4*rdi], xmm7
 24805  	LONG $0xb96c110f; BYTE $0x10   // movups    oword [rcx + 4*rdi + 16], xmm5
 24806  	LONG $0x6c6f0ff3; WORD $0x20ba // movdqu    xmm5, oword [rdx + 4*rdi + 32]
 24807  	LONG $0x746f0ff3; WORD $0x30ba // movdqu    xmm6, oword [rdx + 4*rdi + 48]
 24808  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 24809  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5
 24810  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 24811  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 24812  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 24813  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 24814  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 24815  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 24816  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 24817  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0
 24818  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 24819  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 24820  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 24821  	LONG $0xb97c110f; BYTE $0x20   // movups    oword [rcx + 4*rdi + 32], xmm7
 24822  	LONG $0xb96c110f; BYTE $0x30   // movups    oword [rcx + 4*rdi + 48], xmm5
 24823  	LONG $0x10c78348               // add    rdi, 16
 24824  	LONG $0x02c08348               // add    rax, 2
 24825  	JNE  LBB3_523
 24826  	JMP  LBB3_899
 24827  
 24828  LBB3_524:
        // Setup for the dword absolute-value loop at LBB3_526.
        //   esi = r9d & -8             (r9d rounded down to a multiple of 8)
        //   r8  = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero the loop is bypassed via LBB3_907. Otherwise
        // rax = -(r8 & -2) becomes the loop counter and rdi = 0 the element
        // index.
 24829  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24830  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24831  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24832  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24833  	LONG $0x03e8c149         // shr    r8, 3
 24834  	LONG $0x01c08349         // add    r8, 1
 24835  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24836  	JE   LBB3_907
 24837  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24838  	LONG $0xfee08348         // and    rax, -2
 24839  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24840  	WORD $0xff31             // xor    edi, edi
 24841  
 24842  LBB3_526:
        // Each pass loads 16 dwords from [rdx + 4*rdi], applies pabsd
        // (per-dword absolute value) and stores them at [rcx + 4*rdi].
        // rdi advances by 16 elements and rax by 2 until rax reaches zero,
        // then control continues at LBB3_908.
 24843  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 24844  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 24845  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 24846  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 24847  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
 24848  	LONG $0x4c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm1
 24849  	LONG $0x446f0ff3; WORD $0x20ba // movdqu    xmm0, oword [rdx + 4*rdi + 32]
 24850  	LONG $0x4c6f0ff3; WORD $0x30ba // movdqu    xmm1, oword [rdx + 4*rdi + 48]
 24851  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 24852  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 24853  	LONG $0x447f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm0
 24854  	LONG $0x4c7f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm1
 24855  	LONG $0x10c78348               // add    rdi, 16
 24856  	LONG $0x02c08348               // add    rax, 2
 24857  	JNE  LBB3_526
 24858  	JMP  LBB3_908
 24859  
 24860  LBB3_527:
        // Setup for the dword absolute-value loop at LBB3_529 (same
        // instruction sequence as LBB3_524/LBB3_526, with different branch
        // targets).
        //   esi = r9d & -8             (r9d rounded down to a multiple of 8)
        //   r8  = ((rsi - 8) >> 3) + 1
        // If rsi - 8 is zero the loop is bypassed via LBB3_915. Otherwise
        // rax = -(r8 & -2) becomes the loop counter and rdi = 0 the element
        // index.
 24861  	WORD $0x8944; BYTE $0xce // mov    esi, r9d
 24862  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
 24863  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 24864  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
 24865  	LONG $0x03e8c149         // shr    r8, 3
 24866  	LONG $0x01c08349         // add    r8, 1
 24867  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 24868  	JE   LBB3_915
 24869  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
 24870  	LONG $0xfee08348         // and    rax, -2
 24871  	WORD $0xf748; BYTE $0xd8 // neg    rax
 24872  	WORD $0xff31             // xor    edi, edi
 24873  
 24874  LBB3_529:
        // Each pass loads 16 dwords from [rdx + 4*rdi], applies pabsd
        // (per-dword absolute value) and stores them at [rcx + 4*rdi].
        // rdi advances by 16 elements and rax by 2 until rax reaches zero,
        // then control continues at LBB3_916.
 24875  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 24876  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 24877  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 24878  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 24879  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
 24880  	LONG $0x4c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm1
 24881  	LONG $0x446f0ff3; WORD $0x20ba // movdqu    xmm0, oword [rdx + 4*rdi + 32]
 24882  	LONG $0x4c6f0ff3; WORD $0x30ba // movdqu    xmm1, oword [rdx + 4*rdi + 48]
 24883  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 24884  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 24885  	LONG $0x447f0ff3; WORD $0x20b9 // movdqu    oword [rcx + 4*rdi + 32], xmm0
 24886  	LONG $0x4c7f0ff3; WORD $0x30b9 // movdqu    oword [rcx + 4*rdi + 48], xmm1
 24887  	LONG $0x10c78348               // add    rdi, 16
 24888  	LONG $0x02c08348               // add    rax, 2
 24889  	JNE  LBB3_529
 24890  	JMP  LBB3_916
 24891  
 24892  LBB3_367:
        // Zero fill of the buffer at rcx, indexed in 4-byte elements.
        // Setup for the bulk loop: rdi = -(rdi & -8) is the loop counter,
        // rax = 0 the element index, xmm0 = 0 the value stored.
 24893  	LONG $0xf8e78348         // and    rdi, -8
 24894  	WORD $0xf748; BYTE $0xdf // neg    rdi
 24895  	WORD $0xc031             // xor    eax, eax
 24896  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 24897  
 24898  LBB3_368:
        // Bulk loop: sixteen 16-byte stores of zero (256 bytes) starting at
        // [rcx + 4*rax]. rax advances by 64 elements and rdi by 8 until rdi
        // reaches zero.
 24899  	LONG $0x04110f66; BYTE $0x81         // movupd    oword [rcx + 4*rax], xmm0
 24900  	LONG $0x44110f66; WORD $0x1081       // movupd    oword [rcx + 4*rax + 16], xmm0
 24901  	LONG $0x44110f66; WORD $0x2081       // movupd    oword [rcx + 4*rax + 32], xmm0
 24902  	LONG $0x44110f66; WORD $0x3081       // movupd    oword [rcx + 4*rax + 48], xmm0
 24903  	LONG $0x44110f66; WORD $0x4081       // movupd    oword [rcx + 4*rax + 64], xmm0
 24904  	LONG $0x44110f66; WORD $0x5081       // movupd    oword [rcx + 4*rax + 80], xmm0
 24905  	LONG $0x44110f66; WORD $0x6081       // movupd    oword [rcx + 4*rax + 96], xmm0
 24906  	LONG $0x44110f66; WORD $0x7081       // movupd    oword [rcx + 4*rax + 112], xmm0
 24907  	QUAD $0x0000808184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 128], xmm0
 24908  	QUAD $0x0000908184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 144], xmm0
 24909  	QUAD $0x0000a08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 160], xmm0
 24910  	QUAD $0x0000b08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 176], xmm0
 24911  	QUAD $0x0000c08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 192], xmm0
 24912  	QUAD $0x0000d08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 208], xmm0
 24913  	QUAD $0x0000e08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 224], xmm0
 24914  	QUAD $0x0000f08184110f66; BYTE $0x00 // movupd    oword [rcx + 4*rax + 240], xmm0
 24915  	LONG $0x40c08348                     // add    rax, 64
 24916  	LONG $0x08c78348                     // add    rdi, 8
 24917  	JNE  LBB3_368
 24918  
 24919  LBB3_369:
        // Remainder in 32-byte steps: skipped when rsi is zero. Otherwise rax
        // is turned into a pointer (rcx + 4*rax + 16) and rsi is negated so
        // it can count up to zero.
 24920  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 24921  	JE   LBB3_372
 24922  	LONG $0x81048d48         // lea    rax, [rcx + 4*rax]
 24923  	LONG $0x10c08348         // add    rax, 16
 24924  	WORD $0xf748; BYTE $0xde // neg    rsi
 24925  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 24926  
 24927  LBB3_371:
        // Two 16-byte zero stores per pass at [rax - 16] and [rax]; rax
        // advances by 32 bytes and rsi by 1 until rsi reaches zero.
 24928  	LONG $0x40110f66; BYTE $0xf0 // movupd    oword [rax - 16], xmm0
 24929  	LONG $0x00110f66             // movupd    oword [rax], xmm0
 24930  	LONG $0x20c08348             // add    rax, 32
 24931  	WORD $0xff48; BYTE $0xc6     // inc    rsi
 24932  	JNE  LBB3_371
 24933  
 24934  LBB3_372:
        // Nothing left for the scalar tail when rdx already equals r9.
 24935  	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
 24936  	JE   LBB3_923
 24937  
 24938  LBB3_373:
        // Scalar tail: store a zero dword at [rcx + 4*rdx], one element per
        // pass, until rdx equals r9; then return via LBB3_923.
 24939  	LONG $0x009104c7; WORD $0x0000; BYTE $0x00 // mov    dword [rcx + 4*rdx], 0
 24940  	LONG $0x01c28348                           // add    rdx, 1
 24941  	WORD $0x3949; BYTE $0xd1                   // cmp    r9, rdx
 24942  	JNE  LBB3_373
 24943  	JMP  LBB3_923
 24944  
 24945  LBB3_414:
        // Zero fill of the buffer at rcx, indexed in 8-byte elements.
        // Setup for the bulk loop: rdi = -(rdi & -8) is the loop counter,
        // rax = 0 the element index, xmm0 = 0 the value stored.
 24946  	LONG $0xf8e78348         // and    rdi, -8
 24947  	WORD $0xf748; BYTE $0xdf // neg    rdi
 24948  	WORD $0xc031             // xor    eax, eax
 24949  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 24950  
 24951  LBB3_415:
        // Bulk loop: sixteen 16-byte stores of zero (256 bytes) starting at
        // [rcx + 8*rax]. rax advances by 32 elements and rdi by 8 until rdi
        // reaches zero.
 24952  	LONG $0x04110f66; BYTE $0xc1         // movupd    oword [rcx + 8*rax], xmm0
 24953  	LONG $0x44110f66; WORD $0x10c1       // movupd    oword [rcx + 8*rax + 16], xmm0
 24954  	LONG $0x44110f66; WORD $0x20c1       // movupd    oword [rcx + 8*rax + 32], xmm0
 24955  	LONG $0x44110f66; WORD $0x30c1       // movupd    oword [rcx + 8*rax + 48], xmm0
 24956  	LONG $0x44110f66; WORD $0x40c1       // movupd    oword [rcx + 8*rax + 64], xmm0
 24957  	LONG $0x44110f66; WORD $0x50c1       // movupd    oword [rcx + 8*rax + 80], xmm0
 24958  	LONG $0x44110f66; WORD $0x60c1       // movupd    oword [rcx + 8*rax + 96], xmm0
 24959  	LONG $0x44110f66; WORD $0x70c1       // movupd    oword [rcx + 8*rax + 112], xmm0
 24960  	QUAD $0x000080c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 128], xmm0
 24961  	QUAD $0x000090c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 144], xmm0
 24962  	QUAD $0x0000a0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 160], xmm0
 24963  	QUAD $0x0000b0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 176], xmm0
 24964  	QUAD $0x0000c0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 192], xmm0
 24965  	QUAD $0x0000d0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 208], xmm0
 24966  	QUAD $0x0000e0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 224], xmm0
 24967  	QUAD $0x0000f0c184110f66; BYTE $0x00 // movupd    oword [rcx + 8*rax + 240], xmm0
 24968  	LONG $0x20c08348                     // add    rax, 32
 24969  	LONG $0x08c78348                     // add    rdi, 8
 24970  	JNE  LBB3_415
 24971  
 24972  LBB3_416:
        // Remainder in 32-byte steps: skipped when rsi is zero. Otherwise rax
        // is turned into a pointer (rcx + 8*rax + 16) and rsi is negated so
        // it can count up to zero.
 24973  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 24974  	JE   LBB3_419
 24975  	LONG $0xc1048d48         // lea    rax, [rcx + 8*rax]
 24976  	LONG $0x10c08348         // add    rax, 16
 24977  	WORD $0xf748; BYTE $0xde // neg    rsi
 24978  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 24979  
 24980  LBB3_418:
        // Two 16-byte zero stores per pass at [rax - 16] and [rax]; rax
        // advances by 32 bytes and rsi by 1 until rsi reaches zero.
 24981  	LONG $0x40110f66; BYTE $0xf0 // movupd    oword [rax - 16], xmm0
 24982  	LONG $0x00110f66             // movupd    oword [rax], xmm0
 24983  	LONG $0x20c08348             // add    rax, 32
 24984  	WORD $0xff48; BYTE $0xc6     // inc    rsi
 24985  	JNE  LBB3_418
 24986  
 24987  LBB3_419:
        // Nothing left for the scalar tail when rdx already equals r9.
 24988  	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
 24989  	JE   LBB3_923
 24990  
 24991  LBB3_420:
        // Scalar tail: store a zero qword at [rcx + 8*rdx], one element per
        // pass, until rdx equals r9; then return via LBB3_923.
 24992  	QUAD $0x00000000d104c748 // mov    qword [rcx + 8*rdx], 0
 24993  	LONG $0x01c28348         // add    rdx, 1
 24994  	WORD $0x3949; BYTE $0xd1 // cmp    r9, rdx
 24995  	JNE  LBB3_420
 24996  	JMP  LBB3_923
 24997  
 24998  LBB3_431:
        // Zero fill of the buffer at rcx, indexed in 2-byte elements.
        // Setup for the bulk loop: rdi = -(rdi & -8) is the loop counter,
        // rax = 0 the element index, xmm0 = 0 the value stored.
 24999  	LONG $0xf8e78348         // and    rdi, -8
 25000  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25001  	WORD $0xc031             // xor    eax, eax
 25002  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 25003  
 25004  LBB3_432:
        // Bulk loop: sixteen 16-byte stores of zero (256 bytes) starting at
        // [rcx + 2*rax]. rax advances by 128 elements (encoded as
        // "sub rax, -128") and rdi by 8 until rdi reaches zero.
 25005  	LONG $0x04110f66; BYTE $0x41         // movupd    oword [rcx + 2*rax], xmm0
 25006  	LONG $0x44110f66; WORD $0x1041       // movupd    oword [rcx + 2*rax + 16], xmm0
 25007  	LONG $0x44110f66; WORD $0x2041       // movupd    oword [rcx + 2*rax + 32], xmm0
 25008  	LONG $0x44110f66; WORD $0x3041       // movupd    oword [rcx + 2*rax + 48], xmm0
 25009  	LONG $0x44110f66; WORD $0x4041       // movupd    oword [rcx + 2*rax + 64], xmm0
 25010  	LONG $0x44110f66; WORD $0x5041       // movupd    oword [rcx + 2*rax + 80], xmm0
 25011  	LONG $0x44110f66; WORD $0x6041       // movupd    oword [rcx + 2*rax + 96], xmm0
 25012  	LONG $0x44110f66; WORD $0x7041       // movupd    oword [rcx + 2*rax + 112], xmm0
 25013  	QUAD $0x0000804184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 128], xmm0
 25014  	QUAD $0x0000904184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 144], xmm0
 25015  	QUAD $0x0000a04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 160], xmm0
 25016  	QUAD $0x0000b04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 176], xmm0
 25017  	QUAD $0x0000c04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 192], xmm0
 25018  	QUAD $0x0000d04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 208], xmm0
 25019  	QUAD $0x0000e04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 224], xmm0
 25020  	QUAD $0x0000f04184110f66; BYTE $0x00 // movupd    oword [rcx + 2*rax + 240], xmm0
 25021  	LONG $0x80e88348                     // sub    rax, -128
 25022  	LONG $0x08c78348                     // add    rdi, 8
 25023  	JNE  LBB3_432
 25024  
 25025  LBB3_433:
        // Remainder in 32-byte steps: skipped when rsi is zero. Otherwise rax
        // is turned into a pointer (rcx + 2*rax + 16) and rsi is negated so
        // it can count up to zero.
 25026  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 25027  	JE   LBB3_436
 25028  	LONG $0x41048d48         // lea    rax, [rcx + 2*rax]
 25029  	LONG $0x10c08348         // add    rax, 16
 25030  	WORD $0xf748; BYTE $0xde // neg    rsi
 25031  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 25032  
 25033  LBB3_435:
        // Two 16-byte zero stores per pass at [rax - 16] and [rax]; rax
        // advances by 32 bytes and rsi by 1 until rsi reaches zero.
 25034  	LONG $0x40110f66; BYTE $0xf0 // movupd    oword [rax - 16], xmm0
 25035  	LONG $0x00110f66             // movupd    oword [rax], xmm0
 25036  	LONG $0x20c08348             // add    rax, 32
 25037  	WORD $0xff48; BYTE $0xc6     // inc    rsi
 25038  	JNE  LBB3_435
 25039  
 25040  LBB3_436:
        // Nothing left for the scalar tail when rdx already equals r9.
 25041  	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
 25042  	JE   LBB3_923
 25043  
 25044  LBB3_437:
        // Scalar tail: store a zero word at [rcx + 2*rdx], one element per
        // pass, until rdx equals r9; then return via LBB3_923.
 25045  	LONG $0x5104c766; WORD $0x0000 // mov    word [rcx + 2*rdx], 0
 25046  	LONG $0x01c28348               // add    rdx, 1
 25047  	WORD $0x3949; BYTE $0xd1       // cmp    r9, rdx
 25048  	JNE  LBB3_437
 25049  	JMP  LBB3_923
 25050  
 25051  LBB3_498:
        // Zero fill of the buffer at rcx, indexed in single bytes.
        // Setup for the bulk loop: rdi = -(rdi & -8) is the loop counter,
        // rax = 0 the byte offset, xmm0 = 0 the value stored.
 25052  	LONG $0xf8e78348         // and    rdi, -8
 25053  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25054  	WORD $0xc031             // xor    eax, eax
 25055  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 25056  
 25057  LBB3_499:
        // Bulk loop: sixteen 16-byte stores of zero (256 bytes) starting at
        // [rcx + rax]. rax advances by 256 bytes and rdi by 8 until rdi
        // reaches zero.
 25058  	LONG $0x04110f66; BYTE $0x01         // movupd    oword [rcx + rax], xmm0
 25059  	LONG $0x44110f66; WORD $0x1001       // movupd    oword [rcx + rax + 16], xmm0
 25060  	LONG $0x44110f66; WORD $0x2001       // movupd    oword [rcx + rax + 32], xmm0
 25061  	LONG $0x44110f66; WORD $0x3001       // movupd    oword [rcx + rax + 48], xmm0
 25062  	LONG $0x44110f66; WORD $0x4001       // movupd    oword [rcx + rax + 64], xmm0
 25063  	LONG $0x44110f66; WORD $0x5001       // movupd    oword [rcx + rax + 80], xmm0
 25064  	LONG $0x44110f66; WORD $0x6001       // movupd    oword [rcx + rax + 96], xmm0
 25065  	LONG $0x44110f66; WORD $0x7001       // movupd    oword [rcx + rax + 112], xmm0
 25066  	QUAD $0x0000800184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 128], xmm0
 25067  	QUAD $0x0000900184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 144], xmm0
 25068  	QUAD $0x0000a00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 160], xmm0
 25069  	QUAD $0x0000b00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 176], xmm0
 25070  	QUAD $0x0000c00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 192], xmm0
 25071  	QUAD $0x0000d00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 208], xmm0
 25072  	QUAD $0x0000e00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 224], xmm0
 25073  	QUAD $0x0000f00184110f66; BYTE $0x00 // movupd    oword [rcx + rax + 240], xmm0
 25074  	LONG $0x01000548; WORD $0x0000       // add    rax, 256
 25075  	LONG $0x08c78348                     // add    rdi, 8
 25076  	JNE  LBB3_499
 25077  
 25078  LBB3_500:
        // Remainder in 32-byte steps: skipped when rsi is zero. Otherwise rax
        // is turned into a pointer (rcx + rax + 16) and rsi is negated so it
        // can count up to zero.
 25079  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 25080  	JE   LBB3_503
 25081  	WORD $0x0148; BYTE $0xc8 // add    rax, rcx
 25082  	LONG $0x10c08348         // add    rax, 16
 25083  	WORD $0xf748; BYTE $0xde // neg    rsi
 25084  	LONG $0xc0570f66         // xorpd    xmm0, xmm0
 25085  
 25086  LBB3_502:
        // Two 16-byte zero stores per pass at [rax - 16] and [rax]; rax
        // advances by 32 bytes and rsi by 1 until rsi reaches zero.
 25087  	LONG $0x40110f66; BYTE $0xf0 // movupd    oword [rax - 16], xmm0
 25088  	LONG $0x00110f66             // movupd    oword [rax], xmm0
 25089  	LONG $0x20c08348             // add    rax, 32
 25090  	WORD $0xff48; BYTE $0xc6     // inc    rsi
 25091  	JNE  LBB3_502
 25092  
 25093  LBB3_503:
        // Nothing left for the scalar tail when rdx already equals r9.
 25094  	WORD $0x394c; BYTE $0xca // cmp    rdx, r9
 25095  	JE   LBB3_923
 25096  
 25097  LBB3_504:
        // Scalar tail: store a zero byte at [rcx + rdx], one element per
        // pass, until rdx equals r9; then fall through into LBB3_923.
 25098  	LONG $0x001104c6         // mov    byte [rcx + rdx], 0
 25099  	LONG $0x01c28348         // add    rdx, 1
 25100  	WORD $0x3949; BYTE $0xd1 // cmp    r9, rdx
 25101  	JNE  LBB3_504
 25102  
 25103  LBB3_923:
        // Return point: the blocks above branch here once their work is
        // complete.
 25104  	RET
 25105  
 25106  LBB3_530:
        // Plain copy from the buffer at rdx to the buffer at rcx, indexed in
        // 4-byte elements. Setup for the bulk loop: rdi = -(rdi & -4) is the
        // loop counter and rax = 0 the element index.
 25107  	LONG $0xfce78348         // and    rdi, -4
 25108  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25109  	WORD $0xc031             // xor    eax, eax
 25110  
 25111  LBB3_531:
        // Bulk loop: four load/store groups of 32 bytes each (128 bytes per
        // pass) from [rdx + 4*rax] to [rcx + 4*rax]. rax advances by 32
        // elements and rdi by 4 until rdi reaches zero.
 25112  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
 25113  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
 25114  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
 25115  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
 25116  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
 25117  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
 25118  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
 25119  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
 25120  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
 25121  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
 25122  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
 25123  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
 25124  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
 25125  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
 25126  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
 25127  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
 25128  	LONG $0x20c08348               // add    rax, 32
 25129  	LONG $0x04c78348               // add    rdi, 4
 25130  	JNE  LBB3_531
 25131  
 25132  LBB3_532:
        // Remainder in 32-byte steps: skipped when r8 is zero. Otherwise rax
        // is converted from an element index to a byte offset (4*rax + 16)
        // and r8 is negated so it can count up to zero.
 25133  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25134  	JE   LBB3_535
 25135  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16]
 25136  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25137  
 25138  LBB3_534:
        // Copies 32 bytes per pass (offsets rax - 16 and rax) from rdx to
        // rcx; rax advances by 32 bytes and r8 by 1 until r8 reaches zero.
 25139  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25140  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25141  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25142  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25143  	LONG $0x20c08348               // add    rax, 32
 25144  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25145  	JNE  LBB3_534
 25146  
 25147  LBB3_535:
        // Return via LBB3_923 when rsi equals r9; otherwise continue at
        // LBB3_536.
 25148  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25149  	JE   LBB3_923
 25150  	JMP  LBB3_536
 25151  
 25152  LBB3_540:
        // Plain copy from the buffer at rdx to the buffer at rcx, indexed in
        // 4-byte elements (same instruction sequence as LBB3_530..LBB3_535,
        // with different branch targets). Setup for the bulk loop:
        // rdi = -(rdi & -4) is the loop counter and rax = 0 the element index.
 25153  	LONG $0xfce78348         // and    rdi, -4
 25154  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25155  	WORD $0xc031             // xor    eax, eax
 25156  
 25157  LBB3_541:
        // Bulk loop: four load/store groups of 32 bytes each (128 bytes per
        // pass) from [rdx + 4*rax] to [rcx + 4*rax]. rax advances by 32
        // elements and rdi by 4 until rdi reaches zero.
 25158  	LONG $0x8204100f               // movups    xmm0, oword [rdx + 4*rax]
 25159  	LONG $0x824c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 4*rax + 16]
 25160  	LONG $0x8104110f               // movups    oword [rcx + 4*rax], xmm0
 25161  	LONG $0x814c110f; BYTE $0x10   // movups    oword [rcx + 4*rax + 16], xmm1
 25162  	LONG $0x8244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 4*rax + 32]
 25163  	LONG $0x824c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 4*rax + 48]
 25164  	LONG $0x8144110f; BYTE $0x20   // movups    oword [rcx + 4*rax + 32], xmm0
 25165  	LONG $0x814c110f; BYTE $0x30   // movups    oword [rcx + 4*rax + 48], xmm1
 25166  	LONG $0x8244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 4*rax + 64]
 25167  	LONG $0x824c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 4*rax + 80]
 25168  	LONG $0x8144110f; BYTE $0x40   // movups    oword [rcx + 4*rax + 64], xmm0
 25169  	LONG $0x814c110f; BYTE $0x50   // movups    oword [rcx + 4*rax + 80], xmm1
 25170  	LONG $0x44100f66; WORD $0x6082 // movupd    xmm0, oword [rdx + 4*rax + 96]
 25171  	LONG $0x4c100f66; WORD $0x7082 // movupd    xmm1, oword [rdx + 4*rax + 112]
 25172  	LONG $0x44110f66; WORD $0x6081 // movupd    oword [rcx + 4*rax + 96], xmm0
 25173  	LONG $0x4c110f66; WORD $0x7081 // movupd    oword [rcx + 4*rax + 112], xmm1
 25174  	LONG $0x20c08348               // add    rax, 32
 25175  	LONG $0x04c78348               // add    rdi, 4
 25176  	JNE  LBB3_541
 25177  
 25178  LBB3_542:
        // Remainder in 32-byte steps: skipped when r8 is zero. Otherwise rax
        // is converted from an element index to a byte offset (4*rax + 16)
        // and r8 is negated so it can count up to zero.
 25179  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25180  	JE   LBB3_545
 25181  	QUAD $0x0000001085048d48 // lea    rax, [4*rax + 16]
 25182  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25183  
 25184  LBB3_544:
        // Copies 32 bytes per pass (offsets rax - 16 and rax) from rdx to
        // rcx; rax advances by 32 bytes and r8 by 1 until r8 reaches zero.
 25185  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25186  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25187  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25188  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25189  	LONG $0x20c08348               // add    rax, 32
 25190  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25191  	JNE  LBB3_544
 25192  
 25193  LBB3_545:
        // Return via LBB3_923 when rsi equals r9; otherwise continue at
        // LBB3_546.
 25194  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25195  	JE   LBB3_923
 25196  	JMP  LBB3_546
 25197  
 25198  LBB3_550:
        // Plain copy from the buffer at rdx to the buffer at rcx, indexed in
        // 8-byte elements. Setup for the bulk loop: rdi = -(rdi & -4) is the
        // loop counter and rax = 0 the element index.
 25199  	LONG $0xfce78348         // and    rdi, -4
 25200  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25201  	WORD $0xc031             // xor    eax, eax
 25202  
 25203  LBB3_551:
        // Bulk loop: four load/store groups of 32 bytes each (128 bytes per
        // pass) from [rdx + 8*rax] to [rcx + 8*rax]. rax advances by 16
        // elements and rdi by 4 until rdi reaches zero.
 25204  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
 25205  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
 25206  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
 25207  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
 25208  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
 25209  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
 25210  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
 25211  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
 25212  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
 25213  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
 25214  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
 25215  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
 25216  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
 25217  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
 25218  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
 25219  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
 25220  	LONG $0x10c08348               // add    rax, 16
 25221  	LONG $0x04c78348               // add    rdi, 4
 25222  	JNE  LBB3_551
 25223  
 25224  LBB3_552:
        // Remainder in 32-byte steps: skipped when r8 is zero. Otherwise rax
        // is converted from an element index to a byte offset (8*rax + 16)
        // and r8 is negated so it can count up to zero.
 25225  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25226  	JE   LBB3_555
 25227  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16]
 25228  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25229  
 25230  LBB3_554:
        // Copies 32 bytes per pass (offsets rax - 16 and rax) from rdx to
        // rcx; rax advances by 32 bytes and r8 by 1 until r8 reaches zero.
 25231  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25232  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25233  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25234  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25235  	LONG $0x20c08348               // add    rax, 32
 25236  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25237  	JNE  LBB3_554
 25238  
 25239  LBB3_555:
        // Return via LBB3_923 when rsi equals r9; otherwise continue at
        // LBB3_556.
 25240  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25241  	JE   LBB3_923
 25242  	JMP  LBB3_556
 25243  
 25244  LBB3_560:
        	// Plain vector copy from [rdx] to [rcx]; rax is an index scaled by 8 bytes.
        	// rdi is masked to a multiple of 4 and negated: it becomes an up-counter
        	// for the unrolled loop, where each unit stands for one 32-byte step.
        	// NOTE(review): the initial values of rdi and r8 come from code outside
        	// this view - presumably step count and step count mod 4; confirm.
 25245  	LONG $0xfce78348         // and    rdi, -4
 25246  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25247  	WORD $0xc031             // xor    eax, eax
 25248  
 25249  LBB3_561:
        	// Unrolled four times: 128 bytes are moved per iteration via xmm0/xmm1;
        	// rax advances by 16 indices (128 bytes) and rdi by 4 steps.
 25250  	LONG $0xc204100f               // movups    xmm0, oword [rdx + 8*rax]
 25251  	LONG $0xc24c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 8*rax + 16]
 25252  	LONG $0xc104110f               // movups    oword [rcx + 8*rax], xmm0
 25253  	LONG $0xc14c110f; BYTE $0x10   // movups    oword [rcx + 8*rax + 16], xmm1
 25254  	LONG $0xc244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 8*rax + 32]
 25255  	LONG $0xc24c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 8*rax + 48]
 25256  	LONG $0xc144110f; BYTE $0x20   // movups    oword [rcx + 8*rax + 32], xmm0
 25257  	LONG $0xc14c110f; BYTE $0x30   // movups    oword [rcx + 8*rax + 48], xmm1
 25258  	LONG $0xc244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 8*rax + 64]
 25259  	LONG $0xc24c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 8*rax + 80]
 25260  	LONG $0xc144110f; BYTE $0x40   // movups    oword [rcx + 8*rax + 64], xmm0
 25261  	LONG $0xc14c110f; BYTE $0x50   // movups    oword [rcx + 8*rax + 80], xmm1
 25262  	LONG $0x44100f66; WORD $0x60c2 // movupd    xmm0, oword [rdx + 8*rax + 96]
 25263  	LONG $0x4c100f66; WORD $0x70c2 // movupd    xmm1, oword [rdx + 8*rax + 112]
 25264  	LONG $0x44110f66; WORD $0x60c1 // movupd    oword [rcx + 8*rax + 96], xmm0
 25265  	LONG $0x4c110f66; WORD $0x70c1 // movupd    oword [rcx + 8*rax + 112], xmm1
 25266  	LONG $0x10c08348               // add    rax, 16
 25267  	LONG $0x04c78348               // add    rdi, 4
 25268  	JNE  LBB3_561
 25269  
 25270  LBB3_562:
        	// Remainder: r8 further 32-byte steps (none when r8 == 0). rax switches
        	// from index to byte offset, pre-biased by 16 for the [rax - 16] / [rax]
        	// addressing used below.
 25271  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25272  	JE   LBB3_565
 25273  	QUAD $0x00000010c5048d48 // lea    rax, [8*rax + 16]
 25274  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25275  
 25276  LBB3_564:
        	// Copies 32 bytes per pass; r8 was negated and is incremented to zero.
 25277  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25278  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25279  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25280  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25281  	LONG $0x20c08348               // add    rax, 32
 25282  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25283  	JNE  LBB3_564
 25284  
 25285  LBB3_565:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_566.
 25286  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25287  	JE   LBB3_923
 25288  	JMP  LBB3_566
 25289  
 25290  LBB3_570:
        	// Plain vector copy from [rdx] to [rcx]; rax is an index scaled by 2 bytes.
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled loop
        	// below can count it up to zero. One unit of rdi equals one 32-byte step.
        	// NOTE(review): rdi and r8 are initialised outside this view - presumably the
        	// number of 32-byte steps and that number mod 4; confirm at the jump sites.
 25291  	LONG $0xfce78348         // and    rdi, -4
 25292  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25293  	WORD $0xc031             // xor    eax, eax
 25294  
 25295  LBB3_571:
        	// 4x-unrolled body: moves 128 bytes per iteration through xmm0/xmm1,
        	// then advances rax by 64 (64 * 2 = 128 bytes) and rdi by 4 steps.
 25296  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
 25297  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
 25298  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
 25299  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
 25300  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
 25301  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
 25302  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
 25303  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
 25304  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
 25305  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
 25306  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
 25307  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
 25308  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
 25309  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
 25310  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
 25311  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
 25312  	LONG $0x40c08348               // add    rax, 64
 25313  	LONG $0x04c78348               // add    rdi, 4
 25314  	JNE  LBB3_571
 25315  
 25316  LBB3_572:
        	// Leftover 32-byte steps, counted by r8 (skipped entirely when r8 == 0).
        	// rax is doubled (index -> byte offset) and biased by +16 so the loop
        	// below can address both halves as [rax - 16] and [rax].
 25317  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25318  	JE   LBB3_575
 25319  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 25320  	LONG $0x10c08348         // add    rax, 16
 25321  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25322  
 25323  LBB3_574:
        	// One 32-byte step per iteration; the negated r8 counts up to zero.
 25324  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25325  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25326  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25327  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25328  	LONG $0x20c08348               // add    rax, 32
 25329  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25330  	JNE  LBB3_574
 25331  
 25332  LBB3_575:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_576.
 25333  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25334  	JE   LBB3_923
 25335  	JMP  LBB3_576
 25336  
 25337  LBB3_580:
        	// Plain vector copy from [rdx] to [rcx]; rax is an index scaled by 2 bytes.
        	// rdi is masked to a multiple of 4 and negated: it becomes an up-counter
        	// for the unrolled loop, where each unit stands for one 32-byte step.
        	// NOTE(review): the initial values of rdi and r8 come from code outside
        	// this view - presumably step count and step count mod 4; confirm.
 25338  	LONG $0xfce78348         // and    rdi, -4
 25339  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25340  	WORD $0xc031             // xor    eax, eax
 25341  
 25342  LBB3_581:
        	// Unrolled four times: 128 bytes are moved per iteration via xmm0/xmm1;
        	// rax advances by 64 indices (128 bytes) and rdi by 4 steps.
 25343  	LONG $0x4204100f               // movups    xmm0, oword [rdx + 2*rax]
 25344  	LONG $0x424c100f; BYTE $0x10   // movups    xmm1, oword [rdx + 2*rax + 16]
 25345  	LONG $0x4104110f               // movups    oword [rcx + 2*rax], xmm0
 25346  	LONG $0x414c110f; BYTE $0x10   // movups    oword [rcx + 2*rax + 16], xmm1
 25347  	LONG $0x4244100f; BYTE $0x20   // movups    xmm0, oword [rdx + 2*rax + 32]
 25348  	LONG $0x424c100f; BYTE $0x30   // movups    xmm1, oword [rdx + 2*rax + 48]
 25349  	LONG $0x4144110f; BYTE $0x20   // movups    oword [rcx + 2*rax + 32], xmm0
 25350  	LONG $0x414c110f; BYTE $0x30   // movups    oword [rcx + 2*rax + 48], xmm1
 25351  	LONG $0x4244100f; BYTE $0x40   // movups    xmm0, oword [rdx + 2*rax + 64]
 25352  	LONG $0x424c100f; BYTE $0x50   // movups    xmm1, oword [rdx + 2*rax + 80]
 25353  	LONG $0x4144110f; BYTE $0x40   // movups    oword [rcx + 2*rax + 64], xmm0
 25354  	LONG $0x414c110f; BYTE $0x50   // movups    oword [rcx + 2*rax + 80], xmm1
 25355  	LONG $0x44100f66; WORD $0x6042 // movupd    xmm0, oword [rdx + 2*rax + 96]
 25356  	LONG $0x4c100f66; WORD $0x7042 // movupd    xmm1, oword [rdx + 2*rax + 112]
 25357  	LONG $0x44110f66; WORD $0x6041 // movupd    oword [rcx + 2*rax + 96], xmm0
 25358  	LONG $0x4c110f66; WORD $0x7041 // movupd    oword [rcx + 2*rax + 112], xmm1
 25359  	LONG $0x40c08348               // add    rax, 64
 25360  	LONG $0x04c78348               // add    rdi, 4
 25361  	JNE  LBB3_581
 25362  
 25363  LBB3_582:
        	// Remainder: r8 further 32-byte steps (none when r8 == 0). rax is doubled
        	// to turn the index into a byte offset, then pre-biased by 16 for the
        	// [rax - 16] / [rax] addressing used below.
 25364  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25365  	JE   LBB3_585
 25366  	WORD $0x0148; BYTE $0xc0 // add    rax, rax
 25367  	LONG $0x10c08348         // add    rax, 16
 25368  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25369  
 25370  LBB3_584:
        	// Copies 32 bytes per pass; r8 was negated and is incremented to zero.
 25371  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25372  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25373  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25374  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25375  	LONG $0x20c08348               // add    rax, 32
 25376  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25377  	JNE  LBB3_584
 25378  
 25379  LBB3_585:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_586.
 25380  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25381  	JE   LBB3_923
 25382  	JMP  LBB3_586
 25383  
 25384  LBB3_590:
        	// Plain vector copy from [rdx] to [rcx]; rax is used directly as a byte offset.
        	// rdi is rounded down to a multiple of 4 and negated so the unrolled loop
        	// below can count it up to zero. One unit of rdi equals one 32-byte step.
        	// NOTE(review): rdi and r8 are initialised outside this view - presumably the
        	// number of 32-byte steps and that number mod 4; confirm at the jump sites.
 25385  	LONG $0xfce78348         // and    rdi, -4
 25386  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25387  	WORD $0xc031             // xor    eax, eax
 25388  
 25389  LBB3_591:
        	// 4x-unrolled body: moves 128 bytes per iteration through xmm0/xmm1.
        	// rax advances by 128 via "sub rax, -128": -128 fits the sign-extended
        	// 8-bit immediate form, +128 would not.
 25390  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
 25391  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
 25392  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
 25393  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
 25394  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
 25395  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
 25396  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
 25397  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
 25398  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
 25399  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
 25400  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
 25401  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
 25402  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
 25403  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
 25404  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
 25405  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
 25406  	LONG $0x80e88348               // sub    rax, -128
 25407  	LONG $0x04c78348               // add    rdi, 4
 25408  	JNE  LBB3_591
 25409  
 25410  LBB3_592:
        	// Leftover 32-byte steps, counted by r8 (skipped entirely when r8 == 0).
        	// rax is biased by +16 so the loop below can address both halves as
        	// [rax - 16] and [rax].
 25411  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25412  	JE   LBB3_595
 25413  	LONG $0x10c08348         // add    rax, 16
 25414  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25415  
 25416  LBB3_594:
        	// One 32-byte step per iteration; the negated r8 counts up to zero.
 25417  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25418  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25419  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25420  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25421  	LONG $0x20c08348               // add    rax, 32
 25422  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25423  	JNE  LBB3_594
 25424  
 25425  LBB3_595:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_596.
 25426  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25427  	JE   LBB3_923
 25428  	JMP  LBB3_596
 25429  
 25430  LBB3_600:
        	// Plain vector copy from [rdx] to [rcx]; rax is used directly as a byte offset.
        	// rdi is masked to a multiple of 4 and negated: it becomes an up-counter
        	// for the unrolled loop, where each unit stands for one 32-byte step.
        	// NOTE(review): the initial values of rdi and r8 come from code outside
        	// this view - presumably step count and step count mod 4; confirm.
 25431  	LONG $0xfce78348         // and    rdi, -4
 25432  	WORD $0xf748; BYTE $0xdf // neg    rdi
 25433  	WORD $0xc031             // xor    eax, eax
 25434  
 25435  LBB3_601:
        	// Unrolled four times: 128 bytes are moved per iteration via xmm0/xmm1.
        	// "sub rax, -128" adds 128 to rax; the negative constant is used because
        	// it fits the 8-bit sign-extended immediate encoding.
 25436  	LONG $0x0204100f               // movups    xmm0, oword [rdx + rax]
 25437  	LONG $0x024c100f; BYTE $0x10   // movups    xmm1, oword [rdx + rax + 16]
 25438  	LONG $0x0104110f               // movups    oword [rcx + rax], xmm0
 25439  	LONG $0x014c110f; BYTE $0x10   // movups    oword [rcx + rax + 16], xmm1
 25440  	LONG $0x0244100f; BYTE $0x20   // movups    xmm0, oword [rdx + rax + 32]
 25441  	LONG $0x024c100f; BYTE $0x30   // movups    xmm1, oword [rdx + rax + 48]
 25442  	LONG $0x0144110f; BYTE $0x20   // movups    oword [rcx + rax + 32], xmm0
 25443  	LONG $0x014c110f; BYTE $0x30   // movups    oword [rcx + rax + 48], xmm1
 25444  	LONG $0x0244100f; BYTE $0x40   // movups    xmm0, oword [rdx + rax + 64]
 25445  	LONG $0x024c100f; BYTE $0x50   // movups    xmm1, oword [rdx + rax + 80]
 25446  	LONG $0x0144110f; BYTE $0x40   // movups    oword [rcx + rax + 64], xmm0
 25447  	LONG $0x014c110f; BYTE $0x50   // movups    oword [rcx + rax + 80], xmm1
 25448  	LONG $0x44100f66; WORD $0x6002 // movupd    xmm0, oword [rdx + rax + 96]
 25449  	LONG $0x4c100f66; WORD $0x7002 // movupd    xmm1, oword [rdx + rax + 112]
 25450  	LONG $0x44110f66; WORD $0x6001 // movupd    oword [rcx + rax + 96], xmm0
 25451  	LONG $0x4c110f66; WORD $0x7001 // movupd    oword [rcx + rax + 112], xmm1
 25452  	LONG $0x80e88348               // sub    rax, -128
 25453  	LONG $0x04c78348               // add    rdi, 4
 25454  	JNE  LBB3_601
 25455  
 25456  LBB3_602:
        	// Remainder: r8 further 32-byte steps (none when r8 == 0). rax is
        	// pre-biased by 16 for the [rax - 16] / [rax] addressing used below.
 25457  	WORD $0x854d; BYTE $0xc0 // test    r8, r8
 25458  	JE   LBB3_605
 25459  	LONG $0x10c08348         // add    rax, 16
 25460  	WORD $0xf749; BYTE $0xd8 // neg    r8
 25461  
 25462  LBB3_604:
        	// Copies 32 bytes per pass; r8 was negated and is incremented to zero.
 25463  	LONG $0x44100f66; WORD $0xf002 // movupd    xmm0, oword [rdx + rax - 16]
 25464  	LONG $0x0c100f66; BYTE $0x02   // movupd    xmm1, oword [rdx + rax]
 25465  	LONG $0x44110f66; WORD $0xf001 // movupd    oword [rcx + rax - 16], xmm0
 25466  	LONG $0x0c110f66; BYTE $0x01   // movupd    oword [rcx + rax], xmm1
 25467  	LONG $0x20c08348               // add    rax, 32
 25468  	WORD $0xff49; BYTE $0xc0       // inc    r8
 25469  	JNE  LBB3_604
 25470  
 25471  LBB3_605:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_606.
 25472  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25473  	JE   LBB3_923
 25474  	JMP  LBB3_606
 25475  
 25476  LBB3_610:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25477  	WORD $0xff31 // xor    edi, edi
 25478  
 25479  LBB3_611:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (4-byte
        	// lanes): out = 0 - in for each of the 8 lanes, i.e. a wrapping negate.
        	// NOTE(review): r8 is set outside this view - presumably a count of
        	// 32-byte steps whose low bit marks an odd leftover step; confirm.
 25480  	LONG $0x01c0f641               // test    r8b, 1
 25481  	JE   LBB3_613
 25482  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 25483  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 25484  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25485  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25486  	LONG $0xd8fa0f66               // psubd    xmm3, xmm0
 25487  	LONG $0xd1fa0f66               // psubd    xmm2, xmm1
 25488  	LONG $0x1c7f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm3
 25489  	LONG $0x547f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm2
 25490  
 25491  LBB3_613:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_614.
 25492  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25493  	JE   LBB3_923
 25494  	JMP  LBB3_614
 25495  
 25496  LBB3_618:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25497  	WORD $0xff31 // xor    edi, edi
 25498  
 25499  LBB3_619:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (4-byte
        	// lanes): pcmpeqd builds an "in == 0" mask and pandn computes
        	// (NOT mask) AND constant, so zero lanes give 0 and all others give the
        	// constant loaded from rbp+32.
        	// NOTE(review): the constant (.LCPI3_3) and r8 are defined outside this
        	// view - presumably 1 per lane (an unsigned "sign") and a step count; confirm.
 25500  	LONG $0x01c0f641               // test    r8b, 1
 25501  	JE   LBB3_621
 25502  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 25503  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 25504  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25505  	LONG $0xc2760f66               // pcmpeqd    xmm0, xmm2
 25506  	LONG $0x5d6f0f66; BYTE $0x20   // movdqa    xmm3, oword 32[rbp] /* [rip + .LCPI3_3] */
 25507  	LONG $0xc3df0f66               // pandn    xmm0, xmm3
 25508  	LONG $0xca760f66               // pcmpeqd    xmm1, xmm2
 25509  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
 25510  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
 25511  	LONG $0x4c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm1
 25512  
 25513  LBB3_621:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_622.
 25514  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25515  	JE   LBB3_923
 25516  	JMP  LBB3_622
 25517  
 25518  LBB3_626:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25519  	WORD $0xff31 // xor    edi, edi
 25520  
 25521  LBB3_627:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (8-byte
        	// lanes): each lane is XORed with the constant loaded from rbp+0.
        	// NOTE(review): the constant (.LCPI3_0) and r8 are defined outside this
        	// view - presumably the float64 sign-bit mask (making this a negate) and
        	// a count of 32-byte steps; confirm.
 25522  	LONG $0x01c0f641               // test    r8b, 1
 25523  	JE   LBB3_629
 25524  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
 25525  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
 25526  	LONG $0x55280f66; BYTE $0x00   // movapd    xmm2, oword 0[rbp] /* [rip + .LCPI3_0] */
 25527  	LONG $0xc2570f66               // xorpd    xmm0, xmm2
 25528  	LONG $0xca570f66               // xorpd    xmm1, xmm2
 25529  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
 25530  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
 25531  
 25532  LBB3_629:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_630.
 25533  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25534  	JE   LBB3_923
 25535  	JMP  LBB3_630
 25536  
 25537  LBB3_636:
        	// Entry that clears the index rdi, then falls through to the tail step.
 25538  	WORD $0xff31 // xor    edi, edi
 25539  
 25540  LBB3_637:
        	// Runs only when the low bit of r8 is set: one 32-byte step at index rdi
        	// (8-byte lanes) where every lane is XORed with the constant at rbp+0.
        	// NOTE(review): both the constant (.LCPI3_0) and r8 originate outside
        	// this view - presumably the float64 sign-bit mask (so the XOR negates)
        	// and the number of 32-byte steps; confirm.
 25541  	LONG $0x01c0f641               // test    r8b, 1
 25542  	JE   LBB3_639
 25543  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
 25544  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
 25545  	LONG $0x55280f66; BYTE $0x00   // movapd    xmm2, oword 0[rbp] /* [rip + .LCPI3_0] */
 25546  	LONG $0xc2570f66               // xorpd    xmm0, xmm2
 25547  	LONG $0xca570f66               // xorpd    xmm1, xmm2
 25548  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
 25549  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
 25550  
 25551  LBB3_639:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_640.
 25552  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25553  	JE   LBB3_923
 25554  	JMP  LBB3_640
 25555  
 25556  LBB3_646:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25557  	WORD $0xff31 // xor    edi, edi
 25558  
 25559  LBB3_647:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (8-byte
        	// lanes). With C0 = constant at rbp+0 and C1 = constant at rbp+16:
        	//   t   = (in AND C0) OR C1
        	//   out = (in != 0.0 ? all-ones : 0) AND t
        	// so lanes equal to zero produce 0 and all other lanes produce t.
        	// NOTE(review): C0/C1 (.LCPI3_0/.LCPI3_1) and r8 are defined outside this
        	// view - presumably the sign-bit mask and 1.0, which would make t = +/-1.0
        	// (a float64 "sign"); confirm.
 25560  	LONG $0x01c0f641               // test    r8b, 1
 25561  	JE   LBB3_649
 25562  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
 25563  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
 25564  	LONG $0xd2570f66               // xorpd    xmm2, xmm2
 25565  	LONG $0x5d280f66; BYTE $0x00   // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI3_0] */
 25566  	LONG $0xe0280f66               // movapd    xmm4, xmm0
 25567  	LONG $0xe3540f66               // andpd    xmm4, xmm3
 25568  	LONG $0x6d280f66; BYTE $0x10   // movapd    xmm5, oword 16[rbp] /* [rip + .LCPI3_1] */
 25569  	LONG $0xe5560f66               // orpd    xmm4, xmm5
 25570  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 25571  	LONG $0xdd560f66               // orpd    xmm3, xmm5
 25572  	LONG $0xc2c20f66; BYTE $0x04   // cmpneqpd    xmm0, xmm2
 25573  	LONG $0xc4540f66               // andpd    xmm0, xmm4
 25574  	LONG $0xcac20f66; BYTE $0x04   // cmpneqpd    xmm1, xmm2
 25575  	LONG $0xcb540f66               // andpd    xmm1, xmm3
 25576  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
 25577  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
 25578  
 25579  LBB3_649:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_650.
 25580  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25581  	JE   LBB3_923
 25582  	JMP  LBB3_650
 25583  
 25584  LBB3_655:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25585  	WORD $0xff31 // xor    edi, edi
 25586  
 25587  LBB3_656:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (8-byte
        	// lanes): each lane is ANDed with the constant loaded from rbp+112.
        	// NOTE(review): the constant (.LCPI3_8) and r8 are defined outside this
        	// view - presumably a mask clearing the float64 sign bit (an absolute
        	// value) and a count of 32-byte steps; confirm.
 25588  	LONG $0x01c0f641               // test    r8b, 1
 25589  	JE   LBB3_658
 25590  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
 25591  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
 25592  	LONG $0x55280f66; BYTE $0x70   // movapd    xmm2, oword 112[rbp] /* [rip + .LCPI3_8] */
 25593  	LONG $0xc2540f66               // andpd    xmm0, xmm2
 25594  	LONG $0xca540f66               // andpd    xmm1, xmm2
 25595  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
 25596  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
 25597  
 25598  LBB3_658:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_659.
 25599  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25600  	JE   LBB3_923
 25601  	JMP  LBB3_659
 25602  
 25603  LBB3_663:
        	// Entry that clears the index rdi, then falls through to the tail step.
 25604  	WORD $0xff31 // xor    edi, edi
 25605  
 25606  LBB3_664:
        	// Runs only when the low bit of r8 is set: one 32-byte step at index rdi
        	// (8-byte lanes) where every lane is ANDed with the constant at rbp+112.
        	// NOTE(review): both the constant (.LCPI3_8) and r8 originate outside
        	// this view - presumably a sign-bit-clearing mask (float64 absolute
        	// value) and the number of 32-byte steps; confirm.
 25607  	LONG $0x01c0f641               // test    r8b, 1
 25608  	JE   LBB3_666
 25609  	LONG $0x04100f66; BYTE $0xfa   // movupd    xmm0, oword [rdx + 8*rdi]
 25610  	LONG $0x4c100f66; WORD $0x10fa // movupd    xmm1, oword [rdx + 8*rdi + 16]
 25611  	LONG $0x55280f66; BYTE $0x70   // movapd    xmm2, oword 112[rbp] /* [rip + .LCPI3_8] */
 25612  	LONG $0xc2540f66               // andpd    xmm0, xmm2
 25613  	LONG $0xca540f66               // andpd    xmm1, xmm2
 25614  	LONG $0x04110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm0
 25615  	LONG $0x4c110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm1
 25616  
 25617  LBB3_666:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_667.
 25618  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25619  	JE   LBB3_923
 25620  	JMP  LBB3_667
 25621  
 25622  LBB3_671:
        	// Entry that zeroes the offset rdi before falling into the single-step tail.
 25623  	WORD $0xff31 // xor    edi, edi
 25624  
 25625  LBB3_672:
        	// If bit 0 of r8 is set, handle one 32-byte step at byte offset rdi:
        	// out = 0 - in for each of the 32 one-byte lanes (wrapping negate).
        	// NOTE(review): r8 is set outside this view - presumably a count of
        	// 32-byte steps whose low bit marks an odd leftover step; confirm.
 25626  	LONG $0x01c0f641               // test    r8b, 1
 25627  	JE   LBB3_674
 25628  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 25629  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 25630  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25631  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25632  	LONG $0xd8f80f66               // psubb    xmm3, xmm0
 25633  	LONG $0xd1f80f66               // psubb    xmm2, xmm1
 25634  	LONG $0x1c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm3
 25635  	LONG $0x547f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm2
 25636  
 25637  LBB3_674:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_675.
 25638  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25639  	JE   LBB3_923
 25640  	JMP  LBB3_675
 25641  
 25642  LBB3_679:
        	// Entry that clears the offset rdi, then falls through to the tail step.
 25643  	WORD $0xff31 // xor    edi, edi
 25644  
 25645  LBB3_680:
        	// Runs only when the low bit of r8 is set: one 32-byte step at byte
        	// offset rdi, subtracting every one-byte lane from zero (wrapping negate).
        	// NOTE(review): r8 originates outside this view - presumably the number
        	// of 32-byte steps, its low bit flagging one leftover step; confirm.
 25646  	LONG $0x01c0f641               // test    r8b, 1
 25647  	JE   LBB3_682
 25648  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 25649  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 25650  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25651  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25652  	LONG $0xd8f80f66               // psubb    xmm3, xmm0
 25653  	LONG $0xd1f80f66               // psubb    xmm2, xmm1
 25654  	LONG $0x1c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm3
 25655  	LONG $0x547f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm2
 25656  
 25657  LBB3_682:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_683.
 25658  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25659  	JE   LBB3_923
 25660  	JMP  LBB3_683
 25661  
 25662  LBB3_687:
        	// Entry that zeroes the offset rdi before falling into the single-step tail.
 25663  	WORD $0xff31 // xor    edi, edi
 25664  
 25665  LBB3_688:
        	// If bit 0 of r8 is set, handle one 32-byte step at byte offset rdi
        	// (one-byte lanes, signed compares). With C = constant at rbp+80:
        	//   out = (C > in) ? NOT(in == 0) : C
        	// i.e. lanes below C yield 0x00 when zero and 0xFF otherwise, and lanes
        	// at or above C yield C.
        	// NOTE(review): C (.LCPI3_6) and r8 are defined outside this view -
        	// presumably C is 1 per byte, making this an int8 "sign" (-1/0/+1); confirm.
 25666  	LONG $0x01c0f641               // test    r8b, 1
 25667  	JE   LBB3_690
 25668  	LONG $0x0c6f0ff3; BYTE $0x3a   // movdqu    xmm1, oword [rdx + rdi]
 25669  	LONG $0x546f0ff3; WORD $0x103a // movdqu    xmm2, oword [rdx + rdi + 16]
 25670  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25671  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI3_6] */
 25672  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 25673  	LONG $0xc1640f66               // pcmpgtb    xmm0, xmm1
 25674  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 25675  	LONG $0xeb740f66               // pcmpeqb    xmm5, xmm3
        	// pcmpeqd of a register with itself sets xmm1 to all-ones; the XORs with
        	// it below invert the "== 0" masks.
 25676  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1
 25677  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1
 25678  	LONG $0xda740f66               // pcmpeqb    xmm3, xmm2
 25679  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 25680  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 25681  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 25682  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 25683  	LONG $0x10380f66; BYTE $0xd5   // pblendvb    xmm2, xmm5, xmm0
        	// pblendvb takes its mask implicitly from xmm0, so the second half's
        	// compare mask is moved into xmm0 before the second blend.
 25684  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 25685  	LONG $0x10380f66; BYTE $0xe3   // pblendvb    xmm4, xmm3, xmm0
 25686  	LONG $0x147f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm2
 25687  	LONG $0x647f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm4
 25688  
 25689  LBB3_690:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_691.
 25690  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25691  	JE   LBB3_923
 25692  	JMP  LBB3_691
 25693  
 25694  LBB3_696:
        	// Entry that zeroes the offset rdi before falling into the single-step tail.
 25695  	WORD $0xff31 // xor    edi, edi
 25696  
 25697  LBB3_697:
        	// If bit 0 of r8 is set, handle one 16-byte step at byte offset rdi:
        	// absolute value of 16 signed bytes, computed in 32-bit lanes.
        	//   1. pmovsxbd sign-extends four groups of 4 bytes to dwords.
        	//   2. psrad by 7 yields s = 0 or -1 per lane (bits 7..31 of a
        	//      sign-extended byte all equal the sign bit).
        	//   3. (x + s) XOR s gives |x|.
        	//   4. The lanes are masked with the constant at rbp+144 and packed
        	//      back down to 16 bytes (packusdw, then packuswb).
        	// NOTE(review): the constant (.LCPI3_10) and r8 are defined outside this
        	// view - presumably 0xFF per dword so the saturating packs pass the low
        	// byte through unchanged, and a count of 16-byte steps; confirm.
 25698  	LONG $0x01c0f641                           // test    r8b, 1
 25699  	JE   LBB3_699
 25700  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x0c // pmovsxbd    xmm3, dword [rdx + rdi + 12]
 25701  	LONG $0x21380f66; WORD $0x3a44; BYTE $0x08 // pmovsxbd    xmm0, dword [rdx + rdi + 8]
 25702  	LONG $0x21380f66; WORD $0x3a54; BYTE $0x04 // pmovsxbd    xmm2, dword [rdx + rdi + 4]
 25703  	LONG $0x21380f66; WORD $0x3a0c             // pmovsxbd    xmm1, dword [rdx + rdi]
 25704  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 25705  	LONG $0xe4720f66; BYTE $0x07               // psrad    xmm4, 7
 25706  	LONG $0xea6f0f66                           // movdqa    xmm5, xmm2
 25707  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 25708  	LONG $0xf06f0f66                           // movdqa    xmm6, xmm0
 25709  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 25710  	LONG $0xfb6f0f66                           // movdqa    xmm7, xmm3
 25711  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 25712  	LONG $0xdffe0f66                           // paddd    xmm3, xmm7
 25713  	LONG $0xc6fe0f66                           // paddd    xmm0, xmm6
 25714  	LONG $0xd5fe0f66                           // paddd    xmm2, xmm5
 25715  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 25716  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 25717  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 25718  	LONG $0xc6ef0f66                           // pxor    xmm0, xmm6
 25719  	LONG $0xdfef0f66                           // pxor    xmm3, xmm7
 25720  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI3_10] */
 25721  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 25722  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 25723  	LONG $0x2b380f66; BYTE $0xc3               // packusdw    xmm0, xmm3
 25724  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 25725  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 25726  	LONG $0x2b380f66; BYTE $0xca               // packusdw    xmm1, xmm2
 25727  	LONG $0xc8670f66                           // packuswb    xmm1, xmm0
 25728  	LONG $0x0c7f0ff3; BYTE $0x39               // movdqu    oword [rcx + rdi], xmm1
 25729  
 25730  LBB3_699:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_700.
 25731  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25732  	JE   LBB3_923
 25733  	JMP  LBB3_700
 25734  
 25735  LBB3_704:
        	// Entry that clears the offset rdi, then falls through to the tail step.
 25736  	WORD $0xff31 // xor    edi, edi
 25737  
 25738  LBB3_705:
        	// Runs only when the low bit of r8 is set: one 16-byte step at byte
        	// offset rdi that takes the absolute value of 16 signed bytes.
        	//   - pmovsxbd widens each group of 4 bytes to sign-extended dwords;
        	//   - psrad by 7 turns each copy into a 0 / -1 sign mask s (a
        	//     sign-extended byte has identical bits from bit 7 upward);
        	//   - paddd then pxor compute (x + s) XOR s = |x|;
        	//   - pand with the constant at rbp+144, then packusdw and packuswb
        	//     narrow the four dword vectors back into one 16-byte vector.
        	// NOTE(review): the constant (.LCPI3_10) and r8 originate outside this
        	// view - presumably a 0xFF-per-dword mask that keeps the saturating
        	// packs from clamping, and the number of 16-byte steps; confirm.
 25739  	LONG $0x01c0f641                           // test    r8b, 1
 25740  	JE   LBB3_707
 25741  	LONG $0x21380f66; WORD $0x3a5c; BYTE $0x0c // pmovsxbd    xmm3, dword [rdx + rdi + 12]
 25742  	LONG $0x21380f66; WORD $0x3a44; BYTE $0x08 // pmovsxbd    xmm0, dword [rdx + rdi + 8]
 25743  	LONG $0x21380f66; WORD $0x3a54; BYTE $0x04 // pmovsxbd    xmm2, dword [rdx + rdi + 4]
 25744  	LONG $0x21380f66; WORD $0x3a0c             // pmovsxbd    xmm1, dword [rdx + rdi]
 25745  	LONG $0xe16f0f66                           // movdqa    xmm4, xmm1
 25746  	LONG $0xe4720f66; BYTE $0x07               // psrad    xmm4, 7
 25747  	LONG $0xea6f0f66                           // movdqa    xmm5, xmm2
 25748  	LONG $0xe5720f66; BYTE $0x07               // psrad    xmm5, 7
 25749  	LONG $0xf06f0f66                           // movdqa    xmm6, xmm0
 25750  	LONG $0xe6720f66; BYTE $0x07               // psrad    xmm6, 7
 25751  	LONG $0xfb6f0f66                           // movdqa    xmm7, xmm3
 25752  	LONG $0xe7720f66; BYTE $0x07               // psrad    xmm7, 7
 25753  	LONG $0xdffe0f66                           // paddd    xmm3, xmm7
 25754  	LONG $0xc6fe0f66                           // paddd    xmm0, xmm6
 25755  	LONG $0xd5fe0f66                           // paddd    xmm2, xmm5
 25756  	LONG $0xccfe0f66                           // paddd    xmm1, xmm4
 25757  	LONG $0xccef0f66                           // pxor    xmm1, xmm4
 25758  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 25759  	LONG $0xc6ef0f66                           // pxor    xmm0, xmm6
 25760  	LONG $0xdfef0f66                           // pxor    xmm3, xmm7
 25761  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI3_10] */
 25762  	LONG $0xdcdb0f66                           // pand    xmm3, xmm4
 25763  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 25764  	LONG $0x2b380f66; BYTE $0xc3               // packusdw    xmm0, xmm3
 25765  	LONG $0xd4db0f66                           // pand    xmm2, xmm4
 25766  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 25767  	LONG $0x2b380f66; BYTE $0xca               // packusdw    xmm1, xmm2
 25768  	LONG $0xc8670f66                           // packuswb    xmm1, xmm0
 25769  	LONG $0x0c7f0ff3; BYTE $0x39               // movdqu    oword [rcx + rdi], xmm1
 25770  
 25771  LBB3_707:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_708.
 25772  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25773  	JE   LBB3_923
 25774  	JMP  LBB3_708
 25775  
 25776  LBB3_712:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25777  	WORD $0xff31 // xor    edi, edi
 25778  
 25779  LBB3_713:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (8-byte
        	// lanes): out = 0 - in for each of the 4 lanes, i.e. a wrapping negate.
        	// NOTE(review): r8 is set outside this view - presumably a count of
        	// 32-byte steps whose low bit marks an odd leftover step; confirm.
 25780  	LONG $0x01c0f641               // test    r8b, 1
 25781  	JE   LBB3_715
 25782  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 25783  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 25784  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25785  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25786  	LONG $0xd8fb0f66               // psubq    xmm3, xmm0
 25787  	LONG $0xd1fb0f66               // psubq    xmm2, xmm1
 25788  	LONG $0x1c7f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm3
 25789  	LONG $0x547f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm2
 25790  
 25791  LBB3_715:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_716.
 25792  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25793  	JE   LBB3_923
 25794  	JMP  LBB3_716
 25795  
 25796  LBB3_720:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25797  	WORD $0xff31 // xor    edi, edi
 25798  
 25799  LBB3_721:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (8-byte
        	// lanes): pcmpeqq builds an "in == 0" mask and pandn computes
        	// (NOT mask) AND constant, so zero lanes give 0 and all others give the
        	// constant loaded from rbp+48.
        	// NOTE(review): the constant (.LCPI3_4) and r8 are defined outside this
        	// view - presumably 1 per lane (an unsigned "sign") and a step count; confirm.
 25800  	LONG $0x01c0f641               // test    r8b, 1
 25801  	JE   LBB3_723
 25802  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 25803  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 25804  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25805  	LONG $0x29380f66; BYTE $0xc2   // pcmpeqq    xmm0, xmm2
 25806  	LONG $0x5d6f0f66; BYTE $0x30   // movdqa    xmm3, oword 48[rbp] /* [rip + .LCPI3_4] */
 25807  	LONG $0xc3df0f66               // pandn    xmm0, xmm3
 25808  	LONG $0x29380f66; BYTE $0xca   // pcmpeqq    xmm1, xmm2
 25809  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
 25810  	LONG $0x047f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm0
 25811  	LONG $0x4c7f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm1
 25812  
 25813  LBB3_723:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_724.
 25814  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25815  	JE   LBB3_923
 25816  	JMP  LBB3_724
 25817  
 25818  LBB3_728:
        	// Entry that zeroes the index rdi before falling into the single-step tail.
 25819  	WORD $0xff31 // xor    edi, edi
 25820  
 25821  LBB3_729:
        	// If bit 0 of r8 is set, handle one 32-byte step at index rdi (2-byte
        	// lanes): out = 0 - in for each of the 16 lanes, i.e. a wrapping negate.
        	// NOTE(review): r8 is set outside this view - presumably a count of
        	// 32-byte steps whose low bit marks an odd leftover step; confirm.
 25822  	LONG $0x01c0f641               // test    r8b, 1
 25823  	JE   LBB3_731
 25824  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 25825  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 25826  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25827  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25828  	LONG $0xd8f90f66               // psubw    xmm3, xmm0
 25829  	LONG $0xd1f90f66               // psubw    xmm2, xmm1
 25830  	LONG $0x1c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm3
 25831  	LONG $0x547f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm2
 25832  
 25833  LBB3_731:
        	// rsi and r9 are set outside this view (presumably vector-processed count
        	// vs. total length - confirm). Equal: go to LBB3_923; otherwise LBB3_732.
 25834  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25835  	JE   LBB3_923
 25836  	JMP  LBB3_732
 25837  
 25838  LBB3_736:
        	// Entry that clears the index rdi, then falls through to the tail step.
 25839  	WORD $0xff31 // xor    edi, edi
 25840  
 25841  LBB3_737:
        	// Runs only when the low bit of r8 is set: one 32-byte step at index rdi
        	// (2-byte lanes), subtracting every lane from zero (wrapping negate).
        	// NOTE(review): r8 originates outside this view - presumably the number
        	// of 32-byte steps, its low bit flagging one leftover step; confirm.
 25842  	LONG $0x01c0f641               // test    r8b, 1
 25843  	JE   LBB3_739
 25844  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 25845  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 25846  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25847  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25848  	LONG $0xd8f90f66               // psubw    xmm3, xmm0
 25849  	LONG $0xd1f90f66               // psubw    xmm2, xmm1
 25850  	LONG $0x1c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm3
 25851  	LONG $0x547f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm2
 25852  
 25853  LBB3_739:
        	// rsi and r9 are produced outside this view (presumably elements done by
        	// the vector path vs. total - confirm). Equal: LBB3_923; else LBB3_740.
 25854  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25855  	JE   LBB3_923
 25856  	JMP  LBB3_740
 25857  
 25858  LBB3_744:
 25859  	WORD $0xff31 // xor    edi, edi
 25860  
 25861  LBB3_745:
 25862  	LONG $0x01c0f641               // test    r8b, 1
 25863  	JE   LBB3_747
 25864  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 25865  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 25866  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25867  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25868  	LONG $0xd8f90f66               // psubw    xmm3, xmm0
 25869  	LONG $0xd1f90f66               // psubw    xmm2, xmm1
 25870  	LONG $0x1c7f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm3
 25871  	LONG $0x547f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm2
 25872  
 25873  LBB3_747:
 25874  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25875  	JE   LBB3_923
 25876  	JMP  LBB3_748
 25877  
 25878  LBB3_752:
 25879  	WORD $0xff31 // xor    edi, edi
 25880  
 25881  LBB3_753:
 25882  	LONG $0x01c0f641               // test    r8b, 1
 25883  	JE   LBB3_755
 25884  	LONG $0x046f0ff3; BYTE $0x7a   // movdqu    xmm0, oword [rdx + 2*rdi]
 25885  	LONG $0x4c6f0ff3; WORD $0x107a // movdqu    xmm1, oword [rdx + 2*rdi + 16]
 25886  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25887  	LONG $0xc2750f66               // pcmpeqw    xmm0, xmm2
 25888  	LONG $0x5d6f0f66; BYTE $0x40   // movdqa    xmm3, oword 64[rbp] /* [rip + .LCPI3_5] */
 25889  	LONG $0xc3df0f66               // pandn    xmm0, xmm3
 25890  	LONG $0xca750f66               // pcmpeqw    xmm1, xmm2
 25891  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
 25892  	LONG $0x047f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm0
 25893  	LONG $0x4c7f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm1
 25894  
 25895  LBB3_755:
 25896  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25897  	JE   LBB3_923
 25898  	JMP  LBB3_756
 25899  
 25900  LBB3_760:
 25901  	WORD $0xff31 // xor    edi, edi
 25902  
 25903  LBB3_761:
 25904  	LONG $0x01c0f641               // test    r8b, 1
 25905  	JE   LBB3_763
 25906  	LONG $0x0c6f0ff3; BYTE $0x7a   // movdqu    xmm1, oword [rdx + 2*rdi]
 25907  	LONG $0x546f0ff3; WORD $0x107a // movdqu    xmm2, oword [rdx + 2*rdi + 16]
 25908  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25909  	LONG $0x656f0f66; BYTE $0x40   // movdqa    xmm4, oword 64[rbp] /* [rip + .LCPI3_5] */
 25910  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 25911  	LONG $0xc1650f66               // pcmpgtw    xmm0, xmm1
 25912  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 25913  	LONG $0xeb750f66               // pcmpeqw    xmm5, xmm3
 25914  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1
 25915  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1
 25916  	LONG $0xda750f66               // pcmpeqw    xmm3, xmm2
 25917  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 25918  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 25919  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 25920  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 25921  	LONG $0x10380f66; BYTE $0xd5   // pblendvb    xmm2, xmm5, xmm0
 25922  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 25923  	LONG $0x10380f66; BYTE $0xe3   // pblendvb    xmm4, xmm3, xmm0
 25924  	LONG $0x147f0ff3; BYTE $0x79   // movdqu    oword [rcx + 2*rdi], xmm2
 25925  	LONG $0x647f0ff3; WORD $0x1079 // movdqu    oword [rcx + 2*rdi + 16], xmm4
 25926  
 25927  LBB3_763:
 25928  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25929  	JE   LBB3_923
 25930  	JMP  LBB3_764
 25931  
 25932  LBB3_769:
 25933  	WORD $0xff31 // xor    edi, edi
 25934  
 25935  LBB3_770:
 25936  	LONG $0x01c0f641                           // test    r8b, 1
 25937  	JE   LBB3_772
 25938  	LONG $0x23380f66; WORD $0x7a44; BYTE $0x08 // pmovsxwd    xmm0, qword [rdx + 2*rdi + 8]
 25939  	LONG $0x23380f66; WORD $0x7a0c             // pmovsxwd    xmm1, qword [rdx + 2*rdi]
 25940  	LONG $0xd16f0f66                           // movdqa    xmm2, xmm1
 25941  	LONG $0xe2720f66; BYTE $0x0f               // psrad    xmm2, 15
 25942  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 25943  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15
 25944  	LONG $0xc3fe0f66                           // paddd    xmm0, xmm3
 25945  	LONG $0xcafe0f66                           // paddd    xmm1, xmm2
 25946  	LONG $0xcaef0f66                           // pxor    xmm1, xmm2
 25947  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 25948  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 25949  	LONG $0x0e3a0f66; WORD $0xaac2             // pblendw    xmm0, xmm2, 170
 25950  	LONG $0x0e3a0f66; WORD $0xaaca             // pblendw    xmm1, xmm2, 170
 25951  	LONG $0x2b380f66; BYTE $0xc8               // packusdw    xmm1, xmm0
 25952  	LONG $0x0c7f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm1
 25953  
 25954  LBB3_772:
 25955  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25956  	JE   LBB3_923
 25957  	JMP  LBB3_773
 25958  
 25959  LBB3_777:
 25960  	WORD $0xff31 // xor    edi, edi
 25961  
 25962  LBB3_778:
 25963  	LONG $0x01c0f641                           // test    r8b, 1
 25964  	JE   LBB3_780
 25965  	LONG $0x23380f66; WORD $0x7a44; BYTE $0x08 // pmovsxwd    xmm0, qword [rdx + 2*rdi + 8]
 25966  	LONG $0x23380f66; WORD $0x7a0c             // pmovsxwd    xmm1, qword [rdx + 2*rdi]
 25967  	LONG $0xd16f0f66                           // movdqa    xmm2, xmm1
 25968  	LONG $0xe2720f66; BYTE $0x0f               // psrad    xmm2, 15
 25969  	LONG $0xd86f0f66                           // movdqa    xmm3, xmm0
 25970  	LONG $0xe3720f66; BYTE $0x0f               // psrad    xmm3, 15
 25971  	LONG $0xc3fe0f66                           // paddd    xmm0, xmm3
 25972  	LONG $0xcafe0f66                           // paddd    xmm1, xmm2
 25973  	LONG $0xcaef0f66                           // pxor    xmm1, xmm2
 25974  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 25975  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 25976  	LONG $0x0e3a0f66; WORD $0xaac2             // pblendw    xmm0, xmm2, 170
 25977  	LONG $0x0e3a0f66; WORD $0xaaca             // pblendw    xmm1, xmm2, 170
 25978  	LONG $0x2b380f66; BYTE $0xc8               // packusdw    xmm1, xmm0
 25979  	LONG $0x0c7f0ff3; BYTE $0x79               // movdqu    oword [rcx + 2*rdi], xmm1
 25980  
 25981  LBB3_780:
 25982  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 25983  	JE   LBB3_923
 25984  	JMP  LBB3_781
 25985  
 25986  LBB3_785:
 25987  	WORD $0xff31 // xor    edi, edi
 25988  
 25989  LBB3_786:
 25990  	LONG $0x01c0f641               // test    r8b, 1
 25991  	JE   LBB3_788
 25992  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 25993  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 25994  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 25995  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 25996  	LONG $0xd8fb0f66               // psubq    xmm3, xmm0
 25997  	LONG $0xd1fb0f66               // psubq    xmm2, xmm1
 25998  	LONG $0x1c7f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm3
 25999  	LONG $0x547f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm2
 26000  
 26001  LBB3_788:
 26002  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26003  	JE   LBB3_923
 26004  	JMP  LBB3_789
 26005  
 26006  LBB3_793:
 26007  	WORD $0xff31 // xor    edi, edi
 26008  
 26009  LBB3_794:
 26010  	LONG $0x01c0f641               // test    r8b, 1
 26011  	JE   LBB3_796
 26012  	LONG $0x04100f66; BYTE $0xba   // movupd    xmm0, oword [rdx + 4*rdi]
 26013  	LONG $0x4c100f66; WORD $0x10ba // movupd    xmm1, oword [rdx + 4*rdi + 16]
 26014  	LONG $0x55280f66; BYTE $0x60   // movapd    xmm2, oword 96[rbp] /* [rip + .LCPI3_7] */
 26015  	LONG $0xc2570f66               // xorpd    xmm0, xmm2
 26016  	LONG $0xca570f66               // xorpd    xmm1, xmm2
 26017  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
 26018  	LONG $0x4c110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm1
 26019  
 26020  LBB3_796:
 26021  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26022  	JE   LBB3_923
 26023  	JMP  LBB3_797
 26024  
 26025  LBB3_803:
 26026  	WORD $0xff31 // xor    edi, edi
 26027  
 26028  LBB3_804:
 26029  	LONG $0x01c0f641               // test    r8b, 1
 26030  	JE   LBB3_806
 26031  	LONG $0x046f0ff3; BYTE $0xfa   // movdqu    xmm0, oword [rdx + 8*rdi]
 26032  	LONG $0x4c6f0ff3; WORD $0x10fa // movdqu    xmm1, oword [rdx + 8*rdi + 16]
 26033  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 26034  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26035  	LONG $0xd8fb0f66               // psubq    xmm3, xmm0
 26036  	LONG $0xd1fb0f66               // psubq    xmm2, xmm1
 26037  	LONG $0x1c7f0ff3; BYTE $0xf9   // movdqu    oword [rcx + 8*rdi], xmm3
 26038  	LONG $0x547f0ff3; WORD $0x10f9 // movdqu    oword [rcx + 8*rdi + 16], xmm2
 26039  
 26040  LBB3_806:
 26041  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26042  	JE   LBB3_923
 26043  	JMP  LBB3_807
 26044  
 26045  LBB3_811:
 26046  	WORD $0xff31 // xor    edi, edi
 26047  
 26048  LBB3_812:
 26049  	LONG $0x01c0f641               // test    r8b, 1
 26050  	JE   LBB3_814
 26051  	LONG $0x04100f66; BYTE $0xba   // movupd    xmm0, oword [rdx + 4*rdi]
 26052  	LONG $0x4c100f66; WORD $0x10ba // movupd    xmm1, oword [rdx + 4*rdi + 16]
 26053  	LONG $0x55280f66; BYTE $0x60   // movapd    xmm2, oword 96[rbp] /* [rip + .LCPI3_7] */
 26054  	LONG $0xc2570f66               // xorpd    xmm0, xmm2
 26055  	LONG $0xca570f66               // xorpd    xmm1, xmm2
 26056  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
 26057  	LONG $0x4c110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm1
 26058  
 26059  LBB3_814:
 26060  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26061  	JE   LBB3_923
 26062  	JMP  LBB3_815
 26063  
 26064  LBB3_821:
 26065  	WORD $0xff31 // xor    edi, edi
 26066  
 26067  LBB3_822:
 26068  	LONG $0x01c0f641               // test    r8b, 1
 26069  	JE   LBB3_824
 26070  	LONG $0x0c6f0ff3; BYTE $0xfa   // movdqu    xmm1, oword [rdx + 8*rdi]
 26071  	LONG $0x546f0ff3; WORD $0x10fa // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 26072  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26073  	LONG $0x656f0f66; BYTE $0x30   // movdqa    xmm4, oword 48[rbp] /* [rip + .LCPI3_4] */
 26074  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 26075  	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
 26076  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 26077  	LONG $0x29380f66; BYTE $0xeb   // pcmpeqq    xmm5, xmm3
 26078  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1
 26079  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1
 26080  	LONG $0x29380f66; BYTE $0xda   // pcmpeqq    xmm3, xmm2
 26081  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 26082  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 26083  	LONG $0x37380f66; BYTE $0xca   // pcmpgtq    xmm1, xmm2
 26084  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 26085  	LONG $0x15380f66; BYTE $0xd5   // blendvpd    xmm2, xmm5, xmm0
 26086  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 26087  	LONG $0x15380f66; BYTE $0xe3   // blendvpd    xmm4, xmm3, xmm0
 26088  	LONG $0x14110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm2
 26089  	LONG $0x64110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm4
 26090  
 26091  LBB3_824:
 26092  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26093  	JE   LBB3_923
 26094  	JMP  LBB3_825
 26095  
 26096  LBB3_830:
 26097  	WORD $0xff31 // xor    edi, edi
 26098  
 26099  LBB3_831:
 26100  	LONG $0x01c0f641               // test    r8b, 1
 26101  	JE   LBB3_833
 26102  	LONG $0x0c6f0ff3; BYTE $0xfa   // movdqu    xmm1, oword [rdx + 8*rdi]
 26103  	LONG $0x546f0ff3; WORD $0x10fa // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 26104  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26105  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 26106  	LONG $0xe1fb0f66               // psubq    xmm4, xmm1
 26107  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 26108  	LONG $0x15380f66; BYTE $0xcc   // blendvpd    xmm1, xmm4, xmm0
 26109  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 26110  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 26111  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 26112  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 26113  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 26114  
 26115  LBB3_833:
 26116  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26117  	JE   LBB3_923
 26118  	JMP  LBB3_834
 26119  
 26120  LBB3_838:
 26121  	WORD $0xff31 // xor    edi, edi
 26122  
 26123  LBB3_839:
 26124  	LONG $0x01c0f641               // test    r8b, 1
 26125  	JE   LBB3_841
 26126  	LONG $0x04100f66; BYTE $0xba   // movupd    xmm0, oword [rdx + 4*rdi]
 26127  	LONG $0x4c100f66; WORD $0x10ba // movupd    xmm1, oword [rdx + 4*rdi + 16]
 26128  	QUAD $0x0000008095280f66       // movapd    xmm2, oword 128[rbp] /* [rip + .LCPI3_9] */
 26129  	LONG $0xc2540f66               // andpd    xmm0, xmm2
 26130  	LONG $0xca540f66               // andpd    xmm1, xmm2
 26131  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
 26132  	LONG $0x4c110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm1
 26133  
 26134  LBB3_841:
 26135  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26136  	JE   LBB3_923
 26137  	JMP  LBB3_842
 26138  
 26139  LBB3_848:
 26140  	WORD $0xff31 // xor    edi, edi
 26141  
 26142  LBB3_849:
 26143  	LONG $0x01c0f641               // test    r8b, 1
 26144  	JE   LBB3_851
 26145  	LONG $0x0c6f0ff3; BYTE $0xfa   // movdqu    xmm1, oword [rdx + 8*rdi]
 26146  	LONG $0x546f0ff3; WORD $0x10fa // movdqu    xmm2, oword [rdx + 8*rdi + 16]
 26147  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26148  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 26149  	LONG $0xe1fb0f66               // psubq    xmm4, xmm1
 26150  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 26151  	LONG $0x15380f66; BYTE $0xcc   // blendvpd    xmm1, xmm4, xmm0
 26152  	LONG $0xdafb0f66               // psubq    xmm3, xmm2
 26153  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 26154  	LONG $0x15380f66; BYTE $0xd3   // blendvpd    xmm2, xmm3, xmm0
 26155  	LONG $0x0c110f66; BYTE $0xf9   // movupd    oword [rcx + 8*rdi], xmm1
 26156  	LONG $0x54110f66; WORD $0x10f9 // movupd    oword [rcx + 8*rdi + 16], xmm2
 26157  
 26158  LBB3_851:
 26159  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26160  	JE   LBB3_923
 26161  	JMP  LBB3_852
 26162  
 26163  LBB3_856:
 26164  	WORD $0xff31 // xor    edi, edi
 26165  
 26166  LBB3_857:
 26167  	LONG $0x01c0f641               // test    r8b, 1
 26168  	JE   LBB3_859
 26169  	LONG $0x04100f66; BYTE $0xba   // movupd    xmm0, oword [rdx + 4*rdi]
 26170  	LONG $0x4c100f66; WORD $0x10ba // movupd    xmm1, oword [rdx + 4*rdi + 16]
 26171  	QUAD $0x0000008095280f66       // movapd    xmm2, oword 128[rbp] /* [rip + .LCPI3_9] */
 26172  	LONG $0xc2540f66               // andpd    xmm0, xmm2
 26173  	LONG $0xca540f66               // andpd    xmm1, xmm2
 26174  	LONG $0x04110f66; BYTE $0xb9   // movupd    oword [rcx + 4*rdi], xmm0
 26175  	LONG $0x4c110f66; WORD $0x10b9 // movupd    oword [rcx + 4*rdi + 16], xmm1
 26176  
 26177  LBB3_859:
 26178  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26179  	JE   LBB3_923
 26180  	JMP  LBB3_860
 26181  
 26182  LBB3_866:
 26183  	WORD $0xff31 // xor    edi, edi
 26184  
 26185  LBB3_867:
 26186  	LONG $0x01c0f641               // test    r8b, 1
 26187  	JE   LBB3_869
 26188  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 26189  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 26190  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 26191  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26192  	LONG $0xd8f80f66               // psubb    xmm3, xmm0
 26193  	LONG $0xd1f80f66               // psubb    xmm2, xmm1
 26194  	LONG $0x1c7f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm3
 26195  	LONG $0x547f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm2
 26196  
 26197  LBB3_869:
 26198  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26199  	JE   LBB3_923
 26200  	JMP  LBB3_870
 26201  
 26202  LBB3_874:
 26203  	WORD $0xff31 // xor    edi, edi
 26204  
 26205  LBB3_875:
 26206  	LONG $0x01c0f641               // test    r8b, 1
 26207  	JE   LBB3_877
 26208  	LONG $0x046f0ff3; BYTE $0x3a   // movdqu    xmm0, oword [rdx + rdi]
 26209  	LONG $0x4c6f0ff3; WORD $0x103a // movdqu    xmm1, oword [rdx + rdi + 16]
 26210  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 26211  	LONG $0xc2740f66               // pcmpeqb    xmm0, xmm2
 26212  	LONG $0x5d6f0f66; BYTE $0x50   // movdqa    xmm3, oword 80[rbp] /* [rip + .LCPI3_6] */
 26213  	LONG $0xc3df0f66               // pandn    xmm0, xmm3
 26214  	LONG $0xca740f66               // pcmpeqb    xmm1, xmm2
 26215  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
 26216  	LONG $0x047f0ff3; BYTE $0x39   // movdqu    oword [rcx + rdi], xmm0
 26217  	LONG $0x4c7f0ff3; WORD $0x1039 // movdqu    oword [rcx + rdi + 16], xmm1
 26218  
 26219  LBB3_877:
 26220  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26221  	JE   LBB3_923
 26222  	JMP  LBB3_878
 26223  
 26224  LBB3_882:
 26225  	WORD $0xff31 // xor    edi, edi
 26226  
 26227  LBB3_883:
 26228  	LONG $0x01c0f641               // test    r8b, 1
 26229  	JE   LBB3_885
 26230  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 26231  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 26232  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 26233  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26234  	LONG $0xd8fa0f66               // psubd    xmm3, xmm0
 26235  	LONG $0xd1fa0f66               // psubd    xmm2, xmm1
 26236  	LONG $0x1c7f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm3
 26237  	LONG $0x547f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm2
 26238  
 26239  LBB3_885:
 26240  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26241  	JE   LBB3_923
 26242  	JMP  LBB3_886
 26243  
 26244  LBB3_890:
 26245  	WORD $0xff31 // xor    edi, edi
 26246  
 26247  LBB3_891:
 26248  	LONG $0x01c0f641               // test    r8b, 1
 26249  	JE   LBB3_893
 26250  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 26251  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 26252  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 26253  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26254  	LONG $0xd8fa0f66               // psubd    xmm3, xmm0
 26255  	LONG $0xd1fa0f66               // psubd    xmm2, xmm1
 26256  	LONG $0x1c7f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm3
 26257  	LONG $0x547f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm2
 26258  
 26259  LBB3_893:
 26260  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26261  	JE   LBB3_923
 26262  	JMP  LBB3_894
 26263  
 26264  LBB3_898:
 26265  	WORD $0xff31 // xor    edi, edi
 26266  
 26267  LBB3_899:
 26268  	LONG $0x01c0f641               // test    r8b, 1
 26269  	JE   LBB3_901
 26270  	LONG $0x0c6f0ff3; BYTE $0xba   // movdqu    xmm1, oword [rdx + 4*rdi]
 26271  	LONG $0x546f0ff3; WORD $0x10ba // movdqu    xmm2, oword [rdx + 4*rdi + 16]
 26272  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 26273  	LONG $0x656f0f66; BYTE $0x20   // movdqa    xmm4, oword 32[rbp] /* [rip + .LCPI3_3] */
 26274  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 26275  	LONG $0xc1660f66               // pcmpgtd    xmm0, xmm1
 26276  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 26277  	LONG $0xeb760f66               // pcmpeqd    xmm5, xmm3
 26278  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1
 26279  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1
 26280  	LONG $0xda760f66               // pcmpeqd    xmm3, xmm2
 26281  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 26282  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 26283  	LONG $0xca660f66               // pcmpgtd    xmm1, xmm2
 26284  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 26285  	LONG $0x14380f66; BYTE $0xd5   // blendvps    xmm2, xmm5, xmm0
 26286  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 26287  	LONG $0x14380f66; BYTE $0xe3   // blendvps    xmm4, xmm3, xmm0
 26288  	LONG $0xb914110f               // movups    oword [rcx + 4*rdi], xmm2
 26289  	LONG $0xb964110f; BYTE $0x10   // movups    oword [rcx + 4*rdi + 16], xmm4
 26290  
 26291  LBB3_901:
 26292  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26293  	JE   LBB3_923
 26294  	JMP  LBB3_902
 26295  
 26296  LBB3_907:
 26297  	WORD $0xff31 // xor    edi, edi
 26298  
 26299  LBB3_908:
 26300  	LONG $0x01c0f641               // test    r8b, 1
 26301  	JE   LBB3_910
 26302  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 26303  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 26304  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 26305  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 26306  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
 26307  	LONG $0x4c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm1
 26308  
 26309  LBB3_910:
 26310  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26311  	JE   LBB3_923
 26312  	JMP  LBB3_911
 26313  
 26314  LBB3_915:
 26315  	WORD $0xff31 // xor    edi, edi
 26316  
 26317  LBB3_916:
 26318  	LONG $0x01c0f641               // test    r8b, 1
 26319  	JE   LBB3_918
 26320  	LONG $0x046f0ff3; BYTE $0xba   // movdqu    xmm0, oword [rdx + 4*rdi]
 26321  	LONG $0x4c6f0ff3; WORD $0x10ba // movdqu    xmm1, oword [rdx + 4*rdi + 16]
 26322  	LONG $0x1e380f66; BYTE $0xc0   // pabsd    xmm0, xmm0
 26323  	LONG $0x1e380f66; BYTE $0xc9   // pabsd    xmm1, xmm1
 26324  	LONG $0x047f0ff3; BYTE $0xb9   // movdqu    oword [rcx + 4*rdi], xmm0
 26325  	LONG $0x4c7f0ff3; WORD $0x10b9 // movdqu    oword [rcx + 4*rdi + 16], xmm1
 26326  
 26327  LBB3_918:
 26328  	WORD $0x394c; BYTE $0xce // cmp    rsi, r9
 26329  	JE   LBB3_923
 26330  	JMP  LBB3_919
 26331  
 26332  DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 // LCDATA5: constant pool; the TEXT below loads its address into BP. +0x000: only bit 63 set in this qword
 26333  DATA LCDATA5<>+0x008(SB)/8, $0x8000000000000000 // +0x008: same pattern, completing a 16-byte slot of two 0x8000000000000000 qwords
 26334  DATA LCDATA5<>+0x010(SB)/8, $0x3ff0000000000000 // +0x010: IEEE-754 binary64 bit pattern of 1.0
 26335  DATA LCDATA5<>+0x018(SB)/8, $0x3ff0000000000000 // +0x018: binary64 1.0 again (both qwords of the slot)
 26336  DATA LCDATA5<>+0x020(SB)/8, $0x7fffffff7fffffff // +0x020: two dwords of 0x7fffffff (every bit except bit 31)
 26337  DATA LCDATA5<>+0x028(SB)/8, $0x7fffffff7fffffff // +0x028: same, so the slot holds four 0x7fffffff dwords
 26338  DATA LCDATA5<>+0x030(SB)/8, $0x8000000080000000 // +0x030: two dwords of 0x80000000 (only bit 31 set)
 26339  DATA LCDATA5<>+0x038(SB)/8, $0x8000000080000000 // +0x038: same, so the slot holds four 0x80000000 dwords
 26340  DATA LCDATA5<>+0x040(SB)/8, $0x0000000000000400 // +0x040: bytes 00 04 00 ... (little-endian); NOTE(review): use not visible in this chunk, possibly a byte-shuffle control — confirm
 26341  DATA LCDATA5<>+0x048(SB)/8, $0x0000000000000000 // +0x048: zero upper half of the slot
 26342  DATA LCDATA5<>+0x050(SB)/8, $0x0000000100000001 // +0x050: two dwords of 1
 26343  DATA LCDATA5<>+0x058(SB)/8, $0x0000000100000001 // +0x058: same, so the slot holds four dwords of 1
 26344  DATA LCDATA5<>+0x060(SB)/8, $0x4f0000004f000000 // +0x060: two dwords of 0x4f000000, the binary32 bit pattern of 2^31
 26345  DATA LCDATA5<>+0x068(SB)/8, $0x4f0000004f000000 // +0x068: same, so the slot holds four binary32 2^31 values
 26346  DATA LCDATA5<>+0x070(SB)/8, $0x0001000100010001 // +0x070: four 16-bit words of 1 in the low qword
 26347  DATA LCDATA5<>+0x078(SB)/8, $0x0000000000000000 // +0x078: zero upper half of the slot
 26348  DATA LCDATA5<>+0x080(SB)/8, $0x0000000001010101 // +0x080: four bytes of 1 in the low dword, rest zero
 26349  DATA LCDATA5<>+0x088(SB)/8, $0x0000000000000000 // +0x088: zero upper half of the slot
 26350  DATA LCDATA5<>+0x090(SB)/8, $0x0000000000000001 // +0x090: qword integer 1
 26351  DATA LCDATA5<>+0x098(SB)/8, $0x0000000000000001 // +0x098: qword integer 1 (both qwords of the slot)
 26352  DATA LCDATA5<>+0x0a0(SB)/8, $0x0000000100000001 // +0x0a0: two dwords of 1 in the low qword
 26353  DATA LCDATA5<>+0x0a8(SB)/8, $0x0000000000000000 // +0x0a8: zero upper half of the slot
 26354  DATA LCDATA5<>+0x0b0(SB)/8, $0x0000000000010001 // +0x0b0: two 16-bit words of 1 in the low dword, rest zero
 26355  DATA LCDATA5<>+0x0b8(SB)/8, $0x0000000000000000 // +0x0b8: zero upper half of the slot
 26356  DATA LCDATA5<>+0x0c0(SB)/8, $0x0000000000000101 // +0x0c0: two bytes of 1 in the low word, rest zero
 26357  DATA LCDATA5<>+0x0c8(SB)/8, $0x0000000000000000 // +0x0c8: zero upper half of the slot
 26358  DATA LCDATA5<>+0x0d0(SB)/8, $0x3f8000003f800000 // +0x0d0: two dwords of 0x3f800000, the binary32 bit pattern of 1.0
 26359  DATA LCDATA5<>+0x0d8(SB)/8, $0x3f8000003f800000 // +0x0d8: same, so the slot holds four binary32 1.0 values
 26360  DATA LCDATA5<>+0x0e0(SB)/8, $0x0001000100010001 // +0x0e0: four 16-bit words of 1
 26361  DATA LCDATA5<>+0x0e8(SB)/8, $0x0001000100010001 // +0x0e8: same, so the slot holds eight words of 1
 26362  DATA LCDATA5<>+0x0f0(SB)/8, $0x0101010101010101 // +0x0f0: eight bytes of 1 in the low qword
 26363  DATA LCDATA5<>+0x0f8(SB)/8, $0x0000000000000000 // +0x0f8: zero upper half of the slot
 26364  DATA LCDATA5<>+0x100(SB)/8, $0x0101010101010101 // +0x100: eight bytes of 1
 26365  DATA LCDATA5<>+0x108(SB)/8, $0x0101010101010101 // +0x108: same, so the slot holds sixteen bytes of 1
 26366  DATA LCDATA5<>+0x110(SB)/8, $0x3ff0000000000000 // +0x110: single qword, binary64 bit pattern of 1.0
 26367  DATA LCDATA5<>+0x118(SB)/8, $0x43e0000000000000 // +0x118: single qword, binary64 bit pattern of 2^63
 26368  DATA LCDATA5<>+0x120(SB)/8, $0xbff0000000000000 // +0x120: single qword, binary64 bit pattern of -1.0
 26369  DATA LCDATA5<>+0x128(SB)/8, $0x5f0000003f800000 // +0x128: low dword = binary32 1.0 (0x3f800000), high dword = binary32 2^63 (0x5f000000)
 26370  DATA LCDATA5<>+0x130(SB)/8, $0x00000000bf800000 // +0x130: low dword = binary32 -1.0 (0xbf800000), high dword zero
 26371  GLOBL LCDATA5<>(SB), 8, $312 // file-local symbol (<>), flag 8 (RODATA in Go's textflag.h), 312 bytes = 0x138, i.e. through the qword at +0x130
 26372  
 26373  TEXT ยท_arithmetic_unary_diff_type_sse4(SB), $0-48
 26374  
 26375  	MOVQ itype+0(FP), DI
 26376  	MOVQ otype+8(FP), SI
 26377  	MOVQ op+16(FP), DX
 26378  	MOVQ input+24(FP), CX
 26379  	MOVQ output+32(FP), R8
 26380  	MOVQ len+40(FP), R9
 26381  	LEAQ LCDATA5<>(SB), BP
 26382  
 26383  	WORD $0xfa80; BYTE $0x14 // cmp    dl, 20
 26384  	JNE  LBB4_1655
 26385  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 26386  	JG   LBB4_14
 26387  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 26388  	JLE  LBB4_26
 26389  	WORD $0xff83; BYTE $0x04 // cmp    edi, 4
 26390  	JE   LBB4_46
 26391  	WORD $0xff83; BYTE $0x05 // cmp    edi, 5
 26392  	JE   LBB4_54
 26393  	WORD $0xff83; BYTE $0x06 // cmp    edi, 6
 26394  	JNE  LBB4_1655
 26395  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26396  	JG   LBB4_94
 26397  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26398  	JLE  LBB4_200
 26399  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26400  	JE   LBB4_303
 26401  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26402  	JE   LBB4_306
 26403  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26404  	JNE  LBB4_1655
 26405  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26406  	JLE  LBB4_1655
 26407  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 26408  	LONG $0x08f98341         // cmp    r9d, 8
 26409  	JB   LBB4_13
 26410  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 26411  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 26412  	JBE  LBB4_496
 26413  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 26414  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 26415  	JBE  LBB4_496
 26416  
 26417  LBB4_13:
 26418  	WORD $0xd231 // xor    edx, edx
 26419  
 26420  LBB4_1232:
 26421  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 26422  	WORD $0xf748; BYTE $0xd6 // not    rsi
 26423  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 26424  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 26425  	LONG $0x03e78348         // and    rdi, 3
 26426  	JE   LBB4_1234
 26427  
 26428  LBB4_1233:
 26429  	WORD $0xc031             // xor    eax, eax
 26430  	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
 26431  	WORD $0x950f; BYTE $0xd0 // setne    al
 26432  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 26433  	LONG $0x01c28348         // add    rdx, 1
 26434  	LONG $0xffc78348         // add    rdi, -1
 26435  	JNE  LBB4_1233
 26436  
 26437  LBB4_1234:
 26438  	LONG $0x03fe8348 // cmp    rsi, 3
 26439  	JB   LBB4_1655
 26440  
 26441  LBB4_1235:
 26442  	WORD $0xc031                 // xor    eax, eax
 26443  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 26444  	WORD $0x950f; BYTE $0xd0     // setne    al
 26445  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 26446  	WORD $0xc031                 // xor    eax, eax
 26447  	LONG $0x04917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 4], 0
 26448  	WORD $0x950f; BYTE $0xd0     // setne    al
 26449  	LONG $0x90448941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], eax
 26450  	WORD $0xc031                 // xor    eax, eax
 26451  	LONG $0x08917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 8], 0
 26452  	WORD $0x950f; BYTE $0xd0     // setne    al
 26453  	LONG $0x90448941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], eax
 26454  	WORD $0xc031                 // xor    eax, eax
 26455  	LONG $0x0c917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 12], 0
 26456  	WORD $0x950f; BYTE $0xd0     // setne    al
 26457  	LONG $0x90448941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], eax
 26458  	LONG $0x04c28348             // add    rdx, 4
 26459  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 26460  	JNE  LBB4_1235
 26461  	JMP  LBB4_1655
 26462  
 26463  LBB4_14:
 26464  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 26465  	JLE  LBB4_36
 26466  	WORD $0xff83; BYTE $0x09 // cmp    edi, 9
 26467  	JE   LBB4_62
 26468  	WORD $0xff83; BYTE $0x0b // cmp    edi, 11
 26469  	JE   LBB4_70
 26470  	WORD $0xff83; BYTE $0x0c // cmp    edi, 12
 26471  	JNE  LBB4_1655
 26472  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26473  	JG   LBB4_106
 26474  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26475  	JLE  LBB4_205
 26476  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26477  	JE   LBB4_309
 26478  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26479  	JE   LBB4_312
 26480  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26481  	JNE  LBB4_1655
 26482  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26483  	JLE  LBB4_1655
 26484  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
 26485  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
 26486  	LONG $0x04f98341         // cmp    r9d, 4
 26487  	JAE  LBB4_499
 26488  	WORD $0xf631             // xor    esi, esi
 26489  	JMP  LBB4_1110
 26490  
 26491  LBB4_26:
 26492  	WORD $0xff83; BYTE $0x02 // cmp    edi, 2
 26493  	JE   LBB4_78
 26494  	WORD $0xff83; BYTE $0x03 // cmp    edi, 3
 26495  	JNE  LBB4_1655
 26496  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26497  	JG   LBB4_113
 26498  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26499  	JLE  LBB4_210
 26500  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26501  	JE   LBB4_315
 26502  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26503  	JE   LBB4_318
 26504  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26505  	JNE  LBB4_1655
 26506  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26507  	JLE  LBB4_1655
 26508  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 26509  	LONG $0x08f98341         // cmp    r9d, 8
 26510  	JB   LBB4_35
 26511  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 26512  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 26513  	JBE  LBB4_504
 26514  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 26515  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 26516  	JBE  LBB4_504
 26517  
 26518  LBB4_35:
 26519  	WORD $0xd231 // xor    edx, edx
 26520  
 26521  LBB4_1240:
 26522  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 26523  	WORD $0xf748; BYTE $0xd6     // not    rsi
 26524  	LONG $0x01c2f641             // test    r10b, 1
 26525  	JE   LBB4_1242
 26526  	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
 26527  	WORD $0xff31                 // xor    edi, edi
 26528  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 26529  	LONG $0xd7950f40             // setne    dil
 26530  	WORD $0xdff7                 // neg    edi
 26531  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 26532  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 26533  	WORD $0x4e0f; BYTE $0xc7     // cmovle    eax, edi
 26534  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 26535  	LONG $0x01ca8348             // or    rdx, 1
 26536  
 26537  LBB4_1242:
 26538  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
 26539  	JE   LBB4_1655
 26540  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 26541  
 26542  LBB4_1244:
 26543  	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
 26544  	WORD $0xff31                 // xor    edi, edi
 26545  	WORD $0xc084                 // test    al, al
 26546  	LONG $0xd7950f40             // setne    dil
 26547  	WORD $0xdff7                 // neg    edi
 26548  	WORD $0xc084                 // test    al, al
 26549  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 26550  	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
 26551  	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
 26552  	WORD $0xff31                 // xor    edi, edi
 26553  	WORD $0xc084                 // test    al, al
 26554  	LONG $0xd7950f40             // setne    dil
 26555  	WORD $0xdff7                 // neg    edi
 26556  	WORD $0xc084                 // test    al, al
 26557  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 26558  	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
 26559  	LONG $0x02c28348             // add    rdx, 2
 26560  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 26561  	JNE  LBB4_1244
 26562  	JMP  LBB4_1655
 26563  
 26564  LBB4_36:
        	// Two-level dispatch. On edi: 7 -> LBB4_86, anything other than 8 -> LBB4_1655.
        	// With edi == 8 the code switches on esi: >6 -> LBB4_123, <=3 -> LBB4_215,
        	// 4 -> LBB4_321, 5 -> LBB4_324, 6 -> falls through to the count check below.
        	// NOTE(review): edi/esi look like input/output element-type ids and LBB4_1655
        	// like the common exit; both are set up outside this chunk -- confirm.
 26565  	WORD $0xff83; BYTE $0x07 // cmp    edi, 7
 26566  	JE   LBB4_86
 26567  	WORD $0xff83; BYTE $0x08 // cmp    edi, 8
 26568  	JNE  LBB4_1655
 26569  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26570  	JG   LBB4_123
 26571  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26572  	JLE  LBB4_215
 26573  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26574  	JE   LBB4_321
 26575  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26576  	JE   LBB4_324
 26577  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26578  	JNE  LBB4_1655
        	// esi == 6: nothing to do when the signed 32-bit count in r9d is <= 0.
 26579  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26580  	JLE  LBB4_1655
        	// eax = count (zero-extended). Counts >= 4 go to LBB4_507; smaller counts
        	// start at index rdx = 0 in LBB4_998. Both targets are outside this chunk.
 26581  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 26582  	LONG $0x04f98341         // cmp    r9d, 4
 26583  	JAE  LBB4_507
 26584  	WORD $0xd231             // xor    edx, edx
 26585  	JMP  LBB4_998
 26586  
 26587  LBB4_46:
        	// Switch on esi: >6 -> LBB4_135, <=3 -> LBB4_220, 4 -> LBB4_327, 5 -> LBB4_330,
        	// 6 -> count check below, everything else -> LBB4_1655.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26588  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26589  	JG   LBB4_135
 26590  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26591  	JLE  LBB4_220
 26592  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26593  	JE   LBB4_327
 26594  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26595  	JE   LBB4_330
 26596  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26597  	JNE  LBB4_1655
        	// esi == 6: skip all work when the signed count in r9d is <= 0.
 26598  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26599  	JLE  LBB4_1655
        	// eax = count. Counts >= 8 go to LBB4_510; otherwise index rdx = 0 and
        	// continue in LBB4_1116 (both outside this chunk).
 26600  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 26601  	LONG $0x08f98341         // cmp    r9d, 8
 26602  	JAE  LBB4_510
 26603  	WORD $0xd231             // xor    edx, edx
 26604  	JMP  LBB4_1116
 26605  
 26606  LBB4_54:
        	// Switch on esi: >6 -> LBB4_147, <=3 -> LBB4_225, 4 -> LBB4_333, 5 -> LBB4_336,
        	// 6 -> count check below, everything else -> LBB4_1655.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26607  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26608  	JG   LBB4_147
 26609  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26610  	JLE  LBB4_225
 26611  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26612  	JE   LBB4_333
 26613  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26614  	JE   LBB4_336
 26615  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26616  	JNE  LBB4_1655
        	// esi == 6: skip all work when the signed count in r9d is <= 0.
 26617  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26618  	JLE  LBB4_1655
        	// r10 = count (zero-extended). Counts >= 8 go to LBB4_513; otherwise
        	// index rdx = 0 and continue in LBB4_1121 (both outside this chunk).
 26619  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 26620  	LONG $0x08f98341         // cmp    r9d, 8
 26621  	JAE  LBB4_513
 26622  	WORD $0xd231             // xor    edx, edx
 26623  	JMP  LBB4_1121
 26624  
 26625  LBB4_62:
        	// Switch on esi: >6 -> LBB4_157, <=3 -> LBB4_230, 4 -> LBB4_339, 5 -> LBB4_342,
        	// 6 -> count check below, everything else -> LBB4_1655.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26626  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26627  	JG   LBB4_157
 26628  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26629  	JLE  LBB4_230
 26630  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26631  	JE   LBB4_339
 26632  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26633  	JE   LBB4_342
 26634  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26635  	JNE  LBB4_1655
        	// esi == 6: skip all work when the signed count in r9d is <= 0.
 26636  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26637  	JLE  LBB4_1655
        	// r10 = count (zero-extended). Counts >= 4 go to LBB4_516; otherwise
        	// index rdx = 0 and continue in LBB4_1127 (both outside this chunk).
 26638  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 26639  	LONG $0x04f98341         // cmp    r9d, 4
 26640  	JAE  LBB4_516
 26641  	WORD $0xd231             // xor    edx, edx
 26642  	JMP  LBB4_1127
 26643  
 26644  LBB4_70:
        	// Switch on esi: >6 -> LBB4_167, <=3 -> LBB4_235, 4 -> LBB4_345, 5 -> LBB4_348,
        	// 6 -> count check below, everything else -> LBB4_1655.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26645  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26646  	JG   LBB4_167
 26647  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26648  	JLE  LBB4_235
 26649  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26650  	JE   LBB4_345
 26651  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26652  	JE   LBB4_348
 26653  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26654  	JNE  LBB4_1655
        	// esi == 6: skip all work when the signed count in r9d is <= 0.
 26655  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26656  	JLE  LBB4_1655
        	// eax = count. Counts >= 4 go to LBB4_519; otherwise index rdx = 0 and
        	// continue in LBB4_1133 (both outside this chunk).
 26657  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 26658  	LONG $0x04f98341         // cmp    r9d, 4
 26659  	JAE  LBB4_519
 26660  	WORD $0xd231             // xor    edx, edx
 26661  	JMP  LBB4_1133
 26662  
 26663  LBB4_78:
        	// Switch on esi: >6 -> LBB4_178, <=3 -> LBB4_240, 4 -> LBB4_351, 5 -> LBB4_354,
        	// 6 -> handled here, everything else -> LBB4_1655.
        	// The esi == 6 case reads one byte per element from [rcx] and writes one
        	// dword per element to [r8]: out[i] = (in[i] != 0) ? 1 : 0.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26664  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26665  	JG   LBB4_178
 26666  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26667  	JLE  LBB4_240
 26668  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26669  	JE   LBB4_351
 26670  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26671  	JE   LBB4_354
 26672  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26673  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26674  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26675  	JLE  LBB4_1655
        	// r10 = count. Fewer than 8 elements: go straight to the scalar code.
 26676  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 26677  	LONG $0x08f98341         // cmp    r9d, 8
 26678  	JB   LBB4_85
        	// Overlap test: LBB4_524 (outside this chunk) is taken only when the input
        	// byte range [rcx, rcx+n) and the output dword range [r8, r8+4n) are disjoint;
        	// otherwise fall through to the scalar code.
 26679  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 26680  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 26681  	JBE  LBB4_524
 26682  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 26683  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 26684  	JBE  LBB4_524
 26685  
 26686  LBB4_85:
        	// Scalar path starts at element index rdx = 0.
 26687  	WORD $0xd231 // xor    edx, edx
 26688  
 26689  LBB4_1249:
        	// rsi = n - rdx - 1 (elements remaining minus one), rdi = n & 3 (peel count).
 26690  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 26691  	WORD $0xf748; BYTE $0xd6 // not    rsi
 26692  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 26693  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 26694  	LONG $0x03e78348         // and    rdi, 3
 26695  	JE   LBB4_1251
 26696  
 26697  LBB4_1250:
        	// Peel loop: one element per iteration, rdi iterations.
 26698  	WORD $0xc031             // xor    eax, eax
 26699  	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
 26700  	WORD $0x950f; BYTE $0xd0 // setne    al
 26701  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 26702  	LONG $0x01c28348         // add    rdx, 1
 26703  	LONG $0xffc78348         // add    rdi, -1
 26704  	JNE  LBB4_1250
 26705  
 26706  LBB4_1251:
        	// If fewer than 4 elements were pending on entry, the peel loop covered them all.
 26707  	LONG $0x03fe8348 // cmp    rsi, 3
 26708  	JB   LBB4_1655
 26709  
 26710  LBB4_1252:
        	// Main loop, unrolled 4x: four bytes in, four dwords out, until rdx == n.
 26711  	WORD $0xc031                 // xor    eax, eax
 26712  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 26713  	WORD $0x950f; BYTE $0xd0     // setne    al
 26714  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 26715  	WORD $0xc031                 // xor    eax, eax
 26716  	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
 26717  	WORD $0x950f; BYTE $0xd0     // setne    al
 26718  	LONG $0x90448941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], eax
 26719  	WORD $0xc031                 // xor    eax, eax
 26720  	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
 26721  	WORD $0x950f; BYTE $0xd0     // setne    al
 26722  	LONG $0x90448941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], eax
 26723  	WORD $0xc031                 // xor    eax, eax
 26724  	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
 26725  	WORD $0x950f; BYTE $0xd0     // setne    al
 26726  	LONG $0x90448941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], eax
 26727  	LONG $0x04c28348             // add    rdx, 4
 26728  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 26729  	JNE  LBB4_1252
 26730  	JMP  LBB4_1655
 26731  
 26732  LBB4_86:
        	// Switch on esi: >6 -> LBB4_190, <=3 -> LBB4_245, 4 -> LBB4_357, 5 -> LBB4_360,
        	// 6 -> handled here, everything else -> LBB4_1655.
        	// The esi == 6 case reads dwords from [rcx] and writes dwords to [r8]:
        	// out[i] = 1 if in[i] > 0 (signed), 0 if in[i] == 0, -1 otherwise.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 26733  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26734  	JG   LBB4_190
 26735  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 26736  	JLE  LBB4_245
 26737  	WORD $0xfe83; BYTE $0x04 // cmp    esi, 4
 26738  	JE   LBB4_357
 26739  	WORD $0xfe83; BYTE $0x05 // cmp    esi, 5
 26740  	JE   LBB4_360
 26741  	WORD $0xfe83; BYTE $0x06 // cmp    esi, 6
 26742  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26743  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26744  	JLE  LBB4_1655
        	// r11 = count. Fewer than 8 elements: go straight to the scalar code.
 26745  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
 26746  	LONG $0x08f98341         // cmp    r9d, 8
 26747  	JB   LBB4_93
        	// Overlap test: LBB4_529 (outside this chunk) is taken only when the dword
        	// ranges [rcx, rcx+4n) and [r8, r8+4n) are disjoint.
 26748  	LONG $0x99148d4a         // lea    rdx, [rcx + 4*r11]
 26749  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 26750  	JBE  LBB4_529
 26751  	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
 26752  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 26753  	JBE  LBB4_529
 26754  
 26755  LBB4_93:
        	// Scalar path starts at element index rdx = 0.
 26756  	WORD $0xd231 // xor    edx, edx
 26757  
 26758  LBB4_1257:
        	// rsi = ~rdx now; r11 is added at LBB4_1259 to give n - rdx - 1.
        	// When n is odd, one element is handled here so the loop can step by two.
 26759  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 26760  	WORD $0xf748; BYTE $0xd6     // not    rsi
 26761  	LONG $0x01c3f641             // test    r11b, 1
 26762  	JE   LBB4_1259
        	// r10d = -(x != 0), i.e. 0 or -1; edi = 1, replaced by r10d when x <= 0.
 26763  	LONG $0x910c8b44             // mov    r9d, dword [rcx + 4*rdx]
 26764  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 26765  	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
 26766  	LONG $0xd2950f41             // setne    r10b
 26767  	WORD $0xf741; BYTE $0xda     // neg    r10d
 26768  	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
 26769  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 26770  	LONG $0xfa4e0f41             // cmovle    edi, r10d
 26771  	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
 26772  	LONG $0x01ca8348             // or    rdx, 1
 26773  
 26774  LBB4_1259:
        	// rsi = n - (rdx at LBB4_1257) - 1; zero means only one element was pending.
 26775  	WORD $0x014c; BYTE $0xde     // add    rsi, r11
 26776  	JE   LBB4_1655
        	// esi = constant 1, the cmovg source in the loop below.
 26777  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 26778  
 26779  LBB4_1261:
        	// Main loop, unrolled 2x: result = -(x != 0), overridden with 1 when x > 0.
 26780  	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
 26781  	WORD $0xc031                 // xor    eax, eax
 26782  	WORD $0xff85                 // test    edi, edi
 26783  	WORD $0x950f; BYTE $0xd0     // setne    al
 26784  	WORD $0xd8f7                 // neg    eax
 26785  	WORD $0xff85                 // test    edi, edi
 26786  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 26787  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 26788  	LONG $0x0491448b             // mov    eax, dword [rcx + 4*rdx + 4]
 26789  	WORD $0xff31                 // xor    edi, edi
 26790  	WORD $0xc085                 // test    eax, eax
 26791  	LONG $0xd7950f40             // setne    dil
 26792  	WORD $0xdff7                 // neg    edi
 26793  	WORD $0xc085                 // test    eax, eax
 26794  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 26795  	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
 26796  	LONG $0x02c28348             // add    rdx, 2
 26797  	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
 26798  	JNE  LBB4_1261
 26799  	JMP  LBB4_1655
 26800  
 26801  LBB4_94:
        	// Switch on esi: <=8 -> LBB4_250, 9 -> LBB4_363, 11 -> LBB4_366,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads dwords from [rcx] and writes qwords (doubles) to [r8]:
        	// out[i] = (in[i] != 0) ? <constant at 272[rbp]> : 0.0.
        	// NOTE(review): the constant .LCPI4_2 is defined outside this chunk; presumably 1.0 -- confirm.
 26802  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 26803  	JLE  LBB4_250
 26804  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 26805  	JE   LBB4_363
 26806  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 26807  	JE   LBB4_366
 26808  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 26809  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26810  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26811  	JLE  LBB4_1655
        	// rdx = n, rsi = n - 1, rax = n & 3 (tail length). With n >= 4 the bulk is
        	// done at LBB4_532 (outside this chunk); otherwise start index rsi = 0.
 26812  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 26813  	LONG $0xff728d48         // lea    rsi, [rdx - 1]
 26814  	WORD $0xd089             // mov    eax, edx
 26815  	WORD $0xe083; BYTE $0x03 // and    eax, 3
 26816  	LONG $0x03fe8348         // cmp    rsi, 3
 26817  	JAE  LBB4_532
 26818  	WORD $0xf631             // xor    esi, esi
 26819  
 26820  LBB4_101:
        	// Tail: rax elements starting at index rsi. Rebase both pointers at that
        	// index, then restart rsi at 0 as the tail-local counter.
 26821  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 26822  	JE   LBB4_1655
 26823  	LONG $0xf0148d49         // lea    rdx, [r8 + 8*rsi]
 26824  	LONG $0xb10c8d48         // lea    rcx, [rcx + 4*rsi]
 26825  	WORD $0xf631             // xor    esi, esi
 26826  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 26827  	JMP  LBB4_104
 26828  
 26829  LBB4_103:
        	// Store the selected value and advance; stop after rax elements.
 26830  	LONG $0x0c110ff2; BYTE $0xf2 // movsd    qword [rdx + 8*rsi], xmm1
 26831  	LONG $0x01c68348             // add    rsi, 1
 26832  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 26833  	JE   LBB4_1655
 26834  
 26835  LBB4_104:
        	// xmm1 = xmm0 for a non-zero input, 0.0 for a zero input.
        	// movapd leaves EFLAGS alone, so JNE still sees the cmp result.
 26836  	LONG $0x00b13c83 // cmp    dword [rcx + 4*rsi], 0
 26837  	LONG $0xc8280f66 // movapd    xmm1, xmm0
 26838  	JNE  LBB4_103
 26839  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 26840  	JMP  LBB4_103
 26841  
 26842  LBB4_106:
        	// Switch on esi: <=8 -> LBB4_255, 9 -> LBB4_369, 11 -> LBB4_372,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case maps doubles at [rcx] to doubles at [r8]:
        	// out = (x == 0.0) ? 0.0 : ((x AND mask at 0[rbp]) OR constant at 272[rbp]).
        	// NOTE(review): .LCPI4_0 / .LCPI4_2 are defined outside this chunk; presumably
        	// the sign-bit mask and 1.0, which would make this sign(x) -- confirm.
 26843  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 26844  	JLE  LBB4_255
 26845  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 26846  	JE   LBB4_369
 26847  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 26848  	JE   LBB4_372
 26849  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 26850  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26851  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26852  	JLE  LBB4_1655
        	// rax = count. Fewer than 4 elements: go straight to the scalar code.
 26853  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 26854  	LONG $0x04f98341         // cmp    r9d, 4
 26855  	JB   LBB4_112
        	// Overlap test: LBB4_544 (outside this chunk) is taken only when the qword
        	// ranges [rcx, rcx+8n) and [r8, r8+8n) are disjoint.
 26856  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 26857  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 26858  	JBE  LBB4_544
 26859  	LONG $0xc0148d49         // lea    rdx, [r8 + 8*rax]
 26860  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 26861  	JBE  LBB4_544
 26862  
 26863  LBB4_112:
        	// Scalar path starts at element index rdx = 0.
 26864  	WORD $0xd231 // xor    edx, edx
 26865  
 26866  LBB4_1266:
        	// rsi = ~rdx now; rax is added at LBB4_1268 to give n - rdx - 1.
        	// When n is odd, one element is handled here so the loop can step by two.
 26867  	WORD $0x8948; BYTE $0xd6       // mov    rsi, rdx
 26868  	WORD $0xf748; BYTE $0xd6       // not    rsi
 26869  	WORD $0x01a8                   // test    al, 1
 26870  	JE   LBB4_1268
        	// xmm2 = (x AND mask) OR const; xmm1 = all-ones when x == 0.0; andnpd then
        	// clears the result to 0.0 for zero inputs and keeps xmm2 otherwise.
 26871  	LONG $0x04100ff2; BYTE $0xd1   // movsd    xmm0, qword [rcx + 8*rdx]
 26872  	LONG $0x4d280f66; BYTE $0x00   // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 26873  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 26874  	QUAD $0x0000011095100ff2       // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 26875  	LONG $0xd1560f66               // orpd    xmm2, xmm1
 26876  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 26877  	LONG $0xc8c20ff2; BYTE $0x00   // cmpeqsd    xmm1, xmm0
 26878  	LONG $0xca550f66               // andnpd    xmm1, xmm2
 26879  	LONG $0x130f4166; WORD $0xd00c // movlpd    qword [r8 + 8*rdx], xmm1
 26880  	LONG $0x01ca8348               // or    rdx, 1
 26881  
 26882  LBB4_1268:
        	// rsi = n - (rdx at LBB4_1266) - 1; zero means only one element was pending.
        	// Otherwise hoist the loop constants: xmm0 = mask, xmm1 = const, xmm2 = 0.0.
 26883  	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
 26884  	JE   LBB4_1655
 26885  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI4_0] */
 26886  	QUAD $0x000001108d100ff2     // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 26887  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 26888  
 26889  LBB4_1270:
        	// Main loop, unrolled 2x, same computation as the peeled element above.
 26890  	LONG $0x1c100ff2; BYTE $0xd1               // movsd    xmm3, qword [rcx + 8*rdx]
 26891  	LONG $0xe3280f66                           // movapd    xmm4, xmm3
 26892  	LONG $0xe0540f66                           // andpd    xmm4, xmm0
 26893  	LONG $0xe1560f66                           // orpd    xmm4, xmm1
 26894  	LONG $0xdac20ff2; BYTE $0x00               // cmpeqsd    xmm3, xmm2
 26895  	LONG $0xdc550f66                           // andnpd    xmm3, xmm4
 26896  	LONG $0x130f4166; WORD $0xd01c             // movlpd    qword [r8 + 8*rdx], xmm3
 26897  	LONG $0x5c100ff2; WORD $0x08d1             // movsd    xmm3, qword [rcx + 8*rdx + 8]
 26898  	LONG $0xe3280f66                           // movapd    xmm4, xmm3
 26899  	LONG $0xe0540f66                           // andpd    xmm4, xmm0
 26900  	LONG $0xe1560f66                           // orpd    xmm4, xmm1
 26901  	LONG $0xdac20ff2; BYTE $0x00               // cmpeqsd    xmm3, xmm2
 26902  	LONG $0xdc550f66                           // andnpd    xmm3, xmm4
 26903  	LONG $0x130f4166; WORD $0xd05c; BYTE $0x08 // movlpd    qword [r8 + 8*rdx + 8], xmm3
 26904  	LONG $0x02c28348                           // add    rdx, 2
 26905  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 26906  	JNE  LBB4_1270
 26907  	JMP  LBB4_1655
 26908  
 26909  LBB4_113:
        	// Switch on esi: <=8 -> LBB4_260, 9 -> LBB4_375, 11 -> LBB4_378,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads bytes from [rcx]; the qword result is stored by
        	// the shared tail LBB4_982/LBB4_983.
 26910  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 26911  	JLE  LBB4_260
 26912  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 26913  	JE   LBB4_375
 26914  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 26915  	JE   LBB4_378
 26916  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 26917  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26918  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26919  	JLE  LBB4_1655
        	// rdx = count. Any count other than 1 goes through LBB4_547 (outside this
        	// chunk); a single element is handled at index rax = 0.
 26920  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 26921  	LONG $0x01f98341         // cmp    r9d, 1
 26922  	JNE  LBB4_547
 26923  	WORD $0xc031             // xor    eax, eax
 26924  
 26925  LBB4_120:
        	// Odd-count tail: one element at index rax, only if the count is odd.
        	// The flags set by this cmp are consumed later by the JLE at LBB4_983.
 26926  	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
 26927  	JE   LBB4_1655
 26928  	LONG $0x00013c80         // cmp    byte [rcx + rax], 0
 26929  	JNE  LBB4_982
 26930  
 26931  LBB4_122:
        	// Zero input: result 0.0. xorpd does not touch EFLAGS, so ZF from the
        	// preceding compare is still set when LBB4_983 tests it.
 26932  	LONG $0xc0570f66 // xorpd    xmm0, xmm0
 26933  	JMP  LBB4_983
 26934  
 26935  LBB4_123:
        	// Switch on esi: <=8 -> LBB4_265, 9 -> LBB4_381, 11 -> LBB4_384,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads qwords from [rcx] and writes qwords (doubles) to [r8]:
        	// out[i] = (in[i] != 0) ? <constant at 272[rbp]> : 0.0.
        	// NOTE(review): the constant .LCPI4_2 is defined outside this chunk; presumably 1.0 -- confirm.
 26936  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 26937  	JLE  LBB4_265
 26938  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 26939  	JE   LBB4_381
 26940  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 26941  	JE   LBB4_384
 26942  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 26943  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26944  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26945  	JLE  LBB4_1655
        	// rdx = n, rsi = n - 1, rax = n & 3 (tail length). With n >= 4 the bulk is
        	// done at LBB4_557 (outside this chunk); otherwise start index rsi = 0.
 26946  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 26947  	LONG $0xff728d48         // lea    rsi, [rdx - 1]
 26948  	WORD $0xd089             // mov    eax, edx
 26949  	WORD $0xe083; BYTE $0x03 // and    eax, 3
 26950  	LONG $0x03fe8348         // cmp    rsi, 3
 26951  	JAE  LBB4_557
 26952  	WORD $0xf631             // xor    esi, esi
 26953  
 26954  LBB4_130:
        	// Tail: rax elements starting at index rsi. Rebase both pointers at that
        	// index, then restart rsi at 0 as the tail-local counter.
 26955  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 26956  	JE   LBB4_1655
 26957  	LONG $0xf0148d49         // lea    rdx, [r8 + 8*rsi]
 26958  	LONG $0xf10c8d48         // lea    rcx, [rcx + 8*rsi]
 26959  	WORD $0xf631             // xor    esi, esi
 26960  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 26961  	JMP  LBB4_133
 26962  
 26963  LBB4_132:
        	// Store the selected value and advance; stop after rax elements.
 26964  	LONG $0x0c110ff2; BYTE $0xf2 // movsd    qword [rdx + 8*rsi], xmm1
 26965  	LONG $0x01c68348             // add    rsi, 1
 26966  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 26967  	JE   LBB4_1655
 26968  
 26969  LBB4_133:
        	// xmm1 = xmm0 for a non-zero input, 0.0 for a zero input.
        	// movapd leaves EFLAGS alone, so JNE still sees the cmp result.
 26970  	LONG $0xf13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rsi], 0
 26971  	LONG $0xc8280f66             // movapd    xmm1, xmm0
 26972  	JNE  LBB4_132
 26973  	LONG $0xc9570f66             // xorpd    xmm1, xmm1
 26974  	JMP  LBB4_132
 26975  
 26976  LBB4_135:
        	// Switch on esi: <=8 -> LBB4_270, 9 -> LBB4_392, 11 -> LBB4_395,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads 16-bit words from [rcx] and writes qwords (doubles) to [r8]:
        	// out[i] = (in[i] != 0) ? <constant at 272[rbp]> : 0.0.
        	// NOTE(review): the constant .LCPI4_2 is defined outside this chunk; presumably 1.0 -- confirm.
 26977  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 26978  	JLE  LBB4_270
 26979  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 26980  	JE   LBB4_392
 26981  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 26982  	JE   LBB4_395
 26983  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 26984  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 26985  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 26986  	JLE  LBB4_1655
        	// rdx = n, rsi = n - 1, rax = n & 3 (tail length). With n >= 4 the bulk is
        	// done at LBB4_567 (outside this chunk); otherwise start index rsi = 0.
 26987  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 26988  	LONG $0xff728d48         // lea    rsi, [rdx - 1]
 26989  	WORD $0xd089             // mov    eax, edx
 26990  	WORD $0xe083; BYTE $0x03 // and    eax, 3
 26991  	LONG $0x03fe8348         // cmp    rsi, 3
 26992  	JAE  LBB4_567
 26993  	WORD $0xf631             // xor    esi, esi
 26994  
 26995  LBB4_142:
        	// Tail: rax elements starting at index rsi. Rebase both pointers at that
        	// index, then restart rsi at 0 as the tail-local counter.
 26996  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 26997  	JE   LBB4_1655
 26998  	LONG $0xf0148d49         // lea    rdx, [r8 + 8*rsi]
 26999  	LONG $0x710c8d48         // lea    rcx, [rcx + 2*rsi]
 27000  	WORD $0xf631             // xor    esi, esi
 27001  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 27002  	JMP  LBB4_145
 27003  
 27004  LBB4_144:
        	// Store the selected value and advance; stop after rax elements.
 27005  	LONG $0x0c110ff2; BYTE $0xf2 // movsd    qword [rdx + 8*rsi], xmm1
 27006  	LONG $0x01c68348             // add    rsi, 1
 27007  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 27008  	JE   LBB4_1655
 27009  
 27010  LBB4_145:
        	// xmm1 = xmm0 for a non-zero input, 0.0 for a zero input.
        	// movapd leaves EFLAGS alone, so JNE still sees the cmp result.
 27011  	LONG $0x713c8366; BYTE $0x00 // cmp    word [rcx + 2*rsi], 0
 27012  	LONG $0xc8280f66             // movapd    xmm1, xmm0
 27013  	JNE  LBB4_144
 27014  	LONG $0xc9570f66             // xorpd    xmm1, xmm1
 27015  	JMP  LBB4_144
 27016  
 27017  LBB4_147:
        	// Switch on esi: <=8 -> LBB4_275, 9 -> LBB4_398, 11 -> LBB4_401,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads 16-bit words from [rcx] and stores a qword result
        	// through the shared tail LBB4_982/LBB4_983/LBB4_985 below.
 27018  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27019  	JLE  LBB4_275
 27020  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 27021  	JE   LBB4_398
 27022  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 27023  	JE   LBB4_401
 27024  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 27025  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27026  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27027  	JLE  LBB4_1655
        	// rdx = count. Any count other than 1 goes through LBB4_577 (outside this
        	// chunk); a single element is handled at index rax = 0.
 27028  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 27029  	LONG $0x01f98341         // cmp    r9d, 1
 27030  	JNE  LBB4_577
 27031  	WORD $0xc031             // xor    eax, eax
 27032  
 27033  LBB4_154:
        	// Odd-count tail: one element at index rax, only if the count is odd.
        	// A zero input takes the shared 0.0 path at LBB4_122.
 27034  	WORD $0xc2f6; BYTE $0x01     // test    dl, 1
 27035  	JE   LBB4_1655
 27036  	LONG $0x413c8366; BYTE $0x00 // cmp    word [rcx + 2*rax], 0
 27037  	JE   LBB4_122
 27038  
 27039  LBB4_982:
        	// Shared non-zero tail, also jumped to from other sections.
        	// NOTE(review): .LCPI4_13 is defined outside this chunk; presumably -1.0 -- confirm.
 27040  	QUAD $0x0000012085100ff2 // movsd    xmm0, qword 288[rbp] /* [rip + .LCPI4_13] */
 27041  
 27042  LBB4_983:
        	// EFLAGS still come from the caller's `cmp <elem>, 0`, because movsd/xorpd
        	// do not modify them. Input <= 0 (signed) keeps the xmm0 chosen so far;
        	// input > 0 replaces it with the constant at 272[rbp].
 27043  	JLE  LBB4_985
 27044  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 27045  
 27046  LBB4_985:
        	// Store the double result for element rax and leave.
 27047  	LONG $0x110f41f2; WORD $0xc004 // movsd    qword [r8 + 8*rax], xmm0
 27048  	JMP  LBB4_1655
 27049  
 27050  LBB4_157:
        	// Switch on esi: <=8 -> LBB4_280, 9 -> LBB4_404, 11 -> LBB4_407,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads qwords from [rcx]; the result is stored by the
        	// shared tails LBB4_122 (zero input) and LBB4_982 (non-zero input).
 27051  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27052  	JLE  LBB4_280
 27053  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 27054  	JE   LBB4_404
 27055  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 27056  	JE   LBB4_407
 27057  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 27058  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27059  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27060  	JLE  LBB4_1655
        	// rdx = count. Any count other than 1 goes through LBB4_587 (outside this
        	// chunk); a single element is handled at index rax = 0.
 27061  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 27062  	LONG $0x01f98341         // cmp    r9d, 1
 27063  	JNE  LBB4_587
 27064  	WORD $0xc031             // xor    eax, eax
 27065  
 27066  LBB4_164:
        	// Odd-count tail: one element at index rax, only if the count is odd.
        	// The flags set by this cmp are consumed later by the JLE at LBB4_983.
 27067  	WORD $0xc2f6; BYTE $0x01     // test    dl, 1
 27068  	JE   LBB4_1655
 27069  	LONG $0xc13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rax], 0
 27070  	JE   LBB4_122
 27071  	JMP  LBB4_982
 27072  
 27073  LBB4_167:
        	// Switch on esi: <=8 -> LBB4_285, 9 -> LBB4_413, 11 -> LBB4_419,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads a float32 from [rcx] and writes a double to [r8]:
        	// 0.0 when the input compares equal to zero, otherwise -1.0 or +1.0
        	// according to the input's sign bit.
 27074  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27075  	JLE  LBB4_285
 27076  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 27077  	JE   LBB4_413
 27078  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 27079  	JE   LBB4_419
 27080  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 27081  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27082  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27083  	JLE  LBB4_1655
        	// rdx = count. Any count other than 1 goes through LBB4_597 (outside this
        	// chunk); a single element is handled at index rax = 0.
 27084  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 27085  	LONG $0x01f98341         // cmp    r9d, 1
 27086  	JNE  LBB4_597
 27087  	WORD $0xc031             // xor    eax, eax
 27088  
 27089  LBB4_174:
        	// Odd-count tail: one element at index rax, only if the count is odd.
 27090  	WORD $0xc2f6; BYTE $0x01     // test    dl, 1
 27091  	JE   LBB4_1655
        	// xmm0 = 0.0 is the default result. ucomiss sets ZF for equal operands and
        	// also for an unordered compare, so JE stores 0.0 for +/-0 and for NaN.
 27092  	LONG $0x0c100ff3; BYTE $0x81 // movss    xmm1, dword [rcx + 4*rax]
 27093  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
 27094  	WORD $0x570f; BYTE $0xd2     // xorps    xmm2, xmm2
 27095  	WORD $0x2e0f; BYTE $0xd1     // ucomiss    xmm2, xmm1
 27096  	JE   LBB4_177
        	// ecx = sign bit of the input (0/1), turned into +1 / -1 by neg + or 1,
        	// then converted int -> float32 -> float64. rcx (the input pointer) is
        	// overwritten here; no later load in this section uses it.
 27097  	WORD $0x500f; BYTE $0xc9     // movmskps    ecx, xmm1
 27098  	WORD $0xe183; BYTE $0x01     // and    ecx, 1
 27099  	WORD $0xd9f7                 // neg    ecx
 27100  	WORD $0xc983; BYTE $0x01     // or    ecx, 1
 27101  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
 27102  	LONG $0xc12a0ff3             // cvtsi2ss    xmm0, ecx
 27103  	LONG $0xc05a0ff3             // cvtss2sd    xmm0, xmm0
 27104  
 27105  LBB4_177:
        	// Store the double result for element rax and leave.
 27106  	LONG $0x110f41f2; WORD $0xc004 // movsd    qword [r8 + 8*rax], xmm0
 27107  	JMP  LBB4_1655
 27108  
 27109  LBB4_178:
        	// Switch on esi: <=8 -> LBB4_293, 9 -> LBB4_422, 11 -> LBB4_425,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads bytes from [rcx] and writes qwords (doubles) to [r8]:
        	// out[i] = (in[i] != 0) ? <constant at 272[rbp]> : 0.0.
        	// NOTE(review): the constant .LCPI4_2 is defined outside this chunk; presumably 1.0 -- confirm.
 27110  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27111  	JLE  LBB4_293
 27112  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 27113  	JE   LBB4_422
 27114  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 27115  	JE   LBB4_425
 27116  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 27117  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27118  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27119  	JLE  LBB4_1655
        	// rdx = n, rsi = n - 1, rax = n & 3 (tail length). With n >= 4 the bulk is
        	// done at LBB4_603 (outside this chunk); otherwise start index rsi = 0.
 27120  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 27121  	LONG $0xff728d48         // lea    rsi, [rdx - 1]
 27122  	WORD $0xd089             // mov    eax, edx
 27123  	WORD $0xe083; BYTE $0x03 // and    eax, 3
 27124  	LONG $0x03fe8348         // cmp    rsi, 3
 27125  	JAE  LBB4_603
 27126  	WORD $0xf631             // xor    esi, esi
 27127  
 27128  LBB4_185:
        	// Tail: rax elements starting at index rsi. Rebase both pointers at that
        	// index, then restart rsi at 0 as the tail-local counter.
 27129  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 27130  	JE   LBB4_1655
 27131  	LONG $0xf0148d49         // lea    rdx, [r8 + 8*rsi]
 27132  	WORD $0x0148; BYTE $0xf1 // add    rcx, rsi
 27133  	WORD $0xf631             // xor    esi, esi
 27134  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 27135  	JMP  LBB4_188
 27136  
 27137  LBB4_187:
        	// Store the selected value and advance; stop after rax elements.
 27138  	LONG $0x0c110ff2; BYTE $0xf2 // movsd    qword [rdx + 8*rsi], xmm1
 27139  	LONG $0x01c68348             // add    rsi, 1
 27140  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 27141  	JE   LBB4_1655
 27142  
 27143  LBB4_188:
        	// xmm1 = xmm0 for a non-zero input, 0.0 for a zero input.
        	// movapd leaves EFLAGS alone, so JNE still sees the cmp result.
 27144  	LONG $0x00313c80 // cmp    byte [rcx + rsi], 0
 27145  	LONG $0xc8280f66 // movapd    xmm1, xmm0
 27146  	JNE  LBB4_187
 27147  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 27148  	JMP  LBB4_187
 27149  
 27150  LBB4_190:
        	// Switch on esi: <=8 -> LBB4_298, 9 -> LBB4_428, 11 -> LBB4_431,
        	// 12 -> handled here, everything else -> LBB4_1655.
        	// The esi == 12 case reads dwords from [rcx]; the result is stored by the
        	// shared tails LBB4_122 (zero input) and LBB4_982 (non-zero input).
 27151  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27152  	JLE  LBB4_298
 27153  	WORD $0xfe83; BYTE $0x09 // cmp    esi, 9
 27154  	JE   LBB4_428
 27155  	WORD $0xfe83; BYTE $0x0b // cmp    esi, 11
 27156  	JE   LBB4_431
 27157  	WORD $0xfe83; BYTE $0x0c // cmp    esi, 12
 27158  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27159  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27160  	JLE  LBB4_1655
        	// rdx = count. Any count other than 1 goes through LBB4_613 (outside this
        	// chunk); a single element is handled at index rax = 0.
 27161  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 27162  	LONG $0x01f98341         // cmp    r9d, 1
 27163  	JNE  LBB4_613
 27164  	WORD $0xc031             // xor    eax, eax
 27165  
 27166  LBB4_197:
        	// Odd-count tail: one element at index rax, only if the count is odd.
        	// The flags set by this cmp are consumed later by the JLE at LBB4_983.
 27167  	WORD $0xc2f6; BYTE $0x01 // test    dl, 1
 27168  	JE   LBB4_1655
 27169  	LONG $0x00813c83         // cmp    dword [rcx + 4*rax], 0
 27170  	JE   LBB4_122
 27171  	JMP  LBB4_982
 27172  
 27173  LBB4_200:
        	// Switch on esi: 2 -> LBB4_434, 3 -> handled here, everything else -> LBB4_1655.
        	// The esi == 3 case reads dwords from [rcx] and writes bytes to [r8]:
        	// out[i] = (in[i] != 0) ? 1 : 0.
        	// NOTE(review): esi appears to be an element-type id -- confirm against the caller.
 27174  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27175  	JE   LBB4_434
 27176  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27177  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27178  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27179  	JLE  LBB4_1655
        	// rax = count. Fewer than 8 elements: go straight to the scalar code.
 27180  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27181  	LONG $0x08f98341         // cmp    r9d, 8
 27182  	JB   LBB4_204
        	// Overlap test: LBB4_625 (outside this chunk) is taken only when the input
        	// dword range [rcx, rcx+4n) and the output byte range [r8, r8+n) are disjoint.
 27183  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 27184  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27185  	JBE  LBB4_625
 27186  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 27187  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27188  	JBE  LBB4_625
 27189  
 27190  LBB4_204:
        	// Scalar path starts at element index rdx = 0.
 27191  	WORD $0xd231 // xor    edx, edx
 27192  
 27193  LBB4_1275:
        	// rsi = n - rdx - 1 (elements remaining minus one), rdi = n & 3 (peel count).
 27194  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 27195  	WORD $0xf748; BYTE $0xd6 // not    rsi
 27196  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 27197  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 27198  	LONG $0x03e78348         // and    rdi, 3
 27199  	JE   LBB4_1277
 27200  
 27201  LBB4_1276:
        	// Peel loop: one element per iteration, rdi iterations.
 27202  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 27203  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 27204  	LONG $0x01c28348             // add    rdx, 1
 27205  	LONG $0xffc78348             // add    rdi, -1
 27206  	JNE  LBB4_1276
 27207  
 27208  LBB4_1277:
        	// If fewer than 4 elements were pending on entry, the peel loop covered them all.
 27209  	LONG $0x03fe8348 // cmp    rsi, 3
 27210  	JB   LBB4_1655
 27211  
 27212  LBB4_1278:
        	// Main loop, unrolled 4x: four dwords in, four bytes out, until rdx == n.
 27213  	LONG $0x00913c83               // cmp    dword [rcx + 4*rdx], 0
 27214  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 27215  	LONG $0x04917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 4], 0
 27216  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 27217  	LONG $0x08917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 8], 0
 27218  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 27219  	LONG $0x0c917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 12], 0
 27220  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 27221  	LONG $0x04c28348               // add    rdx, 4
 27222  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 27223  	JNE  LBB4_1278
 27224  	JMP  LBB4_1655
 27225  
 27226  LBB4_205:
        	// Switch on esi: 2 -> LBB4_437, 3 -> handled here, everything else -> LBB4_1655.
        	// The esi == 3 case reads doubles from [rcx] and writes bytes to [r8]:
        	// 0 when the input compares equal to 0.0, otherwise the low byte of
        	// cvttsd2si((x AND mask at 0[rbp]) OR constant at 272[rbp]).
        	// NOTE(review): .LCPI4_0 / .LCPI4_2 are defined outside this chunk; presumably
        	// the sign-bit mask and 1.0, giving -1 / +1 -- confirm.
 27227  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27228  	JE   LBB4_437
 27229  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27230  	JNE  LBB4_1655
        	// Skip all work when the signed count in r9d is <= 0.
 27231  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27232  	JLE  LBB4_1655
        	// rax = count. Fewer than 4 elements: go straight to the scalar code.
 27233  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27234  	LONG $0x04f98341         // cmp    r9d, 4
 27235  	JB   LBB4_209
        	// Overlap test: LBB4_630 (outside this chunk) is taken only when the input
        	// qword range [rcx, rcx+8n) and the output byte range [r8, r8+n) are disjoint.
 27236  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 27237  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27238  	JBE  LBB4_630
 27239  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 27240  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27241  	JBE  LBB4_630
 27242  
 27243  LBB4_209:
        	// Scalar path starts at element index rdx = 0.
 27244  	WORD $0xd231 // xor    edx, edx
 27245  
 27246  LBB4_1283:
        	// rsi = ~rdx now; rax is added at LBB4_1285 to give n - rdx - 1.
        	// When n is odd, one element is handled here so the loop can step by two.
 27247  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 27248  	WORD $0xf748; BYTE $0xd6     // not    rsi
 27249  	WORD $0x01a8                 // test    al, 1
 27250  	JE   LBB4_1285
        	// ucomisd sets ZF when x == 0.0 (and when unordered); the SSE logic and
        	// conversion instructions after it leave EFLAGS alone, so cmove still sees
        	// that ZF and forces the result to r9d = 0.
 27251  	LONG $0x04100ff2; BYTE $0xd1 // movsd    xmm0, qword [rcx + 8*rdx]
 27252  	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
 27253  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 27254  	LONG $0xc82e0f66             // ucomisd    xmm1, xmm0
 27255  	LONG $0x45540f66; BYTE $0x00 // andpd    xmm0, oword 0[rbp] /* [rip + .LCPI4_0] */
 27256  	QUAD $0x000001108d100ff2     // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 27257  	LONG $0xc8560f66             // orpd    xmm1, xmm0
 27258  	LONG $0xf92c0ff2             // cvttsd2si    edi, xmm1
 27259  	LONG $0xf9440f41             // cmove    edi, r9d
 27260  	LONG $0x103c8841             // mov    byte [r8 + rdx], dil
 27261  	LONG $0x01ca8348             // or    rdx, 1
 27262  
 27263  LBB4_1285:
        	// rsi = n - (rdx at LBB4_1283) - 1; zero means only one element was pending.
        	// Otherwise hoist the loop constants: esi = 0, xmm0 = 0.0, xmm1 = mask, xmm2 = const.
 27264  	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
 27265  	JE   LBB4_1655
 27266  	WORD $0xf631                 // xor    esi, esi
 27267  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 27268  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 27269  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 27270  
 27271  LBB4_1287:
        	// Main loop, unrolled 2x, same computation as the peeled element above.
 27272  	LONG $0x1c100ff2; BYTE $0xd1   // movsd    xmm3, qword [rcx + 8*rdx]
 27273  	LONG $0xc32e0f66               // ucomisd    xmm0, xmm3
 27274  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 27275  	LONG $0xda560f66               // orpd    xmm3, xmm2
 27276  	LONG $0xfb2c0ff2               // cvttsd2si    edi, xmm3
 27277  	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
 27278  	LONG $0x103c8841               // mov    byte [r8 + rdx], dil
 27279  	LONG $0x5c100ff2; WORD $0x08d1 // movsd    xmm3, qword [rcx + 8*rdx + 8]
 27280  	LONG $0xc32e0f66               // ucomisd    xmm0, xmm3
 27281  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 27282  	LONG $0xda560f66               // orpd    xmm3, xmm2
 27283  	LONG $0xfb2c0ff2               // cvttsd2si    edi, xmm3
 27284  	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
 27285  	LONG $0x107c8841; BYTE $0x01   // mov    byte [r8 + rdx + 1], dil
 27286  	LONG $0x02c28348               // add    rdx, 2
 27287  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 27288  	JNE  LBB4_1287
 27289  	JMP  LBB4_1655
 27290  
 27291  LBB4_210:
        // Dispatch on esi: 2 -> LBB4_440, 3 -> the byte-to-byte signum below,
        // anything else -> LBB4_1655. All three targets lie outside this chunk.
        // NOTE(review): esi is set before this chunk; presumably a type selector
        // from the function prologue -- confirm there.
        // Registers as used below: rcx = source base, r8 = destination base,
        // r9d = element count (nothing is done when it is <= 0).
        // Per element: dst = 1 if the signed source byte is > 0, 0 if it is 0,
        // 0xFF (-1) if it is < 0.
 27292  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27293  	JE   LBB4_440
 27294  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27295  	JNE  LBB4_1655
 27296  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27297  	JLE  LBB4_1655
 27298  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27299  	LONG $0x20f98341         // cmp    r9d, 32
 27300  	JB   LBB4_214
        	// count >= 32: go to LBB4_635 only when [rcx, rcx+count) and
        	// [r8, r8+count) do not overlap; otherwise fall into the scalar code.
        	// NOTE(review): LBB4_635 is presumably the vectorised variant -- not visible here.
 27301  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 27302  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27303  	JBE  LBB4_635
 27304  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 27305  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27306  	JBE  LBB4_635
 27307  
 27308  LBB4_214:
        // Scalar entry: start at element index rsi = 0.
 27309  	WORD $0xf631 // xor    esi, esi
 27310  
 27311  LBB4_1292:
        // rax = ~rsi, so after the "add rax, r10" at LBB4_1294 it holds
        // count - 1 - rsi. If the count is odd, one element is handled here so
        // the main loop can step by two.
 27312  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 27313  	WORD $0xf748; BYTE $0xd0     // not    rax
 27314  	LONG $0x01c2f641             // test    r10b, 1
 27315  	JE   LBB4_1294
 27316  	LONG $0x313c8a40             // mov    dil, byte [rcx + rsi]
 27317  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 27318  	LONG $0xd1950f41             // setne    r9b
 27319  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 27320  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 27321  	LONG $0xc9b60f45             // movzx    r9d, r9b
 27322  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27323  	LONG $0xf94e0f41             // cmovle    edi, r9d
 27324  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 27325  	LONG $0x01ce8348             // or    rsi, 1
 27326  
 27327  LBB4_1294:
        // count - 1 - rsi == 0 means the element handled above was the only one left.
 27328  	WORD $0x014c; BYTE $0xd0     // add    rax, r10
 27329  	JE   LBB4_1655
 27330  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27331  
 27332  LBB4_1296:
        // Main loop, two elements per iteration; edi holds the constant 1 that
        // cmovg selects for positive inputs. Runs until rsi == r10 (count).
 27333  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
 27334  	WORD $0xc084                 // test    al, al
 27335  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27336  	WORD $0xdaf6                 // neg    dl
 27337  	WORD $0xc084                 // test    al, al
 27338  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27339  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 27340  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 27341  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
 27342  	WORD $0xc084                 // test    al, al
 27343  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27344  	WORD $0xdaf6                 // neg    dl
 27345  	WORD $0xc084                 // test    al, al
 27346  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27347  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 27348  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 27349  	LONG $0x02c68348             // add    rsi, 2
 27350  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 27351  	JNE  LBB4_1296
 27352  	JMP  LBB4_1655
 27353  
 27354  LBB4_215:
        // Dispatch on esi: 2 -> LBB4_443, 3 -> the loop below, else -> LBB4_1655
        // (all outside this chunk). NOTE(review): esi is presumably a type
        // selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (8-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the source qword is non-zero, else 0.
 27355  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27356  	JE   LBB4_443
 27357  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27358  	JNE  LBB4_1655
 27359  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27360  	JLE  LBB4_1655
 27361  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27362  	LONG $0x04f98341         // cmp    r9d, 4
 27363  	JB   LBB4_219
        	// count >= 4: take LBB4_640 only if the source range [rcx, rcx+8*count)
        	// and destination range [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_640 is presumably the vectorised variant -- not visible here.
 27364  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 27365  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27366  	JBE  LBB4_640
 27367  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 27368  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27369  	JBE  LBB4_640
 27370  
 27371  LBB4_219:
        // Scalar entry: start at element index rdx = 0.
 27372  	WORD $0xd231 // xor    edx, edx
 27373  
 27374  LBB4_1301:
        // rsi = count - 1 - rdx (elements remaining minus one, taken before peeling);
        // rdi = count & 3 = number of elements handled one at a time first.
 27375  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 27376  	WORD $0xf748; BYTE $0xd6 // not    rsi
 27377  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 27378  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 27379  	LONG $0x03e78348         // and    rdi, 3
 27380  	JE   LBB4_1303
 27381  
 27382  LBB4_1302:
        // Peel loop: one element per iteration, rdi iterations.
 27383  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 27384  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 27385  	LONG $0x01c28348             // add    rdx, 1
 27386  	LONG $0xffc78348             // add    rdi, -1
 27387  	JNE  LBB4_1302
 27388  
 27389  LBB4_1303:
        // Fewer than 4 elements were outstanding before the peel: nothing left.
        // NOTE(review): this relies on rdx being a multiple of 4 on entry to
        // LBB4_1301; true for the rdx = 0 entry above, other entries are not visible.
 27390  	LONG $0x03fe8348 // cmp    rsi, 3
 27391  	JB   LBB4_1655
 27392  
 27393  LBB4_1304:
        // Main loop, four elements per iteration, until rdx == rax (count).
 27394  	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
 27395  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 27396  	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
 27397  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 27398  	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
 27399  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 27400  	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
 27401  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 27402  	LONG $0x04c28348               // add    rdx, 4
 27403  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 27404  	JNE  LBB4_1304
 27405  	JMP  LBB4_1655
 27406  
 27407  LBB4_220:
        // Dispatch on esi: 2 -> LBB4_446, 3 -> the loop below, else -> LBB4_1655
        // (all outside this chunk). NOTE(review): esi is presumably a type
        // selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (2-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the source word is non-zero, else 0.
 27408  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27409  	JE   LBB4_446
 27410  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27411  	JNE  LBB4_1655
 27412  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27413  	JLE  LBB4_1655
 27414  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27415  	LONG $0x10f98341         // cmp    r9d, 16
 27416  	JB   LBB4_224
        	// count >= 16: take LBB4_645 only if [rcx, rcx+2*count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_645 is presumably the vectorised variant -- not visible here.
 27417  	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
 27418  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27419  	JBE  LBB4_645
 27420  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 27421  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27422  	JBE  LBB4_645
 27423  
 27424  LBB4_224:
        // Scalar entry: start at element index rdx = 0.
 27425  	WORD $0xd231 // xor    edx, edx
 27426  
 27427  LBB4_1309:
        // rsi = count - 1 - rdx (taken before peeling); rdi = count & 3 elements
        // are handled one at a time first.
 27428  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 27429  	WORD $0xf748; BYTE $0xd6 // not    rsi
 27430  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 27431  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 27432  	LONG $0x03e78348         // and    rdi, 3
 27433  	JE   LBB4_1311
 27434  
 27435  LBB4_1310:
        // Peel loop: one element per iteration, rdi iterations.
 27436  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 27437  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 27438  	LONG $0x01c28348             // add    rdx, 1
 27439  	LONG $0xffc78348             // add    rdi, -1
 27440  	JNE  LBB4_1310
 27441  
 27442  LBB4_1311:
        // Fewer than 4 elements were outstanding before the peel: nothing left.
        // NOTE(review): relies on rdx being a multiple of 4 on entry to LBB4_1309;
        // true for the rdx = 0 entry above, other entries are not visible.
 27443  	LONG $0x03fe8348 // cmp    rsi, 3
 27444  	JB   LBB4_1655
 27445  
 27446  LBB4_1312:
        // Main loop, four elements per iteration, until rdx == rax (count).
 27447  	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
 27448  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 27449  	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
 27450  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 27451  	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
 27452  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 27453  	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
 27454  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 27455  	LONG $0x04c28348               // add    rdx, 4
 27456  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 27457  	JNE  LBB4_1312
 27458  	JMP  LBB4_1655
 27459  
 27460  LBB4_225:
        // Dispatch on esi: 2 -> LBB4_449, 3 -> the signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (2-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the signed source word is > 0, 0 if it is 0,
        // 0xFF (-1) if it is < 0.
 27461  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27462  	JE   LBB4_449
 27463  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27464  	JNE  LBB4_1655
 27465  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27466  	JLE  LBB4_1655
 27467  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27468  	LONG $0x10f98341         // cmp    r9d, 16
 27469  	JB   LBB4_229
        	// count >= 16: take LBB4_650 only if [rcx, rcx+2*count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_650 is presumably the vectorised variant -- not visible here.
 27470  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 27471  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27472  	JBE  LBB4_650
 27473  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 27474  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27475  	JBE  LBB4_650
 27476  
 27477  LBB4_229:
        // Scalar entry: start at element index rsi = 0.
 27478  	WORD $0xf631 // xor    esi, esi
 27479  
 27480  LBB4_1317:
        // rax = ~rsi, so after "add rax, r10" at LBB4_1319 it holds
        // count - 1 - rsi. An odd count gets one element handled here so the
        // main loop can step by two.
 27481  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 27482  	WORD $0xf748; BYTE $0xd0     // not    rax
 27483  	LONG $0x01c2f641             // test    r10b, 1
 27484  	JE   LBB4_1319
 27485  	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
 27486  	WORD $0x8566; BYTE $0xff     // test    di, di
 27487  	LONG $0xd1950f41             // setne    r9b
 27488  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 27489  	WORD $0x8566; BYTE $0xff     // test    di, di
 27490  	LONG $0xc9b60f45             // movzx    r9d, r9b
 27491  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27492  	LONG $0xf94e0f41             // cmovle    edi, r9d
 27493  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 27494  	LONG $0x01ce8348             // or    rsi, 1
 27495  
 27496  LBB4_1319:
        // count - 1 - rsi == 0 means the element handled above was the only one left.
 27497  	WORD $0x014c; BYTE $0xd0       // add    rax, r10
 27498  	JE   LBB4_1655
 27499  	LONG $0x0001b941; WORD $0x0000 // mov    r9d, 1
 27500  
 27501  LBB4_1321:
        // Main loop, two elements per iteration; r9d holds the constant 1 that
        // cmovg selects for positive inputs. Runs until rsi == r10 (count).
 27502  	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
 27503  	WORD $0x8566; BYTE $0xff     // test    di, di
 27504  	WORD $0x950f; BYTE $0xd0     // setne    al
 27505  	WORD $0xd8f6                 // neg    al
 27506  	WORD $0x8566; BYTE $0xff     // test    di, di
 27507  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 27508  	LONG $0xc14f0f41             // cmovg    eax, r9d
 27509  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 27510  	LONG $0x7144b70f; BYTE $0x02 // movzx    eax, word [rcx + 2*rsi + 2]
 27511  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 27512  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27513  	WORD $0xdaf6                 // neg    dl
 27514  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 27515  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27516  	LONG $0xc14f0f41             // cmovg    eax, r9d
 27517  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 27518  	LONG $0x02c68348             // add    rsi, 2
 27519  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 27520  	JNE  LBB4_1321
 27521  	JMP  LBB4_1655
 27522  
 27523  LBB4_230:
        // Dispatch on esi: 2 -> LBB4_452, 3 -> the signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (8-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the signed source qword is > 0, 0 if it is 0,
        // 0xFF (-1) if it is < 0.
 27524  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27525  	JE   LBB4_452
 27526  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27527  	JNE  LBB4_1655
 27528  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27529  	JLE  LBB4_1655
 27530  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27531  	LONG $0x04f98341         // cmp    r9d, 4
 27532  	JB   LBB4_234
        	// count >= 4: take LBB4_655 only if [rcx, rcx+8*count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_655 is presumably the vectorised variant -- not visible here.
 27533  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 27534  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27535  	JBE  LBB4_655
 27536  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 27537  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27538  	JBE  LBB4_655
 27539  
 27540  LBB4_234:
        // Scalar entry: start at element index rsi = 0.
 27541  	WORD $0xf631 // xor    esi, esi
 27542  
 27543  LBB4_1326:
        // rdx = ~rsi, so after "add rdx, r10" at LBB4_1328 it holds
        // count - 1 - rsi. An odd count gets one element handled here so the
        // main loop can step by two.
 27544  	WORD $0x8948; BYTE $0xf2     // mov    rdx, rsi
 27545  	WORD $0xf748; BYTE $0xd2     // not    rdx
 27546  	LONG $0x01c2f641             // test    r10b, 1
 27547  	JE   LBB4_1328
 27548  	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
 27549  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 27550  	WORD $0x950f; BYTE $0xd0     // setne    al
 27551  	WORD $0xd8f6                 // neg    al
 27552  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 27553  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 27554  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27555  	WORD $0x4e0f; BYTE $0xf8     // cmovle    edi, eax
 27556  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 27557  	LONG $0x01ce8348             // or    rsi, 1
 27558  
 27559  LBB4_1328:
        // count - 1 - rsi == 0 means the element handled above was the only one left.
 27560  	WORD $0x014c; BYTE $0xd2     // add    rdx, r10
 27561  	JE   LBB4_1655
 27562  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27563  
 27564  LBB4_1330:
        // Main loop, two elements per iteration; edi holds the constant 1 that
        // cmovg selects for positive inputs. Runs until rsi == r10 (count).
 27565  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
 27566  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 27567  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27568  	WORD $0xdaf6                 // neg    dl
 27569  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 27570  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27571  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 27572  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 27573  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
 27574  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 27575  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27576  	WORD $0xdaf6                 // neg    dl
 27577  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 27578  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27579  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 27580  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 27581  	LONG $0x02c68348             // add    rsi, 2
 27582  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 27583  	JNE  LBB4_1330
 27584  	JMP  LBB4_1655
 27585  
 27586  LBB4_235:
        // Dispatch on esi: 2 -> LBB4_455, 3 -> the float32 signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (4-byte single-precision
        // elements, compared with ucomiss), r8 = destination base (1-byte
        // elements), r9d = element count.
        // Per element: the candidate result is built from the raw sign bit
        // (setns -> 1 or 0, then 2*al - 1 -> 1 or 0xFF), and replaced by 0 when
        // ucomiss against 0.0 sets ZF. ucomiss sets ZF both for "equal" and for
        // "unordered", so +0.0, -0.0 and NaN all store 0.
 27587  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27588  	JE   LBB4_455
 27589  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27590  	JNE  LBB4_1655
 27591  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27592  	JLE  LBB4_1655
 27593  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27594  	LONG $0x08f98341         // cmp    r9d, 8
 27595  	JB   LBB4_239
        	// count >= 8: take LBB4_660 only if [rcx, rcx+4*count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_660 is presumably the vectorised variant -- not visible here.
 27596  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 27597  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27598  	JBE  LBB4_660
 27599  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 27600  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27601  	JBE  LBB4_660
 27602  
 27603  LBB4_239:
        // Scalar entry: start at element index rdx = 0.
 27604  	WORD $0xd231 // xor    edx, edx
 27605  
 27606  LBB4_1335:
        // rsi = ~rdx, so after "add rsi, r10" at LBB4_1337 it holds
        // count - 1 - rdx. An odd count gets one element handled here so the
        // main loop can step by two. edi is zeroed to serve as the cmove source.
 27607  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 27608  	WORD $0xf748; BYTE $0xd6     // not    rsi
 27609  	LONG $0x01c2f641             // test    r10b, 1
 27610  	JE   LBB4_1337
 27611  	LONG $0x046e0f66; BYTE $0x91 // movd    xmm0, dword [rcx + 4*rdx]
 27612  	LONG $0xc77e0f66             // movd    edi, xmm0
 27613  	WORD $0xff85                 // test    edi, edi
 27614  	WORD $0x990f; BYTE $0xd0     // setns    al
 27615  	WORD $0xc000                 // add    al, al
 27616  	WORD $0xff04                 // add    al, -1
 27617  	WORD $0xff31                 // xor    edi, edi
 27618  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 27619  	WORD $0x2e0f; BYTE $0xc8     // ucomiss    xmm1, xmm0
 27620  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 27621  	WORD $0x440f; BYTE $0xc7     // cmove    eax, edi
 27622  	LONG $0x10048841             // mov    byte [r8 + rdx], al
 27623  	LONG $0x01ca8348             // or    rdx, 1
 27624  
 27625  LBB4_1337:
        // count - 1 - rdx == 0 means the element handled above was the only one
        // left. Afterwards rsi is reused as the constant 0 and xmm0 as 0.0.
 27626  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 27627  	JE   LBB4_1655
 27628  	WORD $0xf631             // xor    esi, esi
 27629  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 27630  
 27631  LBB4_1339:
        // Main loop, two elements per iteration, until rdx == r10 (count).
 27632  	LONG $0x0c6e0f66; BYTE $0x91   // movd    xmm1, dword [rcx + 4*rdx]
 27633  	LONG $0xc87e0f66               // movd    eax, xmm1
 27634  	WORD $0xc085                   // test    eax, eax
 27635  	WORD $0x990f; BYTE $0xd0       // setns    al
 27636  	WORD $0xc000                   // add    al, al
 27637  	WORD $0xff04                   // add    al, -1
 27638  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
 27639  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 27640  	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
 27641  	LONG $0x10048841               // mov    byte [r8 + rdx], al
 27642  	LONG $0x4c6e0f66; WORD $0x0491 // movd    xmm1, dword [rcx + 4*rdx + 4]
 27643  	LONG $0xc87e0f66               // movd    eax, xmm1
 27644  	WORD $0xc085                   // test    eax, eax
 27645  	WORD $0x990f; BYTE $0xd0       // setns    al
 27646  	WORD $0xc000                   // add    al, al
 27647  	WORD $0xff04                   // add    al, -1
 27648  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
 27649  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 27650  	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
 27651  	LONG $0x10448841; BYTE $0x01   // mov    byte [r8 + rdx + 1], al
 27652  	LONG $0x02c28348               // add    rdx, 2
 27653  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 27654  	JNE  LBB4_1339
 27655  	JMP  LBB4_1655
 27656  
 27657  LBB4_240:
        // Dispatch on esi: 2 -> LBB4_458, 3 -> the loop below, else -> LBB4_1655
        // (all outside this chunk). NOTE(review): esi is presumably a type
        // selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (1-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the source byte is non-zero, else 0.
 27658  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27659  	JE   LBB4_458
 27660  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27661  	JNE  LBB4_1655
 27662  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27663  	JLE  LBB4_1655
 27664  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27665  	LONG $0x20f98341         // cmp    r9d, 32
 27666  	JB   LBB4_244
        	// count >= 32: take LBB4_665 only if [rcx, rcx+count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_665 is presumably the vectorised variant -- not visible here.
 27667  	LONG $0x01148d48         // lea    rdx, [rcx + rax]
 27668  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27669  	JBE  LBB4_665
 27670  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 27671  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27672  	JBE  LBB4_665
 27673  
 27674  LBB4_244:
        // Scalar entry: start at element index rdx = 0.
 27675  	WORD $0xd231 // xor    edx, edx
 27676  
 27677  LBB4_1344:
        // rsi = count - 1 - rdx (taken before peeling); rdi = count & 3 elements
        // are handled one at a time first.
 27678  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 27679  	WORD $0xf748; BYTE $0xd6 // not    rsi
 27680  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 27681  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 27682  	LONG $0x03e78348         // and    rdi, 3
 27683  	JE   LBB4_1346
 27684  
 27685  LBB4_1345:
        // Peel loop: one element per iteration, rdi iterations.
 27686  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 27687  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 27688  	LONG $0x01c28348             // add    rdx, 1
 27689  	LONG $0xffc78348             // add    rdi, -1
 27690  	JNE  LBB4_1345
 27691  
 27692  LBB4_1346:
        // Fewer than 4 elements were outstanding before the peel: nothing left.
        // NOTE(review): relies on rdx being a multiple of 4 on entry to LBB4_1344;
        // true for the rdx = 0 entry above, other entries are not visible.
 27693  	LONG $0x03fe8348 // cmp    rsi, 3
 27694  	JB   LBB4_1655
 27695  
 27696  LBB4_1347:
        // Main loop, four elements per iteration, until rdx == rax (count).
 27697  	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
 27698  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 27699  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 27700  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 27701  	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
 27702  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 27703  	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
 27704  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 27705  	LONG $0x04c28348               // add    rdx, 4
 27706  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 27707  	JNE  LBB4_1347
 27708  	JMP  LBB4_1655
 27709  
 27710  LBB4_245:
        // Dispatch on esi: 2 -> LBB4_461, 3 -> the signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (4-byte elements),
        // r8 = destination base (1-byte elements), r9d = element count.
        // Per element: dst = 1 if the signed source dword is > 0, 0 if it is 0,
        // 0xFF (-1) if it is < 0.
 27711  	WORD $0xfe83; BYTE $0x02 // cmp    esi, 2
 27712  	JE   LBB4_461
 27713  	WORD $0xfe83; BYTE $0x03 // cmp    esi, 3
 27714  	JNE  LBB4_1655
 27715  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27716  	JLE  LBB4_1655
 27717  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27718  	LONG $0x08f98341         // cmp    r9d, 8
 27719  	JB   LBB4_249
        	// count >= 8: take LBB4_670 only if [rcx, rcx+4*count) and
        	// [r8, r8+count) do not overlap.
        	// NOTE(review): LBB4_670 is presumably the vectorised variant -- not visible here.
 27720  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 27721  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27722  	JBE  LBB4_670
 27723  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 27724  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27725  	JBE  LBB4_670
 27726  
 27727  LBB4_249:
        // Scalar entry: start at element index rsi = 0.
 27728  	WORD $0xf631 // xor    esi, esi
 27729  
 27730  LBB4_1352:
        // rax = ~rsi, so after "add rax, r10" at LBB4_1354 it holds
        // count - 1 - rsi. An odd count gets one element handled here so the
        // main loop can step by two.
 27731  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 27732  	WORD $0xf748; BYTE $0xd0     // not    rax
 27733  	LONG $0x01c2f641             // test    r10b, 1
 27734  	JE   LBB4_1354
 27735  	WORD $0x3c8b; BYTE $0xb1     // mov    edi, dword [rcx + 4*rsi]
 27736  	WORD $0xff85                 // test    edi, edi
 27737  	LONG $0xd1950f41             // setne    r9b
 27738  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 27739  	WORD $0xff85                 // test    edi, edi
 27740  	LONG $0xc9b60f45             // movzx    r9d, r9b
 27741  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27742  	LONG $0xf94e0f41             // cmovle    edi, r9d
 27743  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 27744  	LONG $0x01ce8348             // or    rsi, 1
 27745  
 27746  LBB4_1354:
        // count - 1 - rsi == 0 means the element handled above was the only one left.
 27747  	WORD $0x014c; BYTE $0xd0       // add    rax, r10
 27748  	JE   LBB4_1655
 27749  	LONG $0x0001b941; WORD $0x0000 // mov    r9d, 1
 27750  
 27751  LBB4_1356:
        // Main loop, two elements per iteration; r9d holds the constant 1 that
        // cmovg selects for positive inputs. Runs until rsi == r10 (count).
 27752  	WORD $0x3c8b; BYTE $0xb1     // mov    edi, dword [rcx + 4*rsi]
 27753  	WORD $0xff85                 // test    edi, edi
 27754  	WORD $0x950f; BYTE $0xd0     // setne    al
 27755  	WORD $0xd8f6                 // neg    al
 27756  	WORD $0xff85                 // test    edi, edi
 27757  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 27758  	LONG $0xc14f0f41             // cmovg    eax, r9d
 27759  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 27760  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
 27761  	WORD $0xc085                 // test    eax, eax
 27762  	WORD $0x950f; BYTE $0xd2     // setne    dl
 27763  	WORD $0xdaf6                 // neg    dl
 27764  	WORD $0xc085                 // test    eax, eax
 27765  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 27766  	LONG $0xc14f0f41             // cmovg    eax, r9d
 27767  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 27768  	LONG $0x02c68348             // add    rsi, 2
 27769  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 27770  	JNE  LBB4_1356
 27771  	JMP  LBB4_1655
 27772  
 27773  LBB4_250:
        // Dispatch stub; every target is outside this chunk.
        //   esi == 7            -> LBB4_464
        //   esi != 8, or r9d <= 0 -> LBB4_1655
        //   esi == 8, r9d >= 4  -> LBB4_673, with eax = r9d (element count)
        //   esi == 8, r9d <  4  -> LBB4_1003, with eax = r9d and rdx = 0
        // NOTE(review): rdx = 0 looks like the starting element index for a
        // scalar loop at LBB4_1003 -- confirm at that label.
 27774  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27775  	JE   LBB4_464
 27776  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27777  	JNE  LBB4_1655
 27778  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27779  	JLE  LBB4_1655
 27780  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27781  	LONG $0x04f98341         // cmp    r9d, 4
 27782  	JAE  LBB4_673
 27783  	WORD $0xd231             // xor    edx, edx
 27784  	JMP  LBB4_1003
 27785  
 27786  LBB4_255:
        // Dispatch stub; every target is outside this chunk.
        //   esi == 7            -> LBB4_467
        //   esi != 8, or r9d <= 0 -> LBB4_1655
        //   esi == 8, r9d != 1  -> LBB4_676
        //   esi == 8, r9d == 1  -> LBB4_1008, with rsi = 0
        // Before branching it sets r10d = r9d (element count) and
        // r11 = 0x8000000000000000.
        // NOTE(review): r11 looks like a 64-bit sign-bit mask consumed by
        // LBB4_676 / LBB4_1008 -- confirm at those labels.
 27787  	WORD $0xfe83; BYTE $0x07               // cmp    esi, 7
 27788  	JE   LBB4_467
 27789  	WORD $0xfe83; BYTE $0x08               // cmp    esi, 8
 27790  	JNE  LBB4_1655
 27791  	WORD $0x8545; BYTE $0xc9               // test    r9d, r9d
 27792  	JLE  LBB4_1655
 27793  	WORD $0x8945; BYTE $0xca               // mov    r10d, r9d
 27794  	QUAD $0x000000000000bb49; WORD $0x8000 // mov    r11, -9223372036854775808
 27795  	LONG $0x01f98341                       // cmp    r9d, 1
 27796  	JNE  LBB4_676
 27797  	WORD $0xf631                           // xor    esi, esi
 27798  	JMP  LBB4_1008
 27799  
 27800  LBB4_260:
        // Dispatch on esi: 7 -> LBB4_470, 8 -> the signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (1-byte elements),
        // r8 = destination base (8-byte elements), r9d = element count.
        // Per element: dst = 1 if the signed source byte is > 0, 0 if it is 0,
        // and all-ones (-1 as a 64-bit value) if it is < 0.
 27801  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27802  	JE   LBB4_470
 27803  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27804  	JNE  LBB4_1655
 27805  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27806  	JLE  LBB4_1655
 27807  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27808  	LONG $0x04f98341         // cmp    r9d, 4
 27809  	JB   LBB4_264
        	// count >= 4: take LBB4_681 only if [rcx, rcx+count) and
        	// [r8, r8+8*count) do not overlap.
        	// NOTE(review): LBB4_681 is presumably the vectorised variant -- not visible here.
 27810  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 27811  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27812  	JBE  LBB4_681
 27813  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 27814  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27815  	JBE  LBB4_681
 27816  
 27817  LBB4_264:
        // Scalar entry: start at element index rdx = 0.
 27818  	WORD $0xd231 // xor    edx, edx
 27819  
 27820  LBB4_1361:
        // rsi = ~rdx, so after "add rsi, r10" at LBB4_1363 it holds
        // count - 1 - rdx. An odd count gets one element handled here so the
        // main loop can step by two. "neg rdi" turns the 0/1 from setne into
        // 0 / 64-bit -1.
 27821  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 27822  	WORD $0xf748; BYTE $0xd6     // not    rsi
 27823  	LONG $0x01c2f641             // test    r10b, 1
 27824  	JE   LBB4_1363
 27825  	WORD $0x048a; BYTE $0x11     // mov    al, byte [rcx + rdx]
 27826  	WORD $0xff31                 // xor    edi, edi
 27827  	WORD $0xc084                 // test    al, al
 27828  	LONG $0xd7950f40             // setne    dil
 27829  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 27830  	WORD $0xc084                 // test    al, al
 27831  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 27832  	LONG $0xc74e0f48             // cmovle    rax, rdi
 27833  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 27834  	LONG $0x01ca8348             // or    rdx, 1
 27835  
 27836  LBB4_1363:
        // count - 1 - rdx == 0 means the element handled above was the only one
        // left. Afterwards rsi is reused as the constant 1 for cmovg.
 27837  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
 27838  	JE   LBB4_1655
 27839  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 27840  
 27841  LBB4_1365:
        // Main loop, two elements per iteration, until rdx == r10 (count).
 27842  	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
 27843  	WORD $0xff31                 // xor    edi, edi
 27844  	WORD $0xc084                 // test    al, al
 27845  	LONG $0xd7950f40             // setne    dil
 27846  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 27847  	WORD $0xc084                 // test    al, al
 27848  	LONG $0xfe4f0f48             // cmovg    rdi, rsi
 27849  	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
 27850  	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
 27851  	WORD $0xff31                 // xor    edi, edi
 27852  	WORD $0xc084                 // test    al, al
 27853  	LONG $0xd7950f40             // setne    dil
 27854  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 27855  	WORD $0xc084                 // test    al, al
 27856  	LONG $0xfe4f0f48             // cmovg    rdi, rsi
 27857  	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
 27858  	LONG $0x02c28348             // add    rdx, 2
 27859  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 27860  	JNE  LBB4_1365
 27861  	JMP  LBB4_1655
 27862  
 27863  LBB4_265:
        // Dispatch on esi: 7 -> LBB4_473, 8 -> the loop below, else -> LBB4_1655
        // (all outside this chunk). NOTE(review): esi is presumably a type
        // selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (8-byte elements),
        // r8 = destination base (8-byte elements), r9d = element count.
        // Per element: dst = 1 (as a 64-bit value) if the source qword is
        // non-zero, else 0. eax is cleared before each setne so the full rax
        // stored is 0 or 1.
 27864  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27865  	JE   LBB4_473
 27866  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27867  	JNE  LBB4_1655
 27868  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27869  	JLE  LBB4_1655
 27870  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27871  	LONG $0x04f98341         // cmp    r9d, 4
 27872  	JB   LBB4_269
        	// count >= 4: take LBB4_686 only if [rcx, rcx+8*count) and
        	// [r8, r8+8*count) do not overlap.
        	// NOTE(review): LBB4_686 is presumably the vectorised variant -- not visible here.
 27873  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 27874  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27875  	JBE  LBB4_686
 27876  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 27877  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27878  	JBE  LBB4_686
 27879  
 27880  LBB4_269:
        // Scalar entry: start at element index rdx = 0.
 27881  	WORD $0xd231 // xor    edx, edx
 27882  
 27883  LBB4_1370:
        // rsi = count - 1 - rdx (taken before peeling); rdi = count & 3 elements
        // are handled one at a time first.
 27884  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 27885  	WORD $0xf748; BYTE $0xd6 // not    rsi
 27886  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 27887  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 27888  	LONG $0x03e78348         // and    rdi, 3
 27889  	JE   LBB4_1372
 27890  
 27891  LBB4_1371:
        // Peel loop: one element per iteration, rdi iterations.
 27892  	WORD $0xc031                 // xor    eax, eax
 27893  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 27894  	WORD $0x950f; BYTE $0xd0     // setne    al
 27895  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 27896  	LONG $0x01c28348             // add    rdx, 1
 27897  	LONG $0xffc78348             // add    rdi, -1
 27898  	JNE  LBB4_1371
 27899  
 27900  LBB4_1372:
        // Fewer than 4 elements were outstanding before the peel: nothing left.
        // NOTE(review): relies on rdx being a multiple of 4 on entry to LBB4_1370;
        // true for the rdx = 0 entry above, other entries are not visible.
 27901  	LONG $0x03fe8348 // cmp    rsi, 3
 27902  	JB   LBB4_1655
 27903  
 27904  LBB4_1373:
        // Main loop, four elements per iteration, until rdx == r10 (count).
 27905  	WORD $0xc031                   // xor    eax, eax
 27906  	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
 27907  	WORD $0x950f; BYTE $0xd0       // setne    al
 27908  	LONG $0xd0048949               // mov    qword [r8 + 8*rdx], rax
 27909  	WORD $0xc031                   // xor    eax, eax
 27910  	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
 27911  	WORD $0x950f; BYTE $0xd0       // setne    al
 27912  	LONG $0xd0448949; BYTE $0x08   // mov    qword [r8 + 8*rdx + 8], rax
 27913  	WORD $0xc031                   // xor    eax, eax
 27914  	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
 27915  	WORD $0x950f; BYTE $0xd0       // setne    al
 27916  	LONG $0xd0448949; BYTE $0x10   // mov    qword [r8 + 8*rdx + 16], rax
 27917  	WORD $0xc031                   // xor    eax, eax
 27918  	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
 27919  	WORD $0x950f; BYTE $0xd0       // setne    al
 27920  	LONG $0xd0448949; BYTE $0x18   // mov    qword [r8 + 8*rdx + 24], rax
 27921  	LONG $0x04c28348               // add    rdx, 4
 27922  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 27923  	JNE  LBB4_1373
 27924  	JMP  LBB4_1655
 27925  
 27926  LBB4_270:
        // Dispatch stub; every target is outside this chunk.
        //   esi == 7            -> LBB4_476
        //   esi != 8, or r9d <= 0 -> LBB4_1655
        //   esi == 8, r9d >= 4  -> LBB4_689, with eax = r9d (element count)
        //   esi == 8, r9d <  4  -> LBB4_1014, with eax = r9d and rdx = 0
        // NOTE(review): rdx = 0 looks like the starting element index for a
        // scalar loop at LBB4_1014 -- confirm at that label.
 27927  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27928  	JE   LBB4_476
 27929  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27930  	JNE  LBB4_1655
 27931  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27932  	JLE  LBB4_1655
 27933  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 27934  	LONG $0x04f98341         // cmp    r9d, 4
 27935  	JAE  LBB4_689
 27936  	WORD $0xd231             // xor    edx, edx
 27937  	JMP  LBB4_1014
 27938  
 27939  LBB4_275:
        // Dispatch stub; every target is outside this chunk.
        //   esi == 7            -> LBB4_479
        //   esi != 8, or r9d <= 0 -> LBB4_1655
        //   esi == 8, r9d >= 4  -> LBB4_692, with r10d = r9d (element count)
        //   esi == 8, r9d <  4  -> LBB4_1019, with r10d = r9d and rdx = 0
        // NOTE(review): rdx = 0 looks like the starting element index for a
        // scalar loop at LBB4_1019 -- confirm at that label.
 27940  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27941  	JE   LBB4_479
 27942  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27943  	JNE  LBB4_1655
 27944  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27945  	JLE  LBB4_1655
 27946  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 27947  	LONG $0x04f98341         // cmp    r9d, 4
 27948  	JAE  LBB4_692
 27949  	WORD $0xd231             // xor    edx, edx
 27950  	JMP  LBB4_1019
 27951  
 27952  LBB4_280:
        // Dispatch on esi: 7 -> LBB4_482, 8 -> the signum loop below,
        // else -> LBB4_1655 (all outside this chunk). NOTE(review): esi is
        // presumably a type selector set in the prologue -- confirm there.
        // Registers as used below: rcx = source base (8-byte elements),
        // r8 = destination base (8-byte elements), r9d = element count
        // (copied to r11d because r9 is reused as a temporary below).
        // Per element: dst = 1 if the signed source qword is > 0, 0 if it is 0,
        // and all-ones (-1 as a 64-bit value) if it is < 0.
 27953  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 27954  	JE   LBB4_482
 27955  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 27956  	JNE  LBB4_1655
 27957  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 27958  	JLE  LBB4_1655
 27959  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
 27960  	LONG $0x04f98341         // cmp    r9d, 4
 27961  	JB   LBB4_284
        	// count >= 4: take LBB4_697 only if [rcx, rcx+8*count) and
        	// [r8, r8+8*count) do not overlap.
        	// NOTE(review): LBB4_697 is presumably the vectorised variant -- not visible here.
 27962  	LONG $0xd9148d4a         // lea    rdx, [rcx + 8*r11]
 27963  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 27964  	JBE  LBB4_697
 27965  	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
 27966  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 27967  	JBE  LBB4_697
 27968  
 27969  LBB4_284:
        // Scalar entry: start at element index rdx = 0.
 27970  	WORD $0xd231 // xor    edx, edx
 27971  
 27972  LBB4_1378:
        // rsi = ~rdx, so after "add rsi, r11" at LBB4_1380 it holds
        // count - 1 - rdx. An odd count gets one element handled here so the
        // main loop can step by two. "neg r10" turns the 0/1 from setne into
        // 0 / 64-bit -1.
 27973  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 27974  	WORD $0xf748; BYTE $0xd6     // not    rsi
 27975  	LONG $0x01c3f641             // test    r11b, 1
 27976  	JE   LBB4_1380
 27977  	LONG $0xd10c8b4c             // mov    r9, qword [rcx + 8*rdx]
 27978  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 27979  	WORD $0x854d; BYTE $0xc9     // test    r9, r9
 27980  	LONG $0xd2950f41             // setne    r10b
 27981  	WORD $0xf749; BYTE $0xda     // neg    r10
 27982  	WORD $0x854d; BYTE $0xc9     // test    r9, r9
 27983  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 27984  	LONG $0xfa4e0f49             // cmovle    rdi, r10
 27985  	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
 27986  	LONG $0x01ca8348             // or    rdx, 1
 27987  
 27988  LBB4_1380:
        // count - 1 - rdx == 0 means the element handled above was the only one
        // left. Afterwards rsi is reused as the constant 1 for cmovg.
 27989  	WORD $0x014c; BYTE $0xde     // add    rsi, r11
 27990  	JE   LBB4_1655
 27991  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 27992  
 27993  LBB4_1382:
        // Main loop, two elements per iteration, until rdx == r11 (count).
 27994  	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
 27995  	WORD $0xc031                 // xor    eax, eax
 27996  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 27997  	WORD $0x950f; BYTE $0xd0     // setne    al
 27998  	WORD $0xf748; BYTE $0xd8     // neg    rax
 27999  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 28000  	LONG $0xc64f0f48             // cmovg    rax, rsi
 28001  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 28002  	LONG $0xd1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rdx + 8]
 28003  	WORD $0xff31                 // xor    edi, edi
 28004  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 28005  	LONG $0xd7950f40             // setne    dil
 28006  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 28007  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 28008  	LONG $0xfe4f0f48             // cmovg    rdi, rsi
 28009  	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
 28010  	LONG $0x02c28348             // add    rdx, 2
 28011  	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
 28012  	JNE  LBB4_1382
 28013  	JMP  LBB4_1655
 28014  
 28015  LBB4_285:
        // Dispatch stub; every target is outside this chunk.
        //   esi == 7            -> LBB4_485
        //   esi != 8, or r9d <= 0 -> LBB4_1655
        //   esi == 8, r9d != 1  -> LBB4_700, with r10d = r9d (element count)
        //   esi == 8, r9d == 1  -> LBB4_290, with r10d = r9d and rax = 0
        // NOTE(review): rax = 0 looks like the starting element index for the
        // code at LBB4_290 -- confirm at that label.
 28016  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 28017  	JE   LBB4_485
 28018  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 28019  	JNE  LBB4_1655
 28020  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28021  	JLE  LBB4_1655
 28022  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 28023  	LONG $0x01f98341         // cmp    r9d, 1
 28024  	JNE  LBB4_700
 28025  	WORD $0xc031             // xor    eax, eax
 28026  	JMP  LBB4_290
 28027  
 28028  LBB4_293: // esi == 7 -> LBB4_488; esi == 8 -> for each of r9d elements store (byte [rcx + i] != 0) as a qword at [r8 + 8*i]; any other esi -> LBB4_1655.
 28029  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 28030  	JE   LBB4_488
 28031  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 28032  	JNE  LBB4_1655
 28033  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28034  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28035  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28036  	LONG $0x04f98341         // cmp    r9d, 4
 28037  	JB   LBB4_297            // fewer than 4 elements: scalar loop from index 0
 28038  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28039  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28040  	JBE  LBB4_710            // source ends at or before the destination start: no overlap, continue at LBB4_710 (outside this chunk; presumably the vectorized loop -- confirm)
 28041  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10] -- rdx = end of the qword destination range
 28042  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28043  	JBE  LBB4_710            // destination ends at or before the source start: no overlap
 28044  
 28045  LBB4_297: // ranges overlap or count < 4: start the scalar loop at index 0
 28046  	WORD $0xd231 // xor    edx, edx
 28047  
 28048  LBB4_1387: // scalar loop setup; rdx = index of the first element still to process
 28049  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28050  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28051  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10 -- rsi = r10 - rdx - 1
 28052  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28053  	LONG $0x03e78348         // and    rdi, 3 -- rdi = r10 & 3 = number of single-element iterations to run first
 28054  	JE   LBB4_1389           // count is a multiple of 4: nothing to peel
 28055  
 28056  LBB4_1388: // peel loop: one element per iteration, rdi iterations
 28057  	WORD $0xc031             // xor    eax, eax
 28058  	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
 28059  	WORD $0x950f; BYTE $0xd0 // setne    al -- rax = 1 if the source byte is nonzero, else 0
 28060  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 28061  	LONG $0x01c28348         // add    rdx, 1
 28062  	LONG $0xffc78348         // add    rdi, -1
 28063  	JNE  LBB4_1388
 28064  
 28065  LBB4_1389:
 28066  	LONG $0x03fe8348 // cmp    rsi, 3
 28067  	JB   LBB4_1655   // rsi < 3: at most 3 elements were left at LBB4_1387, so no 4-element group remains
 28068  
 28069  LBB4_1390: // main scalar loop, 4 elements per iteration, until rdx == r10
 28070  	WORD $0xc031                 // xor    eax, eax
 28071  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 28072  	WORD $0x950f; BYTE $0xd0     // setne    al
 28073  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 28074  	WORD $0xc031                 // xor    eax, eax
 28075  	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
 28076  	WORD $0x950f; BYTE $0xd0     // setne    al
 28077  	LONG $0xd0448949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rax
 28078  	WORD $0xc031                 // xor    eax, eax
 28079  	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
 28080  	WORD $0x950f; BYTE $0xd0     // setne    al
 28081  	LONG $0xd0448949; BYTE $0x10 // mov    qword [r8 + 8*rdx + 16], rax
 28082  	WORD $0xc031                 // xor    eax, eax
 28083  	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
 28084  	WORD $0x950f; BYTE $0xd0     // setne    al
 28085  	LONG $0xd0448949; BYTE $0x18 // mov    qword [r8 + 8*rdx + 24], rax
 28086  	LONG $0x04c28348             // add    rdx, 4
 28087  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 28088  	JNE  LBB4_1390
 28089  	JMP  LBB4_1655
 28090  
 28091  LBB4_298: // Dispatch on esi: 7 -> LBB4_491, 8 -> handled below, any other value -> LBB4_1655. All jump targets are outside this chunk.
 28092  	WORD $0xfe83; BYTE $0x07 // cmp    esi, 7
 28093  	JE   LBB4_491
 28094  	WORD $0xfe83; BYTE $0x08 // cmp    esi, 8
 28095  	JNE  LBB4_1655
 28096  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28097  	JLE  LBB4_1655           // r9d <= 0 (signed)
 28098  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = r9d, zero-extended
 28099  	LONG $0x04f98341         // cmp    r9d, 4
 28100  	JAE  LBB4_713            // r9d >= 4: continue at LBB4_713
 28101  	WORD $0xd231             // xor    edx, edx
 28102  	JMP  LBB4_1025           // r9d < 4: continue at LBB4_1025 with rdx = 0
 28103  
 28104  LBB4_303: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28105  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28106  	JLE  LBB4_1655
 28107  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28108  	LONG $0x08f98341         // cmp    r9d, 8
 28109  	JAE  LBB4_716            // r9d >= 8: continue at LBB4_716
 28110  	WORD $0xd231             // xor    edx, edx
 28111  	JMP  LBB4_1141           // r9d < 8: continue at LBB4_1141 with rdx = 0
 28112  
 28113  LBB4_306: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28114  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28115  	JLE  LBB4_1655
 28116  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28117  	LONG $0x08f98341         // cmp    r9d, 8
 28118  	JAE  LBB4_719            // r9d >= 8: continue at LBB4_719
 28119  	WORD $0xd231             // xor    edx, edx
 28120  	JMP  LBB4_1146           // r9d < 8: continue at LBB4_1146 with rdx = 0
 28121  
 28122  LBB4_309: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28123  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28124  	JLE  LBB4_1655
 28125  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28126  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d -- r10 = 0 on both paths below
 28127  	LONG $0x04f98341         // cmp    r9d, 4
 28128  	JAE  LBB4_722            // r9d >= 4: continue at LBB4_722
 28129  	WORD $0xf631             // xor    esi, esi
 28130  	JMP  LBB4_1151           // r9d < 4: continue at LBB4_1151 with rsi = 0
 28131  
 28132  LBB4_312: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28133  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28134  	JLE  LBB4_1655
 28135  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28136  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d -- r10 = 0 on both paths below
 28137  	LONG $0x04f98341         // cmp    r9d, 4
 28138  	JAE  LBB4_725            // r9d >= 4: continue at LBB4_725
 28139  	WORD $0xf631             // xor    esi, esi
 28140  	JMP  LBB4_1157           // r9d < 4: continue at LBB4_1157 with rsi = 0
 28141  
 28142  LBB4_315: // For each of r9d elements: read the signed byte at [rcx + i] and store its sign (1 if > 0, -1 if < 0, 0 if zero) as a 16-bit value at [r8 + 2*i].
 28143  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28144  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28145  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28146  	LONG $0x10f98341         // cmp    r9d, 16
 28147  	JB   LBB4_317            // fewer than 16 elements: scalar loop from index 0
 28148  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28149  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28150  	JBE  LBB4_730            // source ends at or before the destination start: no overlap, continue at LBB4_730 (outside this chunk; presumably the vectorized loop -- confirm)
 28151  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28152  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28153  	JBE  LBB4_730            // destination ends at or before the source start: no overlap
 28154  
 28155  LBB4_317: // ranges overlap or count < 16: start the scalar loop at index 0
 28156  	WORD $0xd231 // xor    edx, edx
 28157  
 28158  LBB4_1395: // scalar loop setup; rdx = index of the first element still to process
 28159  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28160  	WORD $0xf748; BYTE $0xd6     // not    rsi -- rsi = -rdx - 1
 28161  	LONG $0x01c2f641             // test    r10b, 1
 28162  	JE   LBB4_1397               // even count: skip the single-element step
 28163  	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
 28164  	WORD $0xff31                 // xor    edi, edi
 28165  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 28166  	LONG $0xd7950f40             // setne    dil
 28167  	WORD $0xdff7                 // neg    edi -- edi = 0 if the byte is zero, else -1
 28168  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 28169  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 28170  	WORD $0x4e0f; BYTE $0xc7     // cmovle    eax, edi -- eax stays 1 only when the byte is > 0 (signed)
 28171  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28172  	LONG $0x01ca8348             // or    rdx, 1 -- rdx |= 1 (rdx is 0 when arriving from LBB4_317)
 28173  
 28174  LBB4_1397:
 28175  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10 -- rsi = r10 - 1 - (rdx on entry to LBB4_1395)
 28176  	JE   LBB4_1655               // zero: only one element was left at LBB4_1395
 28177  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1 -- constant 1 selected by the cmovg instructions below
 28178  
 28179  LBB4_1399: // main scalar loop, 2 elements per iteration, until rdx == r10
 28180  	LONG $0x1104b60f               // movzx    eax, byte [rcx + rdx]
 28181  	WORD $0xff31                   // xor    edi, edi
 28182  	WORD $0xc084                   // test    al, al
 28183  	LONG $0xd7950f40               // setne    dil
 28184  	WORD $0xdff7                   // neg    edi -- edi = 0 or -1
 28185  	WORD $0xc084                   // test    al, al
 28186  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi -- edi = 1 when the byte is > 0 (signed)
 28187  	LONG $0x3c894166; BYTE $0x50   // mov    word [r8 + 2*rdx], di
 28188  	LONG $0x1144b60f; BYTE $0x01   // movzx    eax, byte [rcx + rdx + 1]
 28189  	WORD $0xff31                   // xor    edi, edi
 28190  	WORD $0xc084                   // test    al, al
 28191  	LONG $0xd7950f40               // setne    dil
 28192  	WORD $0xdff7                   // neg    edi
 28193  	WORD $0xc084                   // test    al, al
 28194  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
 28195  	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
 28196  	LONG $0x02c28348               // add    rdx, 2
 28197  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28198  	JNE  LBB4_1399
 28199  	JMP  LBB4_1655
 28200  
 28201  LBB4_318: // For each of r9d elements: read the signed byte at [rcx + i] and store its sign (1 if > 0, -1 if < 0, 0 if zero) as a 16-bit value at [r8 + 2*i].
 28202  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28203  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28204  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28205  	LONG $0x10f98341         // cmp    r9d, 16
 28206  	JB   LBB4_320            // fewer than 16 elements: scalar loop from index 0
 28207  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28208  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28209  	JBE  LBB4_735            // source ends at or before the destination start: no overlap, continue at LBB4_735 (outside this chunk; presumably the vectorized loop -- confirm)
 28210  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28211  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28212  	JBE  LBB4_735            // destination ends at or before the source start: no overlap
 28213  
 28214  LBB4_320: // ranges overlap or count < 16: start the scalar loop at index 0
 28215  	WORD $0xd231 // xor    edx, edx
 28216  
 28217  LBB4_1404: // scalar loop setup; rdx = index of the first element still to process
 28218  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28219  	WORD $0xf748; BYTE $0xd6     // not    rsi -- rsi = -rdx - 1
 28220  	LONG $0x01c2f641             // test    r10b, 1
 28221  	JE   LBB4_1406               // even count: skip the single-element step
 28222  	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
 28223  	WORD $0xff31                 // xor    edi, edi
 28224  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 28225  	LONG $0xd7950f40             // setne    dil
 28226  	WORD $0xdff7                 // neg    edi -- edi = 0 if the byte is zero, else -1
 28227  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 28228  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 28229  	WORD $0x4e0f; BYTE $0xc7     // cmovle    eax, edi -- eax stays 1 only when the byte is > 0 (signed)
 28230  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28231  	LONG $0x01ca8348             // or    rdx, 1 -- rdx |= 1 (rdx is 0 when arriving from LBB4_320)
 28232  
 28233  LBB4_1406:
 28234  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10 -- rsi = r10 - 1 - (rdx on entry to LBB4_1404)
 28235  	JE   LBB4_1655               // zero: only one element was left at LBB4_1404
 28236  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1 -- constant 1 selected by the cmovg instructions below
 28237  
 28238  LBB4_1408: // main scalar loop, 2 elements per iteration, until rdx == r10
 28239  	LONG $0x1104b60f               // movzx    eax, byte [rcx + rdx]
 28240  	WORD $0xff31                   // xor    edi, edi
 28241  	WORD $0xc084                   // test    al, al
 28242  	LONG $0xd7950f40               // setne    dil
 28243  	WORD $0xdff7                   // neg    edi -- edi = 0 or -1
 28244  	WORD $0xc084                   // test    al, al
 28245  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi -- edi = 1 when the byte is > 0 (signed)
 28246  	LONG $0x3c894166; BYTE $0x50   // mov    word [r8 + 2*rdx], di
 28247  	LONG $0x1144b60f; BYTE $0x01   // movzx    eax, byte [rcx + rdx + 1]
 28248  	WORD $0xff31                   // xor    edi, edi
 28249  	WORD $0xc084                   // test    al, al
 28250  	LONG $0xd7950f40               // setne    dil
 28251  	WORD $0xdff7                   // neg    edi
 28252  	WORD $0xc084                   // test    al, al
 28253  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
 28254  	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
 28255  	LONG $0x02c28348               // add    rdx, 2
 28256  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28257  	JNE  LBB4_1408
 28258  	JMP  LBB4_1655
 28259  
 28260  LBB4_321: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28261  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28262  	JLE  LBB4_1655
 28263  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28264  	LONG $0x04f98341         // cmp    r9d, 4
 28265  	JAE  LBB4_738            // r9d >= 4: continue at LBB4_738
 28266  	WORD $0xd231             // xor    edx, edx
 28267  	JMP  LBB4_1031           // r9d < 4: continue at LBB4_1031 with rdx = 0
 28268  
 28269  LBB4_324: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28270  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28271  	JLE  LBB4_1655
 28272  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28273  	LONG $0x04f98341         // cmp    r9d, 4
 28274  	JAE  LBB4_741            // r9d >= 4: continue at LBB4_741
 28275  	WORD $0xd231             // xor    edx, edx
 28276  	JMP  LBB4_1036           // r9d < 4: continue at LBB4_1036 with rdx = 0
 28277  
 28278  LBB4_327: // For each of r9d elements: store (word [rcx + 2*i] != 0) as a 16-bit 0/1 value at [r8 + 2*i].
 28279  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28280  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28281  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28282  	LONG $0x10f98341         // cmp    r9d, 16
 28283  	JB   LBB4_329            // fewer than 16 elements: scalar loop from index 0
 28284  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10] -- rdx = end of the word source range
 28285  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28286  	JBE  LBB4_746            // source ends at or before the destination start: no overlap, continue at LBB4_746 (outside this chunk; presumably the vectorized loop -- confirm)
 28287  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28288  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28289  	JBE  LBB4_746            // destination ends at or before the source start: no overlap
 28290  
 28291  LBB4_329: // ranges overlap or count < 16: start the scalar loop at index 0
 28292  	WORD $0xd231 // xor    edx, edx
 28293  
 28294  LBB4_1413: // scalar loop setup; rdx = index of the first element still to process
 28295  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28296  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28297  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10 -- rsi = r10 - rdx - 1
 28298  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28299  	LONG $0x03e78348         // and    rdi, 3 -- rdi = r10 & 3 = number of single-element iterations to run first
 28300  	JE   LBB4_1415           // count is a multiple of 4: nothing to peel
 28301  
 28302  LBB4_1414: // peel loop: one element per iteration, rdi iterations
 28303  	WORD $0xc031                 // xor    eax, eax
 28304  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 28305  	WORD $0x950f; BYTE $0xd0     // setne    al -- eax = 1 if the source word is nonzero, else 0
 28306  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28307  	LONG $0x01c28348             // add    rdx, 1
 28308  	LONG $0xffc78348             // add    rdi, -1
 28309  	JNE  LBB4_1414
 28310  
 28311  LBB4_1415:
 28312  	LONG $0x03fe8348 // cmp    rsi, 3
 28313  	JB   LBB4_1655   // rsi < 3: at most 3 elements were left at LBB4_1413, so no 4-element group remains
 28314  
 28315  LBB4_1416: // main scalar loop, 4 elements per iteration, until rdx == r10
 28316  	WORD $0xc031                   // xor    eax, eax
 28317  	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
 28318  	WORD $0x950f; BYTE $0xd0       // setne    al
 28319  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28320  	WORD $0xc031                   // xor    eax, eax
 28321  	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
 28322  	WORD $0x950f; BYTE $0xd0       // setne    al
 28323  	LONG $0x44894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], ax
 28324  	WORD $0xc031                   // xor    eax, eax
 28325  	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
 28326  	WORD $0x950f; BYTE $0xd0       // setne    al
 28327  	LONG $0x44894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], ax
 28328  	WORD $0xc031                   // xor    eax, eax
 28329  	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
 28330  	WORD $0x950f; BYTE $0xd0       // setne    al
 28331  	LONG $0x44894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], ax
 28332  	LONG $0x04c28348               // add    rdx, 4
 28333  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28334  	JNE  LBB4_1416
 28335  	JMP  LBB4_1655
 28336  
 28337  LBB4_330: // For each of r9d elements: store (word [rcx + 2*i] != 0) as a 16-bit 0/1 value at [r8 + 2*i].
 28338  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28339  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28340  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28341  	LONG $0x10f98341         // cmp    r9d, 16
 28342  	JB   LBB4_332            // fewer than 16 elements: scalar loop from index 0
 28343  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10] -- rdx = end of the word source range
 28344  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28345  	JBE  LBB4_751            // source ends at or before the destination start: no overlap, continue at LBB4_751 (outside this chunk; presumably the vectorized loop -- confirm)
 28346  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28347  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28348  	JBE  LBB4_751            // destination ends at or before the source start: no overlap
 28349  
 28350  LBB4_332: // ranges overlap or count < 16: start the scalar loop at index 0
 28351  	WORD $0xd231 // xor    edx, edx
 28352  
 28353  LBB4_1421: // scalar loop setup; rdx = index of the first element still to process
 28354  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28355  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28356  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10 -- rsi = r10 - rdx - 1
 28357  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28358  	LONG $0x03e78348         // and    rdi, 3 -- rdi = r10 & 3 = number of single-element iterations to run first
 28359  	JE   LBB4_1423           // count is a multiple of 4: nothing to peel
 28360  
 28361  LBB4_1422: // peel loop: one element per iteration, rdi iterations
 28362  	WORD $0xc031                 // xor    eax, eax
 28363  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 28364  	WORD $0x950f; BYTE $0xd0     // setne    al -- eax = 1 if the source word is nonzero, else 0
 28365  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28366  	LONG $0x01c28348             // add    rdx, 1
 28367  	LONG $0xffc78348             // add    rdi, -1
 28368  	JNE  LBB4_1422
 28369  
 28370  LBB4_1423:
 28371  	LONG $0x03fe8348 // cmp    rsi, 3
 28372  	JB   LBB4_1655   // rsi < 3: at most 3 elements were left at LBB4_1421, so no 4-element group remains
 28373  
 28374  LBB4_1424: // main scalar loop, 4 elements per iteration, until rdx == r10
 28375  	WORD $0xc031                   // xor    eax, eax
 28376  	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
 28377  	WORD $0x950f; BYTE $0xd0       // setne    al
 28378  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28379  	WORD $0xc031                   // xor    eax, eax
 28380  	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
 28381  	WORD $0x950f; BYTE $0xd0       // setne    al
 28382  	LONG $0x44894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], ax
 28383  	WORD $0xc031                   // xor    eax, eax
 28384  	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
 28385  	WORD $0x950f; BYTE $0xd0       // setne    al
 28386  	LONG $0x44894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], ax
 28387  	WORD $0xc031                   // xor    eax, eax
 28388  	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
 28389  	WORD $0x950f; BYTE $0xd0       // setne    al
 28390  	LONG $0x44894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], ax
 28391  	LONG $0x04c28348               // add    rdx, 4
 28392  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28393  	JNE  LBB4_1424
 28394  	JMP  LBB4_1655
 28395  
 28396  LBB4_333: // For each of r9d elements: read the signed 16-bit value at [rcx + 2*i] and store its sign (1 if > 0, -1 if < 0, 0 if zero) as a 16-bit value at [r8 + 2*i].
 28397  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28398  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28399  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d -- r11 = element count, zero-extended
 28400  	LONG $0x10f98341         // cmp    r9d, 16
 28401  	JB   LBB4_335            // fewer than 16 elements: scalar loop from index 0
 28402  	LONG $0x59148d4a         // lea    rdx, [rcx + 2*r11] -- rdx = end of the word source range
 28403  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28404  	JBE  LBB4_756            // source ends at or before the destination start: no overlap, continue at LBB4_756 (outside this chunk; presumably the vectorized loop -- confirm)
 28405  	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11] -- rdx = end of the word destination range
 28406  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28407  	JBE  LBB4_756            // destination ends at or before the source start: no overlap
 28408  
 28409  LBB4_335: // ranges overlap or count < 16: start the scalar loop at index 0
 28410  	WORD $0xd231 // xor    edx, edx
 28411  
 28412  LBB4_1429: // scalar loop setup; rdx = index of the first element still to process
 28413  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28414  	WORD $0xf748; BYTE $0xd6     // not    rsi -- rsi = -rdx - 1
 28415  	LONG $0x01c3f641             // test    r11b, 1
 28416  	JE   LBB4_1431               // even count: skip the single-element step
 28417  	LONG $0x0cb70f44; BYTE $0x51 // movzx    r9d, word [rcx + 2*rdx]
 28418  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 28419  	LONG $0xc9854566             // test    r9w, r9w
 28420  	LONG $0xd2950f41             // setne    r10b
 28421  	WORD $0xf741; BYTE $0xda     // neg    r10d -- r10d = 0 if the word is zero, else -1
 28422  	LONG $0xc9854566             // test    r9w, r9w
 28423  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 28424  	LONG $0xfa4e0f41             // cmovle    edi, r10d -- edi stays 1 only when the word is > 0 (signed)
 28425  	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
 28426  	LONG $0x01ca8348             // or    rdx, 1 -- rdx |= 1 (rdx is 0 when arriving from LBB4_335)
 28427  
 28428  LBB4_1431:
 28429  	WORD $0x014c; BYTE $0xde     // add    rsi, r11 -- rsi = r11 - 1 - (rdx on entry to LBB4_1429)
 28430  	JE   LBB4_1655               // zero: only one element was left at LBB4_1429
 28431  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1 -- constant 1 selected by the cmovg instructions below
 28432  
 28433  LBB4_1433: // main scalar loop, 2 elements per iteration, until rdx == r11
 28434  	LONG $0x513cb70f               // movzx    edi, word [rcx + 2*rdx]
 28435  	WORD $0xc031                   // xor    eax, eax
 28436  	WORD $0x8566; BYTE $0xff       // test    di, di
 28437  	WORD $0x950f; BYTE $0xd0       // setne    al
 28438  	WORD $0xd8f7                   // neg    eax -- eax = 0 or -1
 28439  	WORD $0x8566; BYTE $0xff       // test    di, di
 28440  	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi -- eax = 1 when the word is > 0 (signed)
 28441  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28442  	LONG $0x5144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rdx + 2]
 28443  	WORD $0xff31                   // xor    edi, edi
 28444  	WORD $0x8566; BYTE $0xc0       // test    ax, ax
 28445  	LONG $0xd7950f40               // setne    dil
 28446  	WORD $0xdff7                   // neg    edi
 28447  	WORD $0x8566; BYTE $0xc0       // test    ax, ax
 28448  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
 28449  	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
 28450  	LONG $0x02c28348               // add    rdx, 2
 28451  	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
 28452  	JNE  LBB4_1433
 28453  	JMP  LBB4_1655
 28454  
 28455  LBB4_336: // For each of r9d elements: read the signed 16-bit value at [rcx + 2*i] and store its sign (1 if > 0, -1 if < 0, 0 if zero) as a 16-bit value at [r8 + 2*i].
 28456  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28457  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28458  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d -- r11 = element count, zero-extended
 28459  	LONG $0x10f98341         // cmp    r9d, 16
 28460  	JB   LBB4_338            // fewer than 16 elements: scalar loop from index 0
 28461  	LONG $0x59148d4a         // lea    rdx, [rcx + 2*r11] -- rdx = end of the word source range
 28462  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28463  	JBE  LBB4_761            // source ends at or before the destination start: no overlap, continue at LBB4_761 (outside this chunk; presumably the vectorized loop -- confirm)
 28464  	LONG $0x58148d4b         // lea    rdx, [r8 + 2*r11] -- rdx = end of the word destination range
 28465  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28466  	JBE  LBB4_761            // destination ends at or before the source start: no overlap
 28467  
 28468  LBB4_338: // ranges overlap or count < 16: start the scalar loop at index 0
 28469  	WORD $0xd231 // xor    edx, edx
 28470  
 28471  LBB4_1438: // scalar loop setup; rdx = index of the first element still to process
 28472  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28473  	WORD $0xf748; BYTE $0xd6     // not    rsi -- rsi = -rdx - 1
 28474  	LONG $0x01c3f641             // test    r11b, 1
 28475  	JE   LBB4_1440               // even count: skip the single-element step
 28476  	LONG $0x0cb70f44; BYTE $0x51 // movzx    r9d, word [rcx + 2*rdx]
 28477  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 28478  	LONG $0xc9854566             // test    r9w, r9w
 28479  	LONG $0xd2950f41             // setne    r10b
 28480  	WORD $0xf741; BYTE $0xda     // neg    r10d -- r10d = 0 if the word is zero, else -1
 28481  	LONG $0xc9854566             // test    r9w, r9w
 28482  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 28483  	LONG $0xfa4e0f41             // cmovle    edi, r10d -- edi stays 1 only when the word is > 0 (signed)
 28484  	LONG $0x3c894166; BYTE $0x50 // mov    word [r8 + 2*rdx], di
 28485  	LONG $0x01ca8348             // or    rdx, 1 -- rdx |= 1 (rdx is 0 when arriving from LBB4_338)
 28486  
 28487  LBB4_1440:
 28488  	WORD $0x014c; BYTE $0xde     // add    rsi, r11 -- rsi = r11 - 1 - (rdx on entry to LBB4_1438)
 28489  	JE   LBB4_1655               // zero: only one element was left at LBB4_1438
 28490  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1 -- constant 1 selected by the cmovg instructions below
 28491  
 28492  LBB4_1442: // main scalar loop, 2 elements per iteration, until rdx == r11
 28493  	LONG $0x513cb70f               // movzx    edi, word [rcx + 2*rdx]
 28494  	WORD $0xc031                   // xor    eax, eax
 28495  	WORD $0x8566; BYTE $0xff       // test    di, di
 28496  	WORD $0x950f; BYTE $0xd0       // setne    al
 28497  	WORD $0xd8f7                   // neg    eax -- eax = 0 or -1
 28498  	WORD $0x8566; BYTE $0xff       // test    di, di
 28499  	WORD $0x4f0f; BYTE $0xc6       // cmovg    eax, esi -- eax = 1 when the word is > 0 (signed)
 28500  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28501  	LONG $0x5144b70f; BYTE $0x02   // movzx    eax, word [rcx + 2*rdx + 2]
 28502  	WORD $0xff31                   // xor    edi, edi
 28503  	WORD $0x8566; BYTE $0xc0       // test    ax, ax
 28504  	LONG $0xd7950f40               // setne    dil
 28505  	WORD $0xdff7                   // neg    edi
 28506  	WORD $0x8566; BYTE $0xc0       // test    ax, ax
 28507  	WORD $0x4f0f; BYTE $0xfe       // cmovg    edi, esi
 28508  	LONG $0x7c894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], di
 28509  	LONG $0x02c28348               // add    rdx, 2
 28510  	WORD $0x3949; BYTE $0xd3       // cmp    r11, rdx
 28511  	JNE  LBB4_1442
 28512  	JMP  LBB4_1655
 28513  
 28514  LBB4_339: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28515  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28516  	JLE  LBB4_1655
 28517  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = r9d, zero-extended
 28518  	LONG $0x04f98341         // cmp    r9d, 4
 28519  	JAE  LBB4_764            // r9d >= 4: continue at LBB4_764
 28520  	WORD $0xd231             // xor    edx, edx
 28521  	JMP  LBB4_1041           // r9d < 4: continue at LBB4_1041 with rdx = 0
 28522  
 28523  LBB4_342: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28524  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28525  	JLE  LBB4_1655
 28526  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = r9d, zero-extended
 28527  	LONG $0x04f98341         // cmp    r9d, 4
 28528  	JAE  LBB4_767            // r9d >= 4: continue at LBB4_767
 28529  	WORD $0xd231             // xor    edx, edx
 28530  	JMP  LBB4_1163           // r9d < 4: continue at LBB4_1163 with rdx = 0
 28531  
 28532  LBB4_345: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28533  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28534  	JLE  LBB4_1655
 28535  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28536  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d -- r10 = 0 on both paths below
 28537  	LONG $0x08f98341         // cmp    r9d, 8
 28538  	JAE  LBB4_770            // r9d >= 8: continue at LBB4_770
 28539  	WORD $0xf631             // xor    esi, esi
 28540  	JMP  LBB4_1169           // r9d < 8: continue at LBB4_1169 with rsi = 0
 28541  
 28542  LBB4_348: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28543  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28544  	JLE  LBB4_1655
 28545  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28546  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d -- r10 = 0 on both paths below
 28547  	LONG $0x08f98341         // cmp    r9d, 8
 28548  	JAE  LBB4_773            // r9d >= 8: continue at LBB4_773
 28549  	WORD $0xf631             // xor    esi, esi
 28550  	JMP  LBB4_1175           // r9d < 8: continue at LBB4_1175 with rsi = 0
 28551  
 28552  LBB4_351: // For each of r9d elements: store (byte [rcx + i] != 0) as a 16-bit 0/1 value at [r8 + 2*i].
 28553  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28554  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28555  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28556  	LONG $0x10f98341         // cmp    r9d, 16
 28557  	JB   LBB4_353            // fewer than 16 elements: scalar loop from index 0
 28558  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28559  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28560  	JBE  LBB4_778            // source ends at or before the destination start: no overlap, continue at LBB4_778 (outside this chunk; presumably the vectorized loop -- confirm)
 28561  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28562  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28563  	JBE  LBB4_778            // destination ends at or before the source start: no overlap
 28564  
 28565  LBB4_353: // ranges overlap or count < 16: start the scalar loop at index 0
 28566  	WORD $0xd231 // xor    edx, edx
 28567  
 28568  LBB4_1447: // scalar loop setup; rdx = index of the first element still to process
 28569  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28570  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28571  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10 -- rsi = r10 - rdx - 1
 28572  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28573  	LONG $0x03e78348         // and    rdi, 3 -- rdi = r10 & 3 = number of single-element iterations to run first
 28574  	JE   LBB4_1449           // count is a multiple of 4: nothing to peel
 28575  
 28576  LBB4_1448: // peel loop: one element per iteration, rdi iterations
 28577  	WORD $0xc031                 // xor    eax, eax
 28578  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 28579  	WORD $0x950f; BYTE $0xd0     // setne    al -- eax = 1 if the source byte is nonzero, else 0
 28580  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28581  	LONG $0x01c28348             // add    rdx, 1
 28582  	LONG $0xffc78348             // add    rdi, -1
 28583  	JNE  LBB4_1448
 28584  
 28585  LBB4_1449:
 28586  	LONG $0x03fe8348 // cmp    rsi, 3
 28587  	JB   LBB4_1655   // rsi < 3: at most 3 elements were left at LBB4_1447, so no 4-element group remains
 28588  
 28589  LBB4_1450: // main scalar loop, 4 elements per iteration, until rdx == r10
 28590  	WORD $0xc031                   // xor    eax, eax
 28591  	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
 28592  	WORD $0x950f; BYTE $0xd0       // setne    al
 28593  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28594  	WORD $0xc031                   // xor    eax, eax
 28595  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 28596  	WORD $0x950f; BYTE $0xd0       // setne    al
 28597  	LONG $0x44894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], ax
 28598  	WORD $0xc031                   // xor    eax, eax
 28599  	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
 28600  	WORD $0x950f; BYTE $0xd0       // setne    al
 28601  	LONG $0x44894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], ax
 28602  	WORD $0xc031                   // xor    eax, eax
 28603  	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
 28604  	WORD $0x950f; BYTE $0xd0       // setne    al
 28605  	LONG $0x44894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], ax
 28606  	LONG $0x04c28348               // add    rdx, 4
 28607  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28608  	JNE  LBB4_1450
 28609  	JMP  LBB4_1655
 28610  
 28611  LBB4_354: // For each of r9d elements: store (byte [rcx + i] != 0) as a 16-bit 0/1 value at [r8 + 2*i].
 28612  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28613  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28614  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28615  	LONG $0x10f98341         // cmp    r9d, 16
 28616  	JB   LBB4_356            // fewer than 16 elements: scalar loop from index 0
 28617  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28618  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28619  	JBE  LBB4_783            // source ends at or before the destination start: no overlap, continue at LBB4_783 (outside this chunk; presumably the vectorized loop -- confirm)
 28620  	LONG $0x50148d4b         // lea    rdx, [r8 + 2*r10] -- rdx = end of the word destination range
 28621  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28622  	JBE  LBB4_783            // destination ends at or before the source start: no overlap
 28623  
 28624  LBB4_356: // ranges overlap or count < 16: start the scalar loop at index 0
 28625  	WORD $0xd231 // xor    edx, edx
 28626  
 28627  LBB4_1455: // scalar loop setup; rdx = index of the first element still to process
 28628  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28629  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28630  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10 -- rsi = r10 - rdx - 1
 28631  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28632  	LONG $0x03e78348         // and    rdi, 3 -- rdi = r10 & 3 = number of single-element iterations to run first
 28633  	JE   LBB4_1457           // count is a multiple of 4: nothing to peel
 28634  
 28635  LBB4_1456: // peel loop: one element per iteration, rdi iterations
 28636  	WORD $0xc031                 // xor    eax, eax
 28637  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 28638  	WORD $0x950f; BYTE $0xd0     // setne    al -- eax = 1 if the source byte is nonzero, else 0
 28639  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 28640  	LONG $0x01c28348             // add    rdx, 1
 28641  	LONG $0xffc78348             // add    rdi, -1
 28642  	JNE  LBB4_1456
 28643  
 28644  LBB4_1457:
 28645  	LONG $0x03fe8348 // cmp    rsi, 3
 28646  	JB   LBB4_1655   // rsi < 3: at most 3 elements were left at LBB4_1455, so no 4-element group remains
 28647  
 28648  LBB4_1458: // main scalar loop, 4 elements per iteration, until rdx == r10
 28649  	WORD $0xc031                   // xor    eax, eax
 28650  	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
 28651  	WORD $0x950f; BYTE $0xd0       // setne    al
 28652  	LONG $0x04894166; BYTE $0x50   // mov    word [r8 + 2*rdx], ax
 28653  	WORD $0xc031                   // xor    eax, eax
 28654  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 28655  	WORD $0x950f; BYTE $0xd0       // setne    al
 28656  	LONG $0x44894166; WORD $0x0250 // mov    word [r8 + 2*rdx + 2], ax
 28657  	WORD $0xc031                   // xor    eax, eax
 28658  	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
 28659  	WORD $0x950f; BYTE $0xd0       // setne    al
 28660  	LONG $0x44894166; WORD $0x0450 // mov    word [r8 + 2*rdx + 4], ax
 28661  	WORD $0xc031                   // xor    eax, eax
 28662  	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
 28663  	WORD $0x950f; BYTE $0xd0       // setne    al
 28664  	LONG $0x44894166; WORD $0x0650 // mov    word [r8 + 2*rdx + 6], ax
 28665  	LONG $0x04c28348               // add    rdx, 4
 28666  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28667  	JNE  LBB4_1458
 28668  	JMP  LBB4_1655
 28669  
 28670  LBB4_357: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28671  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28672  	JLE  LBB4_1655
 28673  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = r9d, zero-extended
 28674  	LONG $0x08f98341         // cmp    r9d, 8
 28675  	JAE  LBB4_786            // r9d >= 8: continue at LBB4_786
 28676  	WORD $0xd231             // xor    edx, edx
 28677  	JMP  LBB4_1047           // r9d < 8: continue at LBB4_1047 with rdx = 0
 28678  
 28679  LBB4_360: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28680  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28681  	JLE  LBB4_1655
 28682  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = r9d, zero-extended
 28683  	LONG $0x08f98341         // cmp    r9d, 8
 28684  	JAE  LBB4_789            // r9d >= 8: continue at LBB4_789
 28685  	WORD $0xd231             // xor    edx, edx
 28686  	JMP  LBB4_1053           // r9d < 8: continue at LBB4_1053 with rdx = 0
 28687  
 28688  LBB4_363: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28689  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28690  	JLE  LBB4_1655
 28691  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28692  	LONG $0x04f98341         // cmp    r9d, 4
 28693  	JAE  LBB4_792            // r9d >= 4: continue at LBB4_792
 28694  	WORD $0xd231             // xor    edx, edx
 28695  	JMP  LBB4_1181           // r9d < 4: continue at LBB4_1181 with rdx = 0
 28696  
 28697  LBB4_366: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28698  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28699  	JLE  LBB4_1655
 28700  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28701  	LONG $0x08f98341         // cmp    r9d, 8
 28702  	JAE  LBB4_795            // r9d >= 8: continue at LBB4_795
 28703  	WORD $0xd231             // xor    edx, edx
 28704  	JMP  LBB4_1186           // r9d < 8: continue at LBB4_1186 with rdx = 0
 28705  
 28706  LBB4_369: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28707  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28708  	JLE  LBB4_1655
 28709  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28710  	LONG $0x04f98341         // cmp    r9d, 4
 28711  	JAE  LBB4_798            // r9d >= 4: continue at LBB4_798
 28712  	WORD $0xd231             // xor    edx, edx
 28713  	JMP  LBB4_1194           // r9d < 4: continue at LBB4_1194 with rdx = 0
 28714  
 28715  LBB4_372: // Entry stub: leave via LBB4_1655 when r9d <= 0, otherwise pick a continuation by count. All jump targets are outside this chunk.
 28716  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28717  	JLE  LBB4_1655
 28718  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d -- rax = r9d, zero-extended
 28719  	LONG $0x04f98341         // cmp    r9d, 4
 28720  	JAE  LBB4_801            // r9d >= 4: continue at LBB4_801
 28721  	WORD $0xd231             // xor    edx, edx
 28722  	JMP  LBB4_1200           // r9d < 4: continue at LBB4_1200 with rdx = 0
 28723  
 28724  LBB4_375: // For each of r9d elements: read the signed byte at [rcx + i] and store its sign (1 if > 0, -1 if < 0, 0 if zero) as a 64-bit value at [r8 + 8*i].
 28725  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28726  	JLE  LBB4_1655           // r9d <= 0 (signed): no elements
 28727  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d -- r10 = element count, zero-extended
 28728  	LONG $0x04f98341         // cmp    r9d, 4
 28729  	JB   LBB4_377            // fewer than 4 elements: scalar loop from index 0
 28730  	LONG $0x11148d4a         // lea    rdx, [rcx + r10] -- rdx = end of the byte source range
 28731  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28732  	JBE  LBB4_806            // source ends at or before the destination start: no overlap, continue at LBB4_806 (outside this chunk; presumably the vectorized loop -- confirm)
 28733  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10] -- rdx = end of the qword destination range
 28734  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28735  	JBE  LBB4_806            // destination ends at or before the source start: no overlap
 28736  
 28737  LBB4_377: // ranges overlap or count < 4: start the scalar loop at index 0
 28738  	WORD $0xd231 // xor    edx, edx
 28739  
 28740  LBB4_1463: // scalar loop setup; rdx = index of the first element still to process
 28741  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28742  	WORD $0xf748; BYTE $0xd6     // not    rsi -- rsi = -rdx - 1
 28743  	LONG $0x01c2f641             // test    r10b, 1
 28744  	JE   LBB4_1465               // even count: skip the single-element step
 28745  	WORD $0x048a; BYTE $0x11     // mov    al, byte [rcx + rdx]
 28746  	WORD $0xff31                 // xor    edi, edi
 28747  	WORD $0xc084                 // test    al, al
 28748  	LONG $0xd7950f40             // setne    dil
 28749  	WORD $0xf748; BYTE $0xdf     // neg    rdi -- rdi = 0 if the byte is zero, else -1
 28750  	WORD $0xc084                 // test    al, al
 28751  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 28752  	LONG $0xc74e0f48             // cmovle    rax, rdi -- rax stays 1 only when the byte is > 0 (signed)
 28753  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 28754  	LONG $0x01ca8348             // or    rdx, 1 -- rdx |= 1 (rdx is 0 when arriving from LBB4_377)
 28755  
 28756  LBB4_1465:
 28757  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10 -- rsi = r10 - 1 - (rdx on entry to LBB4_1463)
 28758  	JE   LBB4_1655               // zero: only one element was left at LBB4_1463
 28759  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1 -- constant 1 selected by the cmovg instructions below
 28760  
 28761  LBB4_1467: // main scalar loop, 2 elements per iteration, until rdx == r10
 28762  	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
 28763  	WORD $0xff31                 // xor    edi, edi
 28764  	WORD $0xc084                 // test    al, al
 28765  	LONG $0xd7950f40             // setne    dil
 28766  	WORD $0xf748; BYTE $0xdf     // neg    rdi -- rdi = 0 or -1
 28767  	WORD $0xc084                 // test    al, al
 28768  	LONG $0xfe4f0f48             // cmovg    rdi, rsi -- rdi = 1 when the byte is > 0 (signed)
 28769  	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
 28770  	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
 28771  	WORD $0xff31                 // xor    edi, edi
 28772  	WORD $0xc084                 // test    al, al
 28773  	LONG $0xd7950f40             // setne    dil
 28774  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 28775  	WORD $0xc084                 // test    al, al
 28776  	LONG $0xfe4f0f48             // cmovg    rdi, rsi
 28777  	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
 28778  	LONG $0x02c28348             // add    rdx, 2
 28779  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 28780  	JNE  LBB4_1467
 28781  	JMP  LBB4_1655
 28782  
 28783  LBB4_378: // Case entry. r9d = element count; the end-pointer math below advances rcx by 1 byte and r8 by 4 bytes per element.
 28784  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28785  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28786  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 28787  	LONG $0x08f98341         // cmp    r9d, 8
 28788  	JB   LBB4_380 // fewer than 8 elements: go straight to the scalar path
 28789  	LONG $0x01148d48         // lea    rdx, [rcx + rax]
 28790  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28791  	JBE  LBB4_811 // rcx range ends at or before r8: no overlap (LBB4_811 is outside this excerpt; presumably the vectorized loop)
 28792  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 28793  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28794  	JBE  LBB4_811 // r8 range ends at or before rcx: no overlap either
 28795  
 28796  LBB4_380: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 28797  	WORD $0xd231 // xor    edx, edx
 28798  
 28799  LBB4_1472: // rsi = ~rdx (= -rdx - 1); then test whether the count in eax is odd.
 28800  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28801  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28802  	WORD $0x01a8             // test    al, 1
 28803  	JE   LBB4_1479 // even count: skip the single-element step (target outside this excerpt)
 28804  	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
 28805  	JNE  LBB4_1475 // non-zero source byte: handled at LBB4_1475 (outside this excerpt)
 28806  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 28807  	JMP  LBB4_1476 // zero source byte: continue at LBB4_1476 with xmm0 = 0 (presumably the store; outside this excerpt)
 28808  
 28809  LBB4_381: // Case entry. r9d = element count; 8-byte elements are read from rcx and 8-byte results written to r8.
 28810  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28811  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28812  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 28813  	LONG $0x04f98341         // cmp    r9d, 4
 28814  	JB   LBB4_383 // fewer than 4 elements: scalar path
 28815  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 28816  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28817  	JBE  LBB4_816 // rcx range ends at or before r8: no overlap (LBB4_816 is outside this excerpt; presumably the vectorized loop)
 28818  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 28819  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28820  	JBE  LBB4_816 // r8 range ends at or before rcx: no overlap either
 28821  
 28822  LBB4_383: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 28823  	WORD $0xd231 // xor    edx, edx
 28824  
 28825  LBB4_1494: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 28826  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 28827  	WORD $0xf748; BYTE $0xd6 // not    rsi
 28828  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 28829  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 28830  	LONG $0x03e78348         // and    rdi, 3
 28831  	JE   LBB4_1496 // count is a multiple of 4: no peeling needed
 28832  
 28833  LBB4_1495: // Peel loop: out[rdx] = (in[rdx] != 0) as a 64-bit 0/1, one element per iteration, rdi times.
 28834  	WORD $0xc031                 // xor    eax, eax
 28835  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 28836  	WORD $0x950f; BYTE $0xd0     // setne    al
 28837  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 28838  	LONG $0x01c28348             // add    rdx, 1
 28839  	LONG $0xffc78348             // add    rdi, -1
 28840  	JNE  LBB4_1495
 28841  
 28842  LBB4_1496: // rsi still holds count - (index at LBB4_1494) - 1; below 3 means no group of four remains.
 28843  	LONG $0x03fe8348 // cmp    rsi, 3
 28844  	JB   LBB4_1655
 28845  
 28846  LBB4_1497: // Main loop, unrolled 4x: same 0/1 store for elements rdx .. rdx+3, until rdx reaches the count in r10.
 28847  	WORD $0xc031                   // xor    eax, eax
 28848  	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
 28849  	WORD $0x950f; BYTE $0xd0       // setne    al
 28850  	LONG $0xd0048949               // mov    qword [r8 + 8*rdx], rax
 28851  	WORD $0xc031                   // xor    eax, eax
 28852  	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
 28853  	WORD $0x950f; BYTE $0xd0       // setne    al
 28854  	LONG $0xd0448949; BYTE $0x08   // mov    qword [r8 + 8*rdx + 8], rax
 28855  	WORD $0xc031                   // xor    eax, eax
 28856  	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
 28857  	WORD $0x950f; BYTE $0xd0       // setne    al
 28858  	LONG $0xd0448949; BYTE $0x10   // mov    qword [r8 + 8*rdx + 16], rax
 28859  	WORD $0xc031                   // xor    eax, eax
 28860  	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
 28861  	WORD $0x950f; BYTE $0xd0       // setne    al
 28862  	LONG $0xd0448949; BYTE $0x18   // mov    qword [r8 + 8*rdx + 24], rax
 28863  	LONG $0x04c28348               // add    rdx, 4
 28864  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 28865  	JNE  LBB4_1497
 28866  	JMP  LBB4_1655 // all elements written
 28867  
 28868  LBB4_384: // Case entry. r9d = element count; 8-byte elements are read from rcx and 4-byte results written to r8.
 28869  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28870  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28871  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 28872  	LONG $0xff728d48         // lea    rsi, [rdx - 1]
 28873  	WORD $0xd089             // mov    eax, edx
 28874  	WORD $0xe083; BYTE $0x03 // and    eax, 3
 28875  	LONG $0x03fe8348         // cmp    rsi, 3
 28876  	JAE  LBB4_819 // count - 1 >= 3, i.e. at least 4 elements: handled at LBB4_819 (outside this excerpt)
 28877  	WORD $0xf631             // xor    esi, esi
 28878  
 28879  LBB4_387: // Remainder handling: rax = count & 3 elements, starting at element index rsi.
 28880  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 28881  	JE   LBB4_1655 // no remainder elements
 28882  	LONG $0xb0148d49         // lea    rdx, [r8 + 4*rsi]
 28883  	LONG $0xf10c8d48         // lea    rcx, [rcx + 8*rsi]
 28884  	WORD $0xf631             // xor    esi, esi
 28885  	QUAD $0x0000012885100ff3 // movss    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 28886  	JMP  LBB4_390 // xmm0 now holds the constant stored for non-zero inputs (value defined outside this excerpt; presumably 1.0f - TODO confirm)
 28887  
 28888  LBB4_389: // Store the selected value for element rsi and advance; finish once rsi reaches the remainder count in rax.
 28889  	LONG $0x0c110ff3; BYTE $0xb2 // movss    dword [rdx + 4*rsi], xmm1
 28890  	LONG $0x01c68348             // add    rsi, 1
 28891  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 28892  	JE   LBB4_1655
 28893  
 28894  LBB4_390: // Select xmm1: the constant from xmm0 when the 8-byte input is non-zero, otherwise zero.
 28895  	LONG $0xf13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rsi], 0
 28896  	LONG $0xc8280f66             // movapd    xmm1, xmm0
 28897  	JNE  LBB4_389 // non-zero input: keep the constant (movapd does not alter the flags set by cmp)
 28898  	LONG $0xc9570f66             // xorpd    xmm1, xmm1
 28899  	JMP  LBB4_389 // zero input: store 0
 28900  
 28901  LBB4_392: // Case entry (dispatch stub). r9d = element count, copied zero-extended into eax.
 28902  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28903  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28904  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 28905  	LONG $0x04f98341         // cmp    r9d, 4
 28906  	JAE  LBB4_829 // 4 or more elements: handled at LBB4_829 (outside this excerpt)
 28907  	WORD $0xd231             // xor    edx, edx
 28908  	JMP  LBB4_1059 // fewer than 4 elements: continue at LBB4_1059 (outside this excerpt) with rdx = 0
 28909  
 28910  LBB4_395: // Case entry (dispatch stub). r9d = element count, copied zero-extended into eax.
 28911  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28912  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28913  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 28914  	LONG $0x08f98341         // cmp    r9d, 8
 28915  	JAE  LBB4_832 // 8 or more elements: handled at LBB4_832 (outside this excerpt)
 28916  	WORD $0xd231             // xor    edx, edx
 28917  	JMP  LBB4_1208 // fewer than 8 elements: continue at LBB4_1208 (outside this excerpt) with rdx = 0
 28918  
 28919  LBB4_398: // Case entry (dispatch stub). r9d = element count, copied zero-extended into r10d.
 28920  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28921  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28922  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 28923  	LONG $0x04f98341         // cmp    r9d, 4
 28924  	JAE  LBB4_835 // 4 or more elements: handled at LBB4_835 (outside this excerpt)
 28925  	WORD $0xd231             // xor    edx, edx
 28926  	JMP  LBB4_1216 // fewer than 4 elements: continue at LBB4_1216 (outside this excerpt) with rdx = 0
 28927  
 28928  LBB4_401: // Case entry (dispatch stub). r9d = element count, copied zero-extended into eax.
 28929  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28930  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28931  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 28932  	LONG $0x08f98341         // cmp    r9d, 8
 28933  	JAE  LBB4_838 // 8 or more elements: handled at LBB4_838 (outside this excerpt)
 28934  	WORD $0xd231             // xor    edx, edx
 28935  	JMP  LBB4_1222 // fewer than 8 elements: continue at LBB4_1222 (outside this excerpt) with rdx = 0
 28936  
 28937  LBB4_404: // Case entry. r9d = element count; 8-byte elements are read from rcx and 8-byte results written to r8.
 28938  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28939  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28940  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
 28941  	LONG $0x04f98341         // cmp    r9d, 4
 28942  	JB   LBB4_406 // fewer than 4 elements: scalar path
 28943  	LONG $0xd9148d4a         // lea    rdx, [rcx + 8*r11]
 28944  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 28945  	JBE  LBB4_843 // rcx range ends at or before r8: no overlap (LBB4_843 is outside this excerpt; presumably the vectorized loop)
 28946  	LONG $0xd8148d4b         // lea    rdx, [r8 + 8*r11]
 28947  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 28948  	JBE  LBB4_843 // r8 range ends at or before rcx: no overlap either
 28949  
 28950  LBB4_406: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 28951  	WORD $0xd231 // xor    edx, edx
 28952  
 28953  LBB4_1502: // rsi = ~rdx. If the count (r11) is odd, handle one element here: out = 1 if in > 0 (signed), 0 if in == 0, -1 if in < 0.
 28954  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 28955  	WORD $0xf748; BYTE $0xd6     // not    rsi
 28956  	LONG $0x01c3f641             // test    r11b, 1
 28957  	JE   LBB4_1504 // even count: no single element to peel
 28958  	LONG $0xd10c8b4c             // mov    r9, qword [rcx + 8*rdx]
 28959  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 28960  	WORD $0x854d; BYTE $0xc9     // test    r9, r9
 28961  	LONG $0xd2950f41             // setne    r10b
 28962  	WORD $0xf749; BYTE $0xda     // neg    r10
 28963  	WORD $0x854d; BYTE $0xc9     // test    r9, r9
 28964  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 28965  	LONG $0xfa4e0f49             // cmovle    rdi, r10
 28966  	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
 28967  	LONG $0x01ca8348             // or    rdx, 1
 28968  
 28969  LBB4_1504: // rsi = count - (index at LBB4_1502) - 1; zero means that single element was the last one. NOTE(review): the "or rdx, 1" above equals rdx + 1 only for even rdx - confirm for entries from outside this excerpt.
 28970  	WORD $0x014c; BYTE $0xde     // add    rsi, r11
 28971  	JE   LBB4_1655
 28972  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 28973  
 28974  LBB4_1506: // Main loop, two elements per iteration: value = -(in != 0), replaced by rsi (= 1) when in > 0; runs until rdx reaches r11.
 28975  	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
 28976  	WORD $0xc031                 // xor    eax, eax
 28977  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 28978  	WORD $0x950f; BYTE $0xd0     // setne    al
 28979  	WORD $0xf748; BYTE $0xd8     // neg    rax
 28980  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 28981  	LONG $0xc64f0f48             // cmovg    rax, rsi
 28982  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 28983  	LONG $0xd1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rdx + 8]
 28984  	WORD $0xff31                 // xor    edi, edi
 28985  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 28986  	LONG $0xd7950f40             // setne    dil
 28987  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 28988  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 28989  	LONG $0xfe4f0f48             // cmovg    rdi, rsi
 28990  	LONG $0xd07c8949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rdi
 28991  	LONG $0x02c28348             // add    rdx, 2
 28992  	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
 28993  	JNE  LBB4_1506
 28994  	JMP  LBB4_1655 // all elements written
 28995  
 28996  LBB4_407: // Case entry. r9d = element count, copied zero-extended into edx; 8-byte elements are read from rcx.
 28997  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 28998  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 28999  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 29000  	LONG $0x01f98341         // cmp    r9d, 1
 29001  	JNE  LBB4_846 // more than one element: handled at LBB4_846 (outside this excerpt)
 29002  	WORD $0xc031             // xor    eax, eax
 29003  
 29004  LBB4_410: // Odd-count step: rax = element index; only runs when bit 0 of the count in dl is set.
 29005  	WORD $0xc2f6; BYTE $0x01     // test    dl, 1
 29006  	JE   LBB4_1655 // even count: nothing left
 29007  	LONG $0xc13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rax], 0
 29008  	JNE  LBB4_989 // non-zero input: handled at LBB4_989 (outside this excerpt)
 29009  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 29010  	JMP  LBB4_990 // zero input: continue at LBB4_990 with xmm0 = 0 (presumably the store; outside this excerpt)
 29011  
 29012  LBB4_413: // Case entry (dispatch stub). r9d = element count, copied zero-extended into edx.
 29013  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29014  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29015  	WORD $0x8944; BYTE $0xca // mov    edx, r9d
 29016  	LONG $0x01f98341         // cmp    r9d, 1
 29017  	JNE  LBB4_856 // more than one element: handled at LBB4_856 (outside this excerpt)
 29018  	WORD $0xc031             // xor    eax, eax
 29019  	JMP  LBB4_416 // exactly one element: continue at LBB4_416 (outside this excerpt) with rax = 0
 29020  
 29021  LBB4_419: // Case entry. r9d = element count; 4-byte scalar floats are read from rcx and 4-byte floats written to r8.
 29022  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29023  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29024  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29025  	LONG $0x08f98341         // cmp    r9d, 8
 29026  	JB   LBB4_421 // fewer than 8 elements: scalar path
 29027  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 29028  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29029  	JBE  LBB4_866 // rcx range ends at or before r8: no overlap (LBB4_866 is outside this excerpt; presumably the vectorized loop)
 29030  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 29031  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29032  	JBE  LBB4_866 // r8 range ends at or before rcx: no overlap either
 29033  
 29034  LBB4_421: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29035  	WORD $0xd231 // xor    edx, edx
 29036  
 29037  LBB4_869: // rsi = ~rdx. If the count in eax is odd, handle one element: take the sign bit via movmskps, build +1/-1, convert to float, then zero it when the input compares equal to 0.0.
 29038  	WORD $0x8948; BYTE $0xd6       // mov    rsi, rdx
 29039  	WORD $0xf748; BYTE $0xd6       // not    rsi
 29040  	WORD $0x01a8                   // test    al, 1
 29041  	JE   LBB4_871 // even count: no single element to peel
 29042  	LONG $0x04100ff3; BYTE $0x91   // movss    xmm0, dword [rcx + 4*rdx]
 29043  	WORD $0x500f; BYTE $0xf8       // movmskps    edi, xmm0
 29044  	WORD $0xe783; BYTE $0x01       // and    edi, 1
 29045  	WORD $0xdff7                   // neg    edi
 29046  	WORD $0xcf83; BYTE $0x01       // or    edi, 1
 29047  	WORD $0x570f; BYTE $0xc9       // xorps    xmm1, xmm1
 29048  	LONG $0xcf2a0ff3               // cvtsi2ss    xmm1, edi
 29049  	WORD $0x570f; BYTE $0xd2       // xorps    xmm2, xmm2
 29050  	LONG $0xd0c20ff3; BYTE $0x00   // cmpeqss    xmm2, xmm0
 29051  	WORD $0x550f; BYTE $0xd1       // andnps    xmm2, xmm1
 29052  	LONG $0x110f41f3; WORD $0x9014 // movss    dword [r8 + 4*rdx], xmm2
 29053  	LONG $0x01ca8348               // or    rdx, 1
 29054  
 29055  LBB4_871: // rsi = count - (index at LBB4_869) - 1; zero means nothing remains. xmm0 is then cleared to serve as the 0.0 comparand in the loop.
 29056  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29057  	JE   LBB4_1655
 29058  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 29059  
 29060  LBB4_873: // Main loop, two elements per iteration: result = (in == 0.0) ? 0 : (sign bit set ? -1.0 : +1.0); runs until rdx reaches rax.
 29061  	LONG $0x0c100ff3; BYTE $0x91               // movss    xmm1, dword [rcx + 4*rdx]
 29062  	WORD $0x500f; BYTE $0xf1                   // movmskps    esi, xmm1
 29063  	WORD $0xe683; BYTE $0x01                   // and    esi, 1
 29064  	WORD $0xdef7                               // neg    esi
 29065  	WORD $0xce83; BYTE $0x01                   // or    esi, 1
 29066  	WORD $0x570f; BYTE $0xd2                   // xorps    xmm2, xmm2
 29067  	LONG $0xd62a0ff3                           // cvtsi2ss    xmm2, esi
 29068  	LONG $0xc8c20ff3; BYTE $0x00               // cmpeqss    xmm1, xmm0
 29069  	WORD $0x550f; BYTE $0xca                   // andnps    xmm1, xmm2
 29070  	LONG $0x110f41f3; WORD $0x900c             // movss    dword [r8 + 4*rdx], xmm1
 29071  	LONG $0x4c100ff3; WORD $0x0491             // movss    xmm1, dword [rcx + 4*rdx + 4]
 29072  	WORD $0x500f; BYTE $0xf1                   // movmskps    esi, xmm1
 29073  	WORD $0xe683; BYTE $0x01                   // and    esi, 1
 29074  	WORD $0xdef7                               // neg    esi
 29075  	WORD $0xce83; BYTE $0x01                   // or    esi, 1
 29076  	WORD $0x570f; BYTE $0xd2                   // xorps    xmm2, xmm2
 29077  	LONG $0xd62a0ff3                           // cvtsi2ss    xmm2, esi
 29078  	LONG $0xc8c20ff3; BYTE $0x00               // cmpeqss    xmm1, xmm0
 29079  	WORD $0x550f; BYTE $0xca                   // andnps    xmm1, xmm2
 29080  	LONG $0x110f41f3; WORD $0x904c; BYTE $0x04 // movss    dword [r8 + 4*rdx + 4], xmm1
 29081  	LONG $0x02c28348                           // add    rdx, 2
 29082  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 29083  	JNE  LBB4_873
 29084  	JMP  LBB4_1655 // all elements written
 29085  
 29086  LBB4_422: // Case entry. r9d = element count; 1-byte elements are read from rcx and 8-byte results written to r8.
 29087  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29088  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29089  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29090  	LONG $0x04f98341         // cmp    r9d, 4
 29091  	JB   LBB4_424 // fewer than 4 elements: scalar path
 29092  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 29093  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29094  	JBE  LBB4_876 // rcx range ends at or before r8: no overlap (LBB4_876 is outside this excerpt; presumably the vectorized loop)
 29095  	LONG $0xd0148d4b         // lea    rdx, [r8 + 8*r10]
 29096  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29097  	JBE  LBB4_876 // r8 range ends at or before rcx: no overlap either
 29098  
 29099  LBB4_424: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29100  	WORD $0xd231 // xor    edx, edx
 29101  
 29102  LBB4_1511: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 29103  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29104  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29105  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 29106  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 29107  	LONG $0x03e78348         // and    rdi, 3
 29108  	JE   LBB4_1513 // count is a multiple of 4: no peeling needed
 29109  
 29110  LBB4_1512: // Peel loop: out[rdx] = (in[rdx] != 0) widened to a 64-bit 0/1, one element per iteration, rdi times.
 29111  	WORD $0xc031             // xor    eax, eax
 29112  	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
 29113  	WORD $0x950f; BYTE $0xd0 // setne    al
 29114  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 29115  	LONG $0x01c28348         // add    rdx, 1
 29116  	LONG $0xffc78348         // add    rdi, -1
 29117  	JNE  LBB4_1512
 29118  
 29119  LBB4_1513: // rsi still holds count - (index at LBB4_1511) - 1; below 3 means no group of four remains.
 29120  	LONG $0x03fe8348 // cmp    rsi, 3
 29121  	JB   LBB4_1655
 29122  
 29123  LBB4_1514: // Main loop, unrolled 4x: same 0/1 store for elements rdx .. rdx+3, until rdx reaches the count in r10.
 29124  	WORD $0xc031                 // xor    eax, eax
 29125  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 29126  	WORD $0x950f; BYTE $0xd0     // setne    al
 29127  	LONG $0xd0048949             // mov    qword [r8 + 8*rdx], rax
 29128  	WORD $0xc031                 // xor    eax, eax
 29129  	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
 29130  	WORD $0x950f; BYTE $0xd0     // setne    al
 29131  	LONG $0xd0448949; BYTE $0x08 // mov    qword [r8 + 8*rdx + 8], rax
 29132  	WORD $0xc031                 // xor    eax, eax
 29133  	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
 29134  	WORD $0x950f; BYTE $0xd0     // setne    al
 29135  	LONG $0xd0448949; BYTE $0x10 // mov    qword [r8 + 8*rdx + 16], rax
 29136  	WORD $0xc031                 // xor    eax, eax
 29137  	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
 29138  	WORD $0x950f; BYTE $0xd0     // setne    al
 29139  	LONG $0xd0448949; BYTE $0x18 // mov    qword [r8 + 8*rdx + 24], rax
 29140  	LONG $0x04c28348             // add    rdx, 4
 29141  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 29142  	JNE  LBB4_1514
 29143  	JMP  LBB4_1655 // all elements written
 29144  
 29145  LBB4_425: // Case entry. r9d = element count; 1-byte elements are read from rcx and 4-byte results written to r8.
 29146  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29147  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29148  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29149  	LONG $0x08f98341         // cmp    r9d, 8
 29150  	JB   LBB4_427 // fewer than 8 elements: scalar path
 29151  	LONG $0x01148d48         // lea    rdx, [rcx + rax]
 29152  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29153  	JBE  LBB4_881 // rcx range ends at or before r8: no overlap (LBB4_881 is outside this excerpt; presumably the vectorized loop)
 29154  	LONG $0x80148d49         // lea    rdx, [r8 + 4*rax]
 29155  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29156  	JBE  LBB4_881 // r8 range ends at or before rcx: no overlap either
 29157  
 29158  LBB4_427: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29159  	WORD $0xd231 // xor    edx, edx
 29160  
 29161  LBB4_1519: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 29162  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29163  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29164  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29165  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 29166  	LONG $0x03e78348         // and    rdi, 3
 29167  	JE   LBB4_1524 // count is a multiple of 4: skip peeling (LBB4_1524 is outside this excerpt)
 29168  	QUAD $0x00000128856e0f66 // movd    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 29169  	JMP  LBB4_1522 // xmm0 now holds the 4-byte constant stored for non-zero inputs (value defined outside this excerpt; presumably 1.0f - TODO confirm)
 29170  
 29171  LBB4_1521: // Store the selected 4-byte value for element rdx, advance, and count down the peel counter rdi.
 29172  	LONG $0x7e0f4166; WORD $0x900c // movd    dword [r8 + 4*rdx], xmm1
 29173  	LONG $0x01c28348               // add    rdx, 1
 29174  	LONG $0xffc78348               // add    rdi, -1
 29175  	JE   LBB4_1524 // peeling finished: continue at LBB4_1524 (outside this excerpt)
 29176  
 29177  LBB4_1522: // Select xmm1: the constant from xmm0 when the source byte is non-zero, otherwise zero.
 29178  	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
 29179  	LONG $0xc86f0f66 // movdqa    xmm1, xmm0
 29180  	JNE  LBB4_1521 // non-zero input: keep the constant (movdqa does not alter the flags set by cmp)
 29181  	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
 29182  	JMP  LBB4_1521 // zero input: store 0
 29183  
 29184  LBB4_428: // Case entry (dispatch stub). r9d = element count, copied zero-extended into r10d.
 29185  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29186  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29187  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29188  	LONG $0x04f98341         // cmp    r9d, 4
 29189  	JAE  LBB4_884 // 4 or more elements: handled at LBB4_884 (outside this excerpt)
 29190  	WORD $0xd231             // xor    edx, edx
 29191  	JMP  LBB4_1064 // fewer than 4 elements: continue at LBB4_1064 (outside this excerpt) with rdx = 0
 29192  
 29193  LBB4_431: // Case entry (dispatch stub). r9d = element count, copied zero-extended into eax.
 29194  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29195  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29196  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29197  	LONG $0x08f98341         // cmp    r9d, 8
 29198  	JAE  LBB4_887 // 8 or more elements: handled at LBB4_887 (outside this excerpt)
 29199  	WORD $0xd231             // xor    edx, edx
 29200  	JMP  LBB4_1070 // fewer than 8 elements: continue at LBB4_1070 (outside this excerpt) with rdx = 0
 29201  
 29202  LBB4_434: // Case entry. r9d = element count; 4-byte elements are read from rcx and 1-byte results written to r8.
 29203  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29204  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29205  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29206  	LONG $0x08f98341         // cmp    r9d, 8
 29207  	JB   LBB4_436 // fewer than 8 elements: scalar path
 29208  	LONG $0x81148d48         // lea    rdx, [rcx + 4*rax]
 29209  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29210  	JBE  LBB4_892 // rcx range ends at or before r8: no overlap (LBB4_892 is outside this excerpt; presumably the vectorized loop)
 29211  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 29212  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29213  	JBE  LBB4_892 // r8 range ends at or before rcx: no overlap either
 29214  
 29215  LBB4_436: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29216  	WORD $0xd231 // xor    edx, edx
 29217  
 29218  LBB4_1539: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 29219  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29220  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29221  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29222  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 29223  	LONG $0x03e78348         // and    rdi, 3
 29224  	JE   LBB4_1541 // count is a multiple of 4: no peeling needed
 29225  
 29226  LBB4_1540: // Peel loop: out byte = (4-byte input != 0), written directly by setne, one element per iteration, rdi times.
 29227  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 29228  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 29229  	LONG $0x01c28348             // add    rdx, 1
 29230  	LONG $0xffc78348             // add    rdi, -1
 29231  	JNE  LBB4_1540
 29232  
 29233  LBB4_1541: // rsi still holds count - (index at LBB4_1539) - 1; below 3 means no group of four remains.
 29234  	LONG $0x03fe8348 // cmp    rsi, 3
 29235  	JB   LBB4_1655
 29236  
 29237  LBB4_1542: // Main loop, unrolled 4x: same non-zero flag store for elements rdx .. rdx+3, until rdx reaches the count in rax.
 29238  	LONG $0x00913c83               // cmp    dword [rcx + 4*rdx], 0
 29239  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 29240  	LONG $0x04917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 4], 0
 29241  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 29242  	LONG $0x08917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 8], 0
 29243  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 29244  	LONG $0x0c917c83; BYTE $0x00   // cmp    dword [rcx + 4*rdx + 12], 0
 29245  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 29246  	LONG $0x04c28348               // add    rdx, 4
 29247  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 29248  	JNE  LBB4_1542
 29249  	JMP  LBB4_1655 // all elements written
 29250  
 29251  LBB4_437: // Case entry. r9d = element count; 8-byte scalar doubles are read from rcx and 1-byte results written to r8.
 29252  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29253  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29254  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29255  	LONG $0x04f98341         // cmp    r9d, 4
 29256  	JB   LBB4_439 // fewer than 4 elements: scalar path
 29257  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 29258  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29259  	JBE  LBB4_897 // rcx range ends at or before r8: no overlap (LBB4_897 is outside this excerpt; presumably the vectorized loop)
 29260  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 29261  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29262  	JBE  LBB4_897 // r8 range ends at or before rcx: no overlap either
 29263  
 29264  LBB4_439: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29265  	WORD $0xd231 // xor    edx, edx
 29266  
 29267  LBB4_1547: // rsi = ~rdx. If the count in eax is odd, handle one element: compare with 0.0, combine the input with the constants at 0[rbp] and 272[rbp], truncate to an integer, and force 0 when the ucomisd left ZF set.
 29268  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 29269  	WORD $0xf748; BYTE $0xd6     // not    rsi
 29270  	WORD $0x01a8                 // test    al, 1
 29271  	JE   LBB4_1549 // even count: no single element to peel
 29272  	LONG $0x04100ff2; BYTE $0xd1 // movsd    xmm0, qword [rcx + 8*rdx]
 29273  	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
 29274  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 29275  	LONG $0xc82e0f66             // ucomisd    xmm1, xmm0
 29276  	LONG $0x45540f66; BYTE $0x00 // andpd    xmm0, oword 0[rbp] /* [rip + .LCPI4_0] */
 29277  	QUAD $0x000001108d100ff2     // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 29278  	LONG $0xc8560f66             // orpd    xmm1, xmm0
 29279  	LONG $0xf92c0ff2             // cvttsd2si    edi, xmm1
 29280  	LONG $0xf9440f41             // cmove    edi, r9d
 29281  	LONG $0x103c8841             // mov    byte [r8 + rdx], dil
 29282  	LONG $0x01ca8348             // or    rdx, 1
 29283  
 29284  LBB4_1549: // rsi = count - (index at LBB4_1547) - 1; zero means nothing remains. Otherwise hoist loop constants: esi = 0, xmm0 = 0.0, xmm1 = mask from 0[rbp], xmm2 = value from 272[rbp]. NOTE(review): constants are defined outside this excerpt; presumably a sign-bit mask and 1.0 (copysign pattern) - confirm.
 29285  	WORD $0x0148; BYTE $0xc6     // add    rsi, rax
 29286  	JE   LBB4_1655
 29287  	WORD $0xf631                 // xor    esi, esi
 29288  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 29289  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 29290  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 29291  
 29292  LBB4_1551: // Main loop, two elements per iteration: edi = trunc((in & xmm1) | xmm2), replaced by esi (= 0) when ucomisd set ZF (input equal to 0.0, or unordered); low byte stored. Runs until rdx reaches rax.
 29293  	LONG $0x1c100ff2; BYTE $0xd1   // movsd    xmm3, qword [rcx + 8*rdx]
 29294  	LONG $0xc32e0f66               // ucomisd    xmm0, xmm3
 29295  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 29296  	LONG $0xda560f66               // orpd    xmm3, xmm2
 29297  	LONG $0xfb2c0ff2               // cvttsd2si    edi, xmm3
 29298  	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
 29299  	LONG $0x103c8841               // mov    byte [r8 + rdx], dil
 29300  	LONG $0x5c100ff2; WORD $0x08d1 // movsd    xmm3, qword [rcx + 8*rdx + 8]
 29301  	LONG $0xc32e0f66               // ucomisd    xmm0, xmm3
 29302  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 29303  	LONG $0xda560f66               // orpd    xmm3, xmm2
 29304  	LONG $0xfb2c0ff2               // cvttsd2si    edi, xmm3
 29305  	WORD $0x440f; BYTE $0xfe       // cmove    edi, esi
 29306  	LONG $0x107c8841; BYTE $0x01   // mov    byte [r8 + rdx + 1], dil
 29307  	LONG $0x02c28348               // add    rdx, 2
 29308  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 29309  	JNE  LBB4_1551
 29310  	JMP  LBB4_1655 // all elements written
 29311  
 29312  LBB4_440: // Case entry. r9d = element count; 1-byte elements are read from rcx and 1-byte results written to r8.
 29313  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29314  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29315  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29316  	LONG $0x20f98341         // cmp    r9d, 32
 29317  	JB   LBB4_442 // fewer than 32 elements: scalar path
 29318  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 29319  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29320  	JBE  LBB4_902 // rcx range ends at or before r8: no overlap (LBB4_902 is outside this excerpt; presumably the vectorized loop)
 29321  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 29322  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29323  	JBE  LBB4_902 // r8 range ends at or before rcx: no overlap either
 29324  
 29325  LBB4_442: // Scalar fallback for short or overlapping buffers: element index rsi starts at 0.
 29326  	WORD $0xf631 // xor    esi, esi
 29327  
 29328  LBB4_1556: // rax = ~rsi. If the count (r10) is odd, handle one element: out = 1 if in > 0 (signed byte), 0 if in == 0, 0xFF (-1) if in < 0.
 29329  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 29330  	WORD $0xf748; BYTE $0xd0     // not    rax
 29331  	LONG $0x01c2f641             // test    r10b, 1
 29332  	JE   LBB4_1558 // even count: no single element to peel
 29333  	LONG $0x313c8a40             // mov    dil, byte [rcx + rsi]
 29334  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 29335  	LONG $0xd1950f41             // setne    r9b
 29336  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 29337  	WORD $0x8440; BYTE $0xff     // test    dil, dil
 29338  	LONG $0xc9b60f45             // movzx    r9d, r9b
 29339  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29340  	LONG $0xf94e0f41             // cmovle    edi, r9d
 29341  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 29342  	LONG $0x01ce8348             // or    rsi, 1
 29343  
 29344  LBB4_1558: // rax = count - (index at LBB4_1556) - 1; zero means nothing remains. edi = 1 is the value selected for positive inputs below.
 29345  	WORD $0x014c; BYTE $0xd0     // add    rax, r10
 29346  	JE   LBB4_1655
 29347  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29348  
 29349  LBB4_1560: // Main loop, two elements per iteration: value = -(in != 0) as a byte, replaced by edi (= 1) when in > 0; runs until rsi reaches r10.
 29350  	LONG $0x3104b60f             // movzx    eax, byte [rcx + rsi]
 29351  	WORD $0xc084                 // test    al, al
 29352  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29353  	WORD $0xdaf6                 // neg    dl
 29354  	WORD $0xc084                 // test    al, al
 29355  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29356  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 29357  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 29358  	LONG $0x3144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rsi + 1]
 29359  	WORD $0xc084                 // test    al, al
 29360  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29361  	WORD $0xdaf6                 // neg    dl
 29362  	WORD $0xc084                 // test    al, al
 29363  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29364  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 29365  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 29366  	LONG $0x02c68348             // add    rsi, 2
 29367  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 29368  	JNE  LBB4_1560
 29369  	JMP  LBB4_1655 // all elements written
 29370  
 29371  LBB4_443: // Case entry. r9d = element count; 8-byte elements are read from rcx and 1-byte results written to r8.
 29372  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29373  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29374  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29375  	LONG $0x04f98341         // cmp    r9d, 4
 29376  	JB   LBB4_445 // fewer than 4 elements: scalar path
 29377  	LONG $0xc1148d48         // lea    rdx, [rcx + 8*rax]
 29378  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29379  	JBE  LBB4_907 // rcx range ends at or before r8: no overlap (LBB4_907 is outside this excerpt; presumably the vectorized loop)
 29380  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 29381  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29382  	JBE  LBB4_907 // r8 range ends at or before rcx: no overlap either
 29383  
 29384  LBB4_445: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29385  	WORD $0xd231 // xor    edx, edx
 29386  
 29387  LBB4_1565: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 29388  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29389  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29390  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29391  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 29392  	LONG $0x03e78348         // and    rdi, 3
 29393  	JE   LBB4_1567 // count is a multiple of 4: no peeling needed
 29394  
 29395  LBB4_1566: // Peel loop: out byte = (8-byte input != 0), written directly by setne, one element per iteration, rdi times.
 29396  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 29397  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 29398  	LONG $0x01c28348             // add    rdx, 1
 29399  	LONG $0xffc78348             // add    rdi, -1
 29400  	JNE  LBB4_1566
 29401  
 29402  LBB4_1567: // rsi still holds count - (index at LBB4_1565) - 1; below 3 means no group of four remains.
 29403  	LONG $0x03fe8348 // cmp    rsi, 3
 29404  	JB   LBB4_1655
 29405  
 29406  LBB4_1568: // Main loop, unrolled 4x: same non-zero flag store for elements rdx .. rdx+3, until rdx reaches the count in rax.
 29407  	LONG $0xd13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rdx], 0
 29408  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 29409  	LONG $0xd17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rdx + 8], 0
 29410  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 29411  	LONG $0xd17c8348; WORD $0x0010 // cmp    qword [rcx + 8*rdx + 16], 0
 29412  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 29413  	LONG $0xd17c8348; WORD $0x0018 // cmp    qword [rcx + 8*rdx + 24], 0
 29414  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 29415  	LONG $0x04c28348               // add    rdx, 4
 29416  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 29417  	JNE  LBB4_1568
 29418  	JMP  LBB4_1655 // all elements written
 29419  
 29420  LBB4_446: // Case entry. r9d = element count; 2-byte elements are read from rcx and 1-byte results written to r8.
 29421  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29422  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29423  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29424  	LONG $0x10f98341         // cmp    r9d, 16
 29425  	JB   LBB4_448 // fewer than 16 elements: scalar path
 29426  	LONG $0x41148d48         // lea    rdx, [rcx + 2*rax]
 29427  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29428  	JBE  LBB4_912 // rcx range ends at or before r8: no overlap (LBB4_912 is outside this excerpt; presumably the vectorized loop)
 29429  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 29430  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29431  	JBE  LBB4_912 // r8 range ends at or before rcx: no overlap either
 29432  
 29433  LBB4_448: // Scalar fallback for short or overlapping buffers: element index rdx starts at 0.
 29434  	WORD $0xd231 // xor    edx, edx
 29435  
 29436  LBB4_1573: // rsi = count - rdx - 1; rdi = count & 3 = number of single-element iterations to peel.
 29437  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29438  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29439  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29440  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 29441  	LONG $0x03e78348         // and    rdi, 3
 29442  	JE   LBB4_1575 // count is a multiple of 4: no peeling needed
 29443  
 29444  LBB4_1574: // Peel loop: out byte = (2-byte input != 0), written directly by setne, one element per iteration, rdi times.
 29445  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 29446  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 29447  	LONG $0x01c28348             // add    rdx, 1
 29448  	LONG $0xffc78348             // add    rdi, -1
 29449  	JNE  LBB4_1574
 29450  
 29451  LBB4_1575: // rsi still holds count - (index at LBB4_1573) - 1; below 3 means no group of four remains.
 29452  	LONG $0x03fe8348 // cmp    rsi, 3
 29453  	JB   LBB4_1655
 29454  
 29455  LBB4_1576: // Main loop, unrolled 4x: same non-zero flag store for elements rdx .. rdx+3, until rdx reaches the count in rax.
 29456  	LONG $0x513c8366; BYTE $0x00   // cmp    word [rcx + 2*rdx], 0
 29457  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 29458  	LONG $0x517c8366; WORD $0x0002 // cmp    word [rcx + 2*rdx + 2], 0
 29459  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 29460  	LONG $0x517c8366; WORD $0x0004 // cmp    word [rcx + 2*rdx + 4], 0
 29461  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 29462  	LONG $0x517c8366; WORD $0x0006 // cmp    word [rcx + 2*rdx + 6], 0
 29463  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 29464  	LONG $0x04c28348               // add    rdx, 4
 29465  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 29466  	JNE  LBB4_1576
 29467  	JMP  LBB4_1655 // all elements written
 29468  
 29469  LBB4_449: // Case entry. r9d = element count; 2-byte elements are read from rcx and 1-byte results written to r8.
 29470  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29471  	JLE  LBB4_1655 // count <= 0: nothing to process (LBB4_1655 is outside this excerpt; presumably the common exit)
 29472  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29473  	LONG $0x10f98341         // cmp    r9d, 16
 29474  	JB   LBB4_451 // fewer than 16 elements: scalar path
 29475  	LONG $0x51148d4a         // lea    rdx, [rcx + 2*r10]
 29476  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29477  	JBE  LBB4_917 // rcx range ends at or before r8: no overlap (LBB4_917 is outside this excerpt; presumably the vectorized loop)
 29478  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 29479  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29480  	JBE  LBB4_917 // r8 range ends at or before rcx: no overlap either
 29481  
 29482  LBB4_451: // Scalar fallback for short or overlapping buffers: element index rsi starts at 0.
 29483  	WORD $0xf631 // xor    esi, esi
 29484  
 29485  LBB4_1581: // rax = ~rsi. If the count (r10) is odd, handle one element: out = 1 if in > 0 (signed 16-bit), 0 if in == 0, 0xFF (-1) if in < 0.
 29486  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 29487  	WORD $0xf748; BYTE $0xd0     // not    rax
 29488  	LONG $0x01c2f641             // test    r10b, 1
 29489  	JE   LBB4_1583 // even count: no single element to peel
 29490  	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
 29491  	WORD $0x8566; BYTE $0xff     // test    di, di
 29492  	LONG $0xd1950f41             // setne    r9b
 29493  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 29494  	WORD $0x8566; BYTE $0xff     // test    di, di
 29495  	LONG $0xc9b60f45             // movzx    r9d, r9b
 29496  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29497  	LONG $0xf94e0f41             // cmovle    edi, r9d
 29498  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 29499  	LONG $0x01ce8348             // or    rsi, 1
 29500  
 29501  LBB4_1583: // rax = count - (index at LBB4_1581) - 1; zero means nothing remains. r9d = 1 is the value selected for positive inputs below.
 29502  	WORD $0x014c; BYTE $0xd0       // add    rax, r10
 29503  	JE   LBB4_1655
 29504  	LONG $0x0001b941; WORD $0x0000 // mov    r9d, 1
 29505  
 29506  LBB4_1585: // Main loop, two elements per iteration: value = -(in != 0) as a byte, replaced by r9d (= 1) when in > 0; runs until rsi reaches r10.
 29507  	LONG $0x713cb70f             // movzx    edi, word [rcx + 2*rsi]
 29508  	WORD $0x8566; BYTE $0xff     // test    di, di
 29509  	WORD $0x950f; BYTE $0xd0     // setne    al
 29510  	WORD $0xd8f6                 // neg    al
 29511  	WORD $0x8566; BYTE $0xff     // test    di, di
 29512  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 29513  	LONG $0xc14f0f41             // cmovg    eax, r9d
 29514  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 29515  	LONG $0x7144b70f; BYTE $0x02 // movzx    eax, word [rcx + 2*rsi + 2]
 29516  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 29517  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29518  	WORD $0xdaf6                 // neg    dl
 29519  	WORD $0x8566; BYTE $0xc0     // test    ax, ax
 29520  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29521  	LONG $0xc14f0f41             // cmovg    eax, r9d
 29522  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 29523  	LONG $0x02c68348             // add    rsi, 2
 29524  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 29525  	JNE  LBB4_1585
 29526  	JMP  LBB4_1655 // all elements written
 29527  
 29528  LBB4_452: // Case handler: reads 8-byte signed integers at [rcx] and stores one signum byte per element at [r8] (1, 0 or 0xFF); r9d holds the element count.
 29529  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29530  	JLE  LBB4_1655 // count <= 0 (signed 32-bit): nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29531  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29532  	LONG $0x04f98341         // cmp    r9d, 4
 29533  	JB   LBB4_454 // fewer than 4 elements: go straight to the scalar code
 29534  	LONG $0xd1148d4a         // lea    rdx, [rcx + 8*r10]
 29535  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29536  	JBE  LBB4_922 // input ends at or before the output start: the two ranges do not overlap
 29537  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 29538  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29539  	JBE  LBB4_922 // output ends at or before the input start: no overlap. LBB4_922 is outside this chunk (presumably the SIMD path).
 29540  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29541  LBB4_454:
 29542  	WORD $0xf631 // xor    esi, esi
 29543  // rsi = current element index (0 on fall-through; this label may also be a jump target from code outside this chunk). rdx = ~rsi is kept for the remaining-count test below. When the total count is odd, one element is handled here so the main loop can go two at a time.
 29544  LBB4_1590:
 29545  	WORD $0x8948; BYTE $0xf2     // mov    rdx, rsi
 29546  	WORD $0xf748; BYTE $0xd2     // not    rdx
 29547  	LONG $0x01c2f641             // test    r10b, 1
 29548  	JE   LBB4_1592 // even count: skip the single-element step
 29549  	LONG $0xf13c8b48             // mov    rdi, qword [rcx + 8*rsi]
 29550  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 29551  	WORD $0x950f; BYTE $0xd0     // setne    al
 29552  	WORD $0xd8f6                 // neg    al
 29553  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 29554  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 29555  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29556  	WORD $0x4e0f; BYTE $0xf8     // cmovle    edi, eax
 29557  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 29558  	LONG $0x01ce8348             // or    rsi, 1
 29559  // rdx becomes count - 1 - (index on entry to LBB4_1590); zero means the element handled above was the last one.
 29560  LBB4_1592:
 29561  	WORD $0x014c; BYTE $0xd2     // add    rdx, r10
 29562  	JE   LBB4_1655
 29563  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29564  // Main loop, two elements per iteration until rsi == r10. Stored byte: 1 when the value is > 0 (cmovg), otherwise -(value != 0), i.e. 0 or 0xFF.
 29565  LBB4_1594:
 29566  	LONG $0xf1048b48             // mov    rax, qword [rcx + 8*rsi]
 29567  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 29568  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29569  	WORD $0xdaf6                 // neg    dl
 29570  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 29571  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29572  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 29573  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 29574  	LONG $0xf1448b48; BYTE $0x08 // mov    rax, qword [rcx + 8*rsi + 8]
 29575  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 29576  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29577  	WORD $0xdaf6                 // neg    dl
 29578  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 29579  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29580  	WORD $0x4f0f; BYTE $0xc7     // cmovg    eax, edi
 29581  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 29582  	LONG $0x02c68348             // add    rsi, 2
 29583  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 29584  	JNE  LBB4_1594
 29585  	JMP  LBB4_1655
 29586  
 29587  LBB4_455: // Case handler: reads 4-byte floats at [rcx] and stores one byte per element at [r8]: 0 when the value compares equal to 0.0 or is unordered, else 1 or 0xFF by sign bit; r9d holds the element count.
 29588  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29589  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29590  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29591  	LONG $0x08f98341         // cmp    r9d, 8
 29592  	JB   LBB4_457 // fewer than 8 elements: go straight to the scalar code
 29593  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 29594  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29595  	JBE  LBB4_927 // input ends at or before the output start: no overlap
 29596  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 29597  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29598  	JBE  LBB4_927 // output ends at or before the input start: no overlap. LBB4_927 is outside this chunk (presumably the SIMD path).
 29599  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29600  LBB4_457:
 29601  	WORD $0xd231 // xor    edx, edx
 29602  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). When the count is odd, one element is handled here. setns/add/add produces +1 for a clear sign bit and 0xFF for a set one; ucomiss against 0.0 then forces 0 when ZF is set, which covers both equality and unordered (NaN) operands.
 29603  LBB4_1599:
 29604  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 29605  	WORD $0xf748; BYTE $0xd6     // not    rsi
 29606  	LONG $0x01c2f641             // test    r10b, 1
 29607  	JE   LBB4_1601 // even count: skip the single-element step
 29608  	LONG $0x046e0f66; BYTE $0x91 // movd    xmm0, dword [rcx + 4*rdx]
 29609  	LONG $0xc77e0f66             // movd    edi, xmm0
 29610  	WORD $0xff85                 // test    edi, edi
 29611  	WORD $0x990f; BYTE $0xd0     // setns    al
 29612  	WORD $0xc000                 // add    al, al
 29613  	WORD $0xff04                 // add    al, -1
 29614  	WORD $0xff31                 // xor    edi, edi
 29615  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 29616  	WORD $0x2e0f; BYTE $0xc8     // ucomiss    xmm1, xmm0
 29617  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 29618  	WORD $0x440f; BYTE $0xc7     // cmove    eax, edi
 29619  	LONG $0x10048841             // mov    byte [r8 + rdx], al
 29620  	LONG $0x01ca8348             // or    rdx, 1
 29621  // rsi becomes count - 1 - (index on entry to LBB4_1599); zero means the element handled above was the last one. Afterwards esi is reused as the constant 0 for cmove and xmm0 as 0.0 for ucomiss.
 29622  LBB4_1601:
 29623  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 29624  	JE   LBB4_1655
 29625  	WORD $0xf631             // xor    esi, esi
 29626  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 29627  // Main loop, two elements per iteration until rdx == r10; same per-element computation as the single-element step.
 29628  LBB4_1603:
 29629  	LONG $0x0c6e0f66; BYTE $0x91   // movd    xmm1, dword [rcx + 4*rdx]
 29630  	LONG $0xc87e0f66               // movd    eax, xmm1
 29631  	WORD $0xc085                   // test    eax, eax
 29632  	WORD $0x990f; BYTE $0xd0       // setns    al
 29633  	WORD $0xc000                   // add    al, al
 29634  	WORD $0xff04                   // add    al, -1
 29635  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
 29636  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 29637  	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
 29638  	LONG $0x10048841               // mov    byte [r8 + rdx], al
 29639  	LONG $0x4c6e0f66; WORD $0x0491 // movd    xmm1, dword [rcx + 4*rdx + 4]
 29640  	LONG $0xc87e0f66               // movd    eax, xmm1
 29641  	WORD $0xc085                   // test    eax, eax
 29642  	WORD $0x990f; BYTE $0xd0       // setns    al
 29643  	WORD $0xc000                   // add    al, al
 29644  	WORD $0xff04                   // add    al, -1
 29645  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
 29646  	WORD $0xb60f; BYTE $0xc0       // movzx    eax, al
 29647  	WORD $0x440f; BYTE $0xc6       // cmove    eax, esi
 29648  	LONG $0x10448841; BYTE $0x01   // mov    byte [r8 + rdx + 1], al
 29649  	LONG $0x02c28348               // add    rdx, 2
 29650  	WORD $0x3949; BYTE $0xd2       // cmp    r10, rdx
 29651  	JNE  LBB4_1603
 29652  	JMP  LBB4_1655
 29653  
 29654  LBB4_458: // Case handler: reads bytes at [rcx] and stores one byte per element at [r8]: 1 when the input byte is non-zero, else 0; r9d holds the element count.
 29655  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29656  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29657  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29658  	LONG $0x20f98341         // cmp    r9d, 32
 29659  	JB   LBB4_460 // fewer than 32 elements: go straight to the scalar code
 29660  	LONG $0x01148d48         // lea    rdx, [rcx + rax]
 29661  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29662  	JBE  LBB4_932 // input ends at or before the output start: no overlap
 29663  	LONG $0x00148d49         // lea    rdx, [r8 + rax]
 29664  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29665  	JBE  LBB4_932 // output ends at or before the input start: no overlap. LBB4_932 is outside this chunk (presumably the SIMD path).
 29666  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29667  LBB4_460:
 29668  	WORD $0xd231 // xor    edx, edx
 29669  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). rsi = count - 1 - rdx, rdi = count & 3 = number of single-element steps before the 4x unrolled loop.
 29670  LBB4_1608:
 29671  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29672  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29673  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 29674  	WORD $0x8948; BYTE $0xc7 // mov    rdi, rax
 29675  	LONG $0x03e78348         // and    rdi, 3
 29676  	JE   LBB4_1610 // count is a multiple of 4: no single-element steps needed
 29677  // One element per iteration, rdi times.
 29678  LBB4_1609:
 29679  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 29680  	LONG $0x14950f41; BYTE $0x10 // setne    byte [r8 + rdx]
 29681  	LONG $0x01c28348             // add    rdx, 1
 29682  	LONG $0xffc78348             // add    rdi, -1
 29683  	JNE  LBB4_1609
 29684  // rsi < 3 means fewer than 4 elements were outstanding at LBB4_1608, so the steps above finished the job.
 29685  LBB4_1610:
 29686  	LONG $0x03fe8348 // cmp    rsi, 3
 29687  	JB   LBB4_1655
 29688  // Main loop, four elements per iteration until rdx == rax (the count).
 29689  LBB4_1611:
 29690  	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
 29691  	LONG $0x14950f41; BYTE $0x10   // setne    byte [r8 + rdx]
 29692  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 29693  	LONG $0x54950f41; WORD $0x0110 // setne    byte [r8 + rdx + 1]
 29694  	LONG $0x02117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 2], 0
 29695  	LONG $0x54950f41; WORD $0x0210 // setne    byte [r8 + rdx + 2]
 29696  	LONG $0x03117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 3], 0
 29697  	LONG $0x54950f41; WORD $0x0310 // setne    byte [r8 + rdx + 3]
 29698  	LONG $0x04c28348               // add    rdx, 4
 29699  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 29700  	JNE  LBB4_1611
 29701  	JMP  LBB4_1655
 29702  
 29703  LBB4_461: // Case handler: reads 4-byte signed integers at [rcx] and stores one signum byte per element at [r8] (1, 0 or 0xFF); r9d holds the element count.
 29704  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29705  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29706  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29707  	LONG $0x08f98341         // cmp    r9d, 8
 29708  	JB   LBB4_463 // fewer than 8 elements: go straight to the scalar code
 29709  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 29710  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29711  	JBE  LBB4_937 // input ends at or before the output start: no overlap
 29712  	LONG $0x10148d4b         // lea    rdx, [r8 + r10]
 29713  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29714  	JBE  LBB4_937 // output ends at or before the input start: no overlap. LBB4_937 is outside this chunk (presumably the SIMD path).
 29715  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29716  LBB4_463:
 29717  	WORD $0xf631 // xor    esi, esi
 29718  // rsi = current element index (0 on fall-through; this label may also be entered from outside this chunk). When the count is odd, one element is handled here; r9 is free to use as scratch because the count now lives in r10.
 29719  LBB4_1616:
 29720  	WORD $0x8948; BYTE $0xf0     // mov    rax, rsi
 29721  	WORD $0xf748; BYTE $0xd0     // not    rax
 29722  	LONG $0x01c2f641             // test    r10b, 1
 29723  	JE   LBB4_1618 // even count: skip the single-element step
 29724  	WORD $0x3c8b; BYTE $0xb1     // mov    edi, dword [rcx + 4*rsi]
 29725  	WORD $0xff85                 // test    edi, edi
 29726  	LONG $0xd1950f41             // setne    r9b
 29727  	WORD $0xf641; BYTE $0xd9     // neg    r9b
 29728  	WORD $0xff85                 // test    edi, edi
 29729  	LONG $0xc9b60f45             // movzx    r9d, r9b
 29730  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 29731  	LONG $0xf94e0f41             // cmovle    edi, r9d
 29732  	LONG $0x303c8841             // mov    byte [r8 + rsi], dil
 29733  	LONG $0x01ce8348             // or    rsi, 1
 29734  // rax becomes count - 1 - (index on entry to LBB4_1616); zero means the element handled above was the last one. r9d then holds the constant 1 for cmovg.
 29735  LBB4_1618:
 29736  	WORD $0x014c; BYTE $0xd0       // add    rax, r10
 29737  	JE   LBB4_1655
 29738  	LONG $0x0001b941; WORD $0x0000 // mov    r9d, 1
 29739  // Main loop, two elements per iteration until rsi == r10. Stored byte: 1 when the value is > 0, otherwise -(value != 0), i.e. 0 or 0xFF.
 29740  LBB4_1620:
 29741  	WORD $0x3c8b; BYTE $0xb1     // mov    edi, dword [rcx + 4*rsi]
 29742  	WORD $0xff85                 // test    edi, edi
 29743  	WORD $0x950f; BYTE $0xd0     // setne    al
 29744  	WORD $0xd8f6                 // neg    al
 29745  	WORD $0xff85                 // test    edi, edi
 29746  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
 29747  	LONG $0xc14f0f41             // cmovg    eax, r9d
 29748  	LONG $0x30048841             // mov    byte [r8 + rsi], al
 29749  	LONG $0x04b1448b             // mov    eax, dword [rcx + 4*rsi + 4]
 29750  	WORD $0xc085                 // test    eax, eax
 29751  	WORD $0x950f; BYTE $0xd2     // setne    dl
 29752  	WORD $0xdaf6                 // neg    dl
 29753  	WORD $0xc085                 // test    eax, eax
 29754  	WORD $0xb60f; BYTE $0xc2     // movzx    eax, dl
 29755  	LONG $0xc14f0f41             // cmovg    eax, r9d
 29756  	LONG $0x30448841; BYTE $0x01 // mov    byte [r8 + rsi + 1], al
 29757  	LONG $0x02c68348             // add    rsi, 2
 29758  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 29759  	JNE  LBB4_1620
 29760  	JMP  LBB4_1655
 29761  
 29762  LBB4_464: // Case handler: reads dwords at [rcx] and stores one dword per element at [r8]: 1 when the input is non-zero, else 0; r9d holds the element count.
 29763  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29764  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29765  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29766  	LONG $0x08f98341         // cmp    r9d, 8
 29767  	JB   LBB4_466 // fewer than 8 elements: go straight to the scalar code
 29768  	LONG $0x91148d4a         // lea    rdx, [rcx + 4*r10]
 29769  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29770  	JBE  LBB4_942 // input ends at or before the output start: no overlap
 29771  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 29772  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29773  	JBE  LBB4_942 // output ends at or before the input start: no overlap. LBB4_942 is outside this chunk (presumably the SIMD path).
 29774  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29775  LBB4_466:
 29776  	WORD $0xd231 // xor    edx, edx
 29777  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). rsi = count - 1 - rdx, rdi = count & 3 = number of single-element steps before the 4x unrolled loop.
 29778  LBB4_1625:
 29779  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29780  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29781  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 29782  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 29783  	LONG $0x03e78348         // and    rdi, 3
 29784  	JE   LBB4_1627 // count is a multiple of 4: no single-element steps needed
 29785  // One element per iteration, rdi times.
 29786  LBB4_1626:
 29787  	WORD $0xc031             // xor    eax, eax
 29788  	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
 29789  	WORD $0x950f; BYTE $0xd0 // setne    al
 29790  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 29791  	LONG $0x01c28348         // add    rdx, 1
 29792  	LONG $0xffc78348         // add    rdi, -1
 29793  	JNE  LBB4_1626
 29794  // rsi < 3 means fewer than 4 elements were outstanding at LBB4_1625, so the steps above finished the job.
 29795  LBB4_1627:
 29796  	LONG $0x03fe8348 // cmp    rsi, 3
 29797  	JB   LBB4_1655
 29798  // Main loop, four elements per iteration until rdx == r10 (the count).
 29799  LBB4_1628:
 29800  	WORD $0xc031                 // xor    eax, eax
 29801  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 29802  	WORD $0x950f; BYTE $0xd0     // setne    al
 29803  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 29804  	WORD $0xc031                 // xor    eax, eax
 29805  	LONG $0x04917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 4], 0
 29806  	WORD $0x950f; BYTE $0xd0     // setne    al
 29807  	LONG $0x90448941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], eax
 29808  	WORD $0xc031                 // xor    eax, eax
 29809  	LONG $0x08917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 8], 0
 29810  	WORD $0x950f; BYTE $0xd0     // setne    al
 29811  	LONG $0x90448941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], eax
 29812  	WORD $0xc031                 // xor    eax, eax
 29813  	LONG $0x0c917c83; BYTE $0x00 // cmp    dword [rcx + 4*rdx + 12], 0
 29814  	WORD $0x950f; BYTE $0xd0     // setne    al
 29815  	LONG $0x90448941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], eax
 29816  	LONG $0x04c28348             // add    rdx, 4
 29817  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 29818  	JNE  LBB4_1628
 29819  	JMP  LBB4_1655
 29820  
 29821  LBB4_467: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to eax, zeroes r10d and branches on count >= 4. The element types handled are decided by the targets, which are outside this chunk.
 29822  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29823  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29824  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29825  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
 29826  	LONG $0x04f98341         // cmp    r9d, 4
 29827  	JAE  LBB4_945 // at least 4 elements (presumably the SIMD path)
 29828  	WORD $0xf631             // xor    esi, esi
 29829  	JMP  LBB4_1080 // fewer than 4 elements: continue at LBB4_1080 with esi = 0
 29830  
 29831  LBB4_470: // Case handler: reads signed bytes at [rcx] and stores one signum dword per element at [r8] (1, 0 or -1); r9d holds the element count.
 29832  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29833  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29834  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29835  	LONG $0x08f98341         // cmp    r9d, 8
 29836  	JB   LBB4_472 // fewer than 8 elements: go straight to the scalar code
 29837  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 29838  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29839  	JBE  LBB4_950 // input ends at or before the output start: no overlap
 29840  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 29841  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29842  	JBE  LBB4_950 // output ends at or before the input start: no overlap. LBB4_950 is outside this chunk (presumably the SIMD path).
 29843  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29844  LBB4_472:
 29845  	WORD $0xd231 // xor    edx, edx
 29846  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). When the count is odd, one element is handled here; r9 is free to use as scratch because the count now lives in r10.
 29847  LBB4_1633:
 29848  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 29849  	WORD $0xf748; BYTE $0xd6     // not    rsi
 29850  	LONG $0x01c2f641             // test    r10b, 1
 29851  	JE   LBB4_1635 // even count: skip the single-element step
 29852  	LONG $0x110c8a44             // mov    r9b, byte [rcx + rdx]
 29853  	WORD $0xff31                 // xor    edi, edi
 29854  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 29855  	LONG $0xd7950f40             // setne    dil
 29856  	WORD $0xdff7                 // neg    edi
 29857  	WORD $0x8445; BYTE $0xc9     // test    r9b, r9b
 29858  	LONG $0x000001b8; BYTE $0x00 // mov    eax, 1
 29859  	WORD $0x4e0f; BYTE $0xc7     // cmovle    eax, edi
 29860  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 29861  	LONG $0x01ca8348             // or    rdx, 1
 29862  // rsi becomes count - 1 - (index on entry to LBB4_1633); zero means the element handled above was the last one. esi then holds the constant 1 for cmovg.
 29863  LBB4_1635:
 29864  	WORD $0x014c; BYTE $0xd6     // add    rsi, r10
 29865  	JE   LBB4_1655
 29866  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 29867  // Main loop, two elements per iteration until rdx == r10. Stored dword: 1 when the byte is > 0 (signed), otherwise -(byte != 0), i.e. 0 or -1.
 29868  LBB4_1637:
 29869  	LONG $0x1104b60f             // movzx    eax, byte [rcx + rdx]
 29870  	WORD $0xff31                 // xor    edi, edi
 29871  	WORD $0xc084                 // test    al, al
 29872  	LONG $0xd7950f40             // setne    dil
 29873  	WORD $0xdff7                 // neg    edi
 29874  	WORD $0xc084                 // test    al, al
 29875  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 29876  	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
 29877  	LONG $0x1144b60f; BYTE $0x01 // movzx    eax, byte [rcx + rdx + 1]
 29878  	WORD $0xff31                 // xor    edi, edi
 29879  	WORD $0xc084                 // test    al, al
 29880  	LONG $0xd7950f40             // setne    dil
 29881  	WORD $0xdff7                 // neg    edi
 29882  	WORD $0xc084                 // test    al, al
 29883  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 29884  	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
 29885  	LONG $0x02c28348             // add    rdx, 2
 29886  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 29887  	JNE  LBB4_1637
 29888  	JMP  LBB4_1655
 29889  
 29890  LBB4_473: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to eax and branches on count >= 4. The element types handled are decided by the targets, which are outside this chunk.
 29891  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29892  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29893  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29894  	LONG $0x04f98341         // cmp    r9d, 4
 29895  	JAE  LBB4_953 // at least 4 elements (presumably the SIMD path)
 29896  	WORD $0xd231             // xor    edx, edx
 29897  	JMP  LBB4_1086 // fewer than 4 elements: continue at LBB4_1086 with edx = 0
 29898  
 29899  LBB4_476: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to eax and branches on count >= 8. The element types handled are decided by the targets, which are outside this chunk.
 29900  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29901  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29902  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29903  	LONG $0x08f98341         // cmp    r9d, 8
 29904  	JAE  LBB4_956 // at least 8 elements (presumably the SIMD path)
 29905  	WORD $0xd231             // xor    edx, edx
 29906  	JMP  LBB4_1091 // fewer than 8 elements: continue at LBB4_1091 with edx = 0
 29907  
 29908  LBB4_479: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to r10d and branches on count >= 8. The element types handled are decided by the targets, which are outside this chunk.
 29909  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29910  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29911  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29912  	LONG $0x08f98341         // cmp    r9d, 8
 29913  	JAE  LBB4_959 // at least 8 elements (presumably the SIMD path)
 29914  	WORD $0xd231             // xor    edx, edx
 29915  	JMP  LBB4_1096 // fewer than 8 elements: continue at LBB4_1096 with edx = 0
 29916  
 29917  LBB4_482: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to r10d and branches on count >= 4. The element types handled are decided by the targets, which are outside this chunk.
 29918  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29919  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29920  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29921  	LONG $0x04f98341         // cmp    r9d, 4
 29922  	JAE  LBB4_962 // at least 4 elements (presumably the SIMD path)
 29923  	WORD $0xd231             // xor    edx, edx
 29924  	JMP  LBB4_1102 // fewer than 4 elements: continue at LBB4_1102 with edx = 0
 29925  
 29926  LBB4_485: // Dispatch stub: leaves early when the count in r9d is <= 0, otherwise copies it to eax and branches on count >= 8. The element types handled are decided by the targets, which are outside this chunk.
 29927  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29928  	JLE  LBB4_1655 // LBB4_1655 is defined outside this chunk (presumably the common exit)
 29929  	WORD $0x8944; BYTE $0xc8 // mov    eax, r9d
 29930  	LONG $0x08f98341         // cmp    r9d, 8
 29931  	JAE  LBB4_965 // at least 8 elements (presumably the SIMD path)
 29932  	WORD $0xd231             // xor    edx, edx
 29933  	JMP  LBB4_968 // fewer than 8 elements: continue at LBB4_968 with edx = 0
 29934  
 29935  LBB4_488: // Case handler: reads bytes at [rcx] and stores one dword per element at [r8]: 1 when the input byte is non-zero, else 0; r9d holds the element count.
 29936  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29937  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29938  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
 29939  	LONG $0x08f98341         // cmp    r9d, 8
 29940  	JB   LBB4_490 // fewer than 8 elements: go straight to the scalar code
 29941  	LONG $0x11148d4a         // lea    rdx, [rcx + r10]
 29942  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 29943  	JBE  LBB4_974 // input ends at or before the output start: no overlap
 29944  	LONG $0x90148d4b         // lea    rdx, [r8 + 4*r10]
 29945  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 29946  	JBE  LBB4_974 // output ends at or before the input start: no overlap. LBB4_974 is outside this chunk (presumably the SIMD path).
 29947  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 29948  LBB4_490:
 29949  	WORD $0xd231 // xor    edx, edx
 29950  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). rsi = count - 1 - rdx, rdi = count & 3 = number of single-element steps before the 4x unrolled loop.
 29951  LBB4_1642:
 29952  	WORD $0x8948; BYTE $0xd6 // mov    rsi, rdx
 29953  	WORD $0xf748; BYTE $0xd6 // not    rsi
 29954  	WORD $0x014c; BYTE $0xd6 // add    rsi, r10
 29955  	WORD $0x894c; BYTE $0xd7 // mov    rdi, r10
 29956  	LONG $0x03e78348         // and    rdi, 3
 29957  	JE   LBB4_1644 // count is a multiple of 4: no single-element steps needed
 29958  // One element per iteration, rdi times.
 29959  LBB4_1643:
 29960  	WORD $0xc031             // xor    eax, eax
 29961  	LONG $0x00113c80         // cmp    byte [rcx + rdx], 0
 29962  	WORD $0x950f; BYTE $0xd0 // setne    al
 29963  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 29964  	LONG $0x01c28348         // add    rdx, 1
 29965  	LONG $0xffc78348         // add    rdi, -1
 29966  	JNE  LBB4_1643
 29967  // rsi < 3 means fewer than 4 elements were outstanding at LBB4_1642, so the steps above finished the job.
 29968  LBB4_1644:
 29969  	LONG $0x03fe8348 // cmp    rsi, 3
 29970  	JB   LBB4_1655
 29971  // Main loop, four elements per iteration until rdx == r10 (the count).
 29972  LBB4_1645:
 29973  	WORD $0xc031                 // xor    eax, eax
 29974  	LONG $0x00113c80             // cmp    byte [rcx + rdx], 0
 29975  	WORD $0x950f; BYTE $0xd0     // setne    al
 29976  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 29977  	WORD $0xc031                 // xor    eax, eax
 29978  	LONG $0x01117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 1], 0
 29979  	WORD $0x950f; BYTE $0xd0     // setne    al
 29980  	LONG $0x90448941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], eax
 29981  	WORD $0xc031                 // xor    eax, eax
 29982  	LONG $0x02117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 2], 0
 29983  	WORD $0x950f; BYTE $0xd0     // setne    al
 29984  	LONG $0x90448941; BYTE $0x08 // mov    dword [r8 + 4*rdx + 8], eax
 29985  	WORD $0xc031                 // xor    eax, eax
 29986  	LONG $0x03117c80; BYTE $0x00 // cmp    byte [rcx + rdx + 3], 0
 29987  	WORD $0x950f; BYTE $0xd0     // setne    al
 29988  	LONG $0x90448941; BYTE $0x0c // mov    dword [r8 + 4*rdx + 12], eax
 29989  	LONG $0x04c28348             // add    rdx, 4
 29990  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 29991  	JNE  LBB4_1645
 29992  	JMP  LBB4_1655
 29993  
 29994  LBB4_491: // Case handler: reads 4-byte signed integers at [rcx] and stores one signum dword per element at [r8] (1, 0 or -1); r9d holds the element count.
 29995  	WORD $0x8545; BYTE $0xc9 // test    r9d, r9d
 29996  	JLE  LBB4_1655 // count <= 0: nothing to do. LBB4_1655 is defined outside this chunk (presumably the common exit).
 29997  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
 29998  	LONG $0x08f98341         // cmp    r9d, 8
 29999  	JB   LBB4_493 // fewer than 8 elements: go straight to the scalar code
 30000  	LONG $0x99148d4a         // lea    rdx, [rcx + 4*r11]
 30001  	WORD $0x394c; BYTE $0xc2 // cmp    rdx, r8
 30002  	JBE  LBB4_979 // input ends at or before the output start: no overlap
 30003  	LONG $0x98148d4b         // lea    rdx, [r8 + 4*r11]
 30004  	WORD $0x3948; BYTE $0xca // cmp    rdx, rcx
 30005  	JBE  LBB4_979 // output ends at or before the input start: no overlap. LBB4_979 is outside this chunk (presumably the SIMD path).
 30006  // Scalar fallback for short inputs or overlapping buffers: start at element index 0.
 30007  LBB4_493:
 30008  	WORD $0xd231 // xor    edx, edx
 30009  // rdx = current element index (0 on fall-through; this label may also be entered from outside this chunk). When the count is odd, one element is handled here; r9 and r10 are used as scratch because the count now lives in r11.
 30010  LBB4_1650:
 30011  	WORD $0x8948; BYTE $0xd6     // mov    rsi, rdx
 30012  	WORD $0xf748; BYTE $0xd6     // not    rsi
 30013  	LONG $0x01c3f641             // test    r11b, 1
 30014  	JE   LBB4_1652 // even count: skip the single-element step
 30015  	LONG $0x910c8b44             // mov    r9d, dword [rcx + 4*rdx]
 30016  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
 30017  	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
 30018  	LONG $0xd2950f41             // setne    r10b
 30019  	WORD $0xf741; BYTE $0xda     // neg    r10d
 30020  	WORD $0x8545; BYTE $0xc9     // test    r9d, r9d
 30021  	LONG $0x000001bf; BYTE $0x00 // mov    edi, 1
 30022  	LONG $0xfa4e0f41             // cmovle    edi, r10d
 30023  	LONG $0x903c8941             // mov    dword [r8 + 4*rdx], edi
 30024  	LONG $0x01ca8348             // or    rdx, 1
 30025  // rsi becomes count - 1 - (index on entry to LBB4_1650); zero means the element handled above was the last one. esi then holds the constant 1 for cmovg.
 30026  LBB4_1652:
 30027  	WORD $0x014c; BYTE $0xde     // add    rsi, r11
 30028  	JE   LBB4_1655
 30029  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 30030  // Main loop, two elements per iteration until rdx == r11. Stored dword: 1 when the value is > 0, otherwise -(value != 0), i.e. 0 or -1.
 30031  LBB4_1654:
 30032  	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
 30033  	WORD $0xc031                 // xor    eax, eax
 30034  	WORD $0xff85                 // test    edi, edi
 30035  	WORD $0x950f; BYTE $0xd0     // setne    al
 30036  	WORD $0xd8f7                 // neg    eax
 30037  	WORD $0xff85                 // test    edi, edi
 30038  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 30039  	LONG $0x90048941             // mov    dword [r8 + 4*rdx], eax
 30040  	LONG $0x0491448b             // mov    eax, dword [rcx + 4*rdx + 4]
 30041  	WORD $0xff31                 // xor    edi, edi
 30042  	WORD $0xc085                 // test    eax, eax
 30043  	LONG $0xd7950f40             // setne    dil
 30044  	WORD $0xdff7                 // neg    edi
 30045  	WORD $0xc085                 // test    eax, eax
 30046  	WORD $0x4f0f; BYTE $0xfe     // cmovg    edi, esi
 30047  	LONG $0x907c8941; BYTE $0x04 // mov    dword [r8 + 4*rdx + 4], edi
 30048  	LONG $0x02c28348             // add    rdx, 2
 30049  	WORD $0x3949; BYTE $0xd3     // cmp    r11, rdx
 30050  	JNE  LBB4_1654
 30051  	JMP  LBB4_1655
 30052  
 30053  LBB4_1524: // Remainder loop entered from code outside this chunk: for each byte at [rcx + rdx] stores a dword at [r8 + 4*rdx] that is the constant in xmm0 when the byte is non-zero and 0 otherwise, four elements per iteration until rdx == rax.
 30054  	LONG $0x03fe8348         // cmp    rsi, 3
 30055  	JB   LBB4_1655 // rsi is set up by the caller of this label; presumably the outstanding element count minus one, as in the sibling handlers - TODO confirm
 30056  	QUAD $0x00000128856e0f66 // movd    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 30057  	JMP  LBB4_1527 // xmm0 = 4-byte constant at [rbp + 296]; its value is not visible in this chunk (presumably 1.0f - TODO confirm)
 30058  // Loop tail: store the fourth element's result (already selected in xmm1), advance by 4 and stop when the index reaches rax.
 30059  LBB4_1526:
 30060  	LONG $0x7e0f4166; WORD $0x904c; BYTE $0x0c // movd    dword [r8 + 4*rdx + 12], xmm1
 30061  	LONG $0x04c28348                           // add    rdx, 4
 30062  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 30063  	JE   LBB4_1655
 30064  // Loop head. Pattern used for every element: xmm1 is pre-loaded with the constant before the branch; the "byte is zero" successor clears it with pxor before storing, the "byte is non-zero" successor stores it as is.
 30065  LBB4_1527:
 30066  	LONG $0x00113c80               // cmp    byte [rcx + rdx], 0
 30067  	LONG $0xc86f0f66               // movdqa    xmm1, xmm0
 30068  	JNE  LBB4_1528
 30069  	LONG $0xc9ef0f66               // pxor    xmm1, xmm1
 30070  	LONG $0x7e0f4166; WORD $0x900c // movd    dword [r8 + 4*rdx], xmm1
 30071  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 30072  	LONG $0xc86f0f66               // movdqa    xmm1, xmm0
 30073  	JE   LBB4_1532
 30074  // Second element is non-zero: store the constant, then test the third element.
 30075  LBB4_1529:
 30076  	LONG $0x7e0f4166; WORD $0x904c; BYTE $0x04 // movd    dword [r8 + 4*rdx + 4], xmm1
 30077  	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
 30078  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 30079  	JNE  LBB4_1530
 30080  // Third element is zero: store 0, then test the fourth element.
 30081  LBB4_1533:
 30082  	LONG $0xc9ef0f66                           // pxor    xmm1, xmm1
 30083  	LONG $0x7e0f4166; WORD $0x904c; BYTE $0x08 // movd    dword [r8 + 4*rdx + 8], xmm1
 30084  	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
 30085  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 30086  	JNE  LBB4_1526
 30087  	JMP  LBB4_1534
 30088  // First element is non-zero: store the constant, then test the second element.
 30089  LBB4_1528:
 30090  	LONG $0x7e0f4166; WORD $0x900c // movd    dword [r8 + 4*rdx], xmm1
 30091  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 30092  	LONG $0xc86f0f66               // movdqa    xmm1, xmm0
 30093  	JNE  LBB4_1529
 30094  // Second element is zero: store 0, then test the third element.
 30095  LBB4_1532:
 30096  	LONG $0xc9ef0f66                           // pxor    xmm1, xmm1
 30097  	LONG $0x7e0f4166; WORD $0x904c; BYTE $0x04 // movd    dword [r8 + 4*rdx + 4], xmm1
 30098  	LONG $0x02117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 2], 0
 30099  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 30100  	JE   LBB4_1533
 30101  // Third element is non-zero: store the constant, then test the fourth element.
 30102  LBB4_1530:
 30103  	LONG $0x7e0f4166; WORD $0x904c; BYTE $0x08 // movd    dword [r8 + 4*rdx + 8], xmm1
 30104  	LONG $0x03117c80; BYTE $0x00               // cmp    byte [rcx + rdx + 3], 0
 30105  	LONG $0xc86f0f66                           // movdqa    xmm1, xmm0
 30106  	JNE  LBB4_1526
 30107  // Fourth element is zero: clear xmm1 so the loop tail stores 0.
 30108  LBB4_1534:
 30109  	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
 30110  	JMP  LBB4_1526
 30111  
 30112  LBB4_499: // SIMD loop (entered from code outside this chunk): reads 8-byte floating-point values at [rcx] and stores 32-bit integers at [r8]; r11d holds the element count. Setup: esi = count rounded down to a multiple of 4, r9 = number of 4-element groups.
 30113  	WORD $0x8944; BYTE $0xde     // mov    esi, r11d
 30114  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 30115  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 30116  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 30117  	LONG $0x02e9c149             // shr    r9, 2
 30118  	LONG $0x01c18349             // add    r9, 1
 30119  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 30120  	JE   LBB4_1106 // exactly one group of 4: skip the two-groups-per-iteration loop (target is outside this chunk)
 30121  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 30122  	LONG $0xfee28348             // and    rdx, -2
 30123  	WORD $0xf748; BYTE $0xda     // neg    rdx
 30124  	WORD $0xff31                 // xor    edi, edi
 30125  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 30126  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 30127  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI4_1] */
 30128  // rdx = -(group count rounded down to even) and counts up by 2 to zero; rdi = element index; xmm0 = 0.0. xmm1/xmm2 are 16-byte constants from [rbp] and [rbp + 16] whose values are not visible here (presumably a sign-bit mask and 1.0 - TODO confirm). Each iteration handles 8 elements: every lane is replaced by (x & xmm1) | xmm2, truncated to an integer with cvttsd2si (low 32 bits kept), and lanes that compared equal to 0.0 are cleared by andnps with the cmpeqpd mask. rbx is used as scratch.
 30129  LBB4_501:
 30130  	LONG $0x2c100f66; BYTE $0xf9   // movupd    xmm5, oword [rcx + 8*rdi]
 30131  	LONG $0x74100f66; WORD $0x10f9 // movupd    xmm6, oword [rcx + 8*rdi + 16]
 30132  	LONG $0xdd280f66               // movapd    xmm3, xmm5
 30133  	LONG $0xd8c20f66; BYTE $0x00   // cmpeqpd    xmm3, xmm0
 30134  	LONG $0xe8dbc60f               // shufps    xmm3, xmm3, 232
 30135  	LONG $0xe6280f66               // movapd    xmm4, xmm6
 30136  	LONG $0xe0c20f66; BYTE $0x00   // cmpeqpd    xmm4, xmm0
 30137  	LONG $0xe9540f66               // andpd    xmm5, xmm1
 30138  	LONG $0xea560f66               // orpd    xmm5, xmm2
 30139  	LONG $0xf1540f66               // andpd    xmm6, xmm1
 30140  	LONG $0xf2560f66               // orpd    xmm6, xmm2
 30141  	LONG $0xfd700f66; BYTE $0xee   // pshufd    xmm7, xmm5, 238
 30142  	LONG $0x2c0f48f2; BYTE $0xc7   // cvttsd2si    rax, xmm7
 30143  	LONG $0x2c0f48f2; BYTE $0xdd   // cvttsd2si    rbx, xmm5
 30144  	LONG $0xeb6e0f66               // movd    xmm5, ebx
 30145  	LONG $0x223a0f66; WORD $0x01e8 // pinsrd    xmm5, eax, 1
 30146  	LONG $0xfe700f66; BYTE $0xee   // pshufd    xmm7, xmm6, 238
 30147  	LONG $0x2c0f48f2; BYTE $0xc7   // cvttsd2si    rax, xmm7
 30148  	LONG $0x2c0f48f2; BYTE $0xde   // cvttsd2si    rbx, xmm6
 30149  	LONG $0xe8e4c60f               // shufps    xmm4, xmm4, 232
 30150  	LONG $0xf36e0f66               // movd    xmm6, ebx
 30151  	LONG $0x223a0f66; WORD $0x01f0 // pinsrd    xmm6, eax, 1
 30152  	WORD $0x550f; BYTE $0xdd       // andnps    xmm3, xmm5
 30153  	WORD $0x550f; BYTE $0xe6       // andnps    xmm4, xmm6
 30154  	WORD $0x160f; BYTE $0xdc       // movlhps    xmm3, xmm4
 30155  	LONG $0x1c110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm3
 30156  	LONG $0x6c100f66; WORD $0x20f9 // movupd    xmm5, oword [rcx + 8*rdi + 32]
 30157  	LONG $0x74100f66; WORD $0x30f9 // movupd    xmm6, oword [rcx + 8*rdi + 48]
 30158  	LONG $0xdd280f66               // movapd    xmm3, xmm5
 30159  	LONG $0xd8c20f66; BYTE $0x00   // cmpeqpd    xmm3, xmm0
 30160  	LONG $0xe8dbc60f               // shufps    xmm3, xmm3, 232
 30161  	LONG $0xe6280f66               // movapd    xmm4, xmm6
 30162  	LONG $0xe0c20f66; BYTE $0x00   // cmpeqpd    xmm4, xmm0
 30163  	LONG $0xe8e4c60f               // shufps    xmm4, xmm4, 232
 30164  	LONG $0xe9540f66               // andpd    xmm5, xmm1
 30165  	LONG $0xea560f66               // orpd    xmm5, xmm2
 30166  	LONG $0xf1540f66               // andpd    xmm6, xmm1
 30167  	LONG $0xfd700f66; BYTE $0xee   // pshufd    xmm7, xmm5, 238
 30168  	LONG $0x2c0f48f2; BYTE $0xc7   // cvttsd2si    rax, xmm7
 30169  	LONG $0xf2560f66               // orpd    xmm6, xmm2
 30170  	LONG $0x2c0f48f2; BYTE $0xdd   // cvttsd2si    rbx, xmm5
 30171  	LONG $0xeb6e0f66               // movd    xmm5, ebx
 30172  	LONG $0x223a0f66; WORD $0x01e8 // pinsrd    xmm5, eax, 1
 30173  	WORD $0x550f; BYTE $0xdd       // andnps    xmm3, xmm5
 30174  	LONG $0xee700f66; BYTE $0xee   // pshufd    xmm5, xmm6, 238
 30175  	LONG $0x2c0f48f2; BYTE $0xc5   // cvttsd2si    rax, xmm5
 30176  	LONG $0x2c0f48f2; BYTE $0xde   // cvttsd2si    rbx, xmm6
 30177  	LONG $0xeb6e0f66               // movd    xmm5, ebx
 30178  	LONG $0x223a0f66; WORD $0x01e8 // pinsrd    xmm5, eax, 1
 30179  	WORD $0x550f; BYTE $0xe5       // andnps    xmm4, xmm5
 30180  	WORD $0x160f; BYTE $0xdc       // movlhps    xmm3, xmm4
 30181  	LONG $0x5c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm3
 30182  	LONG $0x08c78348               // add    rdi, 8
 30183  	LONG $0x02c28348               // add    rdx, 2
 30184  	JNE  LBB4_501
 30185  	JMP  LBB4_1107 // continues outside this chunk (presumably handles a leftover group and the scalar tail)
 30186  
 30187  LBB4_507: // SIMD loop (entered from code outside this chunk): reads 8-byte integers at [rcx] and stores one dword per element at [r8]; eax holds the element count. Setup: edx = count rounded down to a multiple of 4, r9 = number of 4-element groups.
 30188  	WORD $0xc289             // mov    edx, eax
 30189  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30190  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 30191  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 30192  	LONG $0x02e9c149         // shr    r9, 2
 30193  	LONG $0x01c18349         // add    r9, 1
 30194  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 30195  	JE   LBB4_994 // exactly one group of 4: skip the two-groups-per-iteration loop (target is outside this chunk)
 30196  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 30197  	LONG $0xfee78348         // and    rdi, -2
 30198  	WORD $0xf748; BYTE $0xdf // neg    rdi
 30199  	WORD $0xf631             // xor    esi, esi
 30200  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 30201  	QUAD $0x000000a08d6f0f66 // movdqa    xmm1, oword 160[rbp] /* [rip + .LCPI4_16] */
 30202  // rdi = -(group count rounded down to even) and counts up by 2 to zero; rsi = element index; xmm0 = 0. xmm1 is a 16-byte constant from [rbp + 160] whose value is not visible here (presumably the per-lane "non-zero" result - TODO confirm). Each iteration handles 8 elements: pcmpeqq marks lanes equal to zero, pshufd 232 packs the two 64-bit masks into the low two dwords, and pandn yields xmm1's lane where the input was non-zero and 0 where it was zero.
 30203  LBB4_509:
 30204  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 30205  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 30206  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 30207  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 30208  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 30209  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 30210  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 30211  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 30212  	LONG $0xd36c0f66                           // punpcklqdq    xmm2, xmm3
 30213  	LONG $0x7f0f41f3; WORD $0xb014             // movdqu    oword [r8 + 4*rsi], xmm2
 30214  	LONG $0x546f0ff3; WORD $0x20f1             // movdqu    xmm2, oword [rcx + 8*rsi + 32]
 30215  	LONG $0x5c6f0ff3; WORD $0x30f1             // movdqu    xmm3, oword [rcx + 8*rsi + 48]
 30216  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 30217  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 30218  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 30219  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 30220  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 30221  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 30222  	LONG $0xd36c0f66                           // punpcklqdq    xmm2, xmm3
 30223  	LONG $0x7f0f41f3; WORD $0xb054; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm2
 30224  	LONG $0x08c68348                           // add    rsi, 8
 30225  	LONG $0x02c78348                           // add    rdi, 2
 30226  	JNE  LBB4_509
 30227  	JMP  LBB4_995 // continues outside this chunk (presumably handles a leftover group and the scalar tail)
 30228  
 30229  LBB4_510:
	// Setup for a 2x-unrolled SSE loop over 16-bit input elements.
	// edx = eax rounded down to a multiple of 8; r9 = ((rdx - 8) >> 3) + 1 = number of 8-element groups.
	// A single group (rdx - 8 == 0) bypasses the unrolled loop via LBB4_1112.
	// rdi = -(r9 & -2) counts up to zero by 2; rsi = element index.
	// xmm0 = 0, xmm1 = all ones, xmm2 = 16-byte constant at 80[rbp] (.LCPI4_8, value not visible here).
 30230  	WORD $0xc289                 // mov    edx, eax
 30231  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
 30232  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 30233  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 30234  	LONG $0x03e9c149             // shr    r9, 3
 30235  	LONG $0x01c18349             // add    r9, 1
 30236  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 30237  	JE   LBB4_1112
 30238  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 30239  	LONG $0xfee78348             // and    rdi, -2
 30240  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 30241  	WORD $0xf631                 // xor    esi, esi
 30242  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 30243  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 30244  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */
 30245  
 30246  LBB4_512:
	// Loop body: 16 word inputs at [rcx + 2*rsi] -> 16 dword outputs at [r8 + 4*rsi].
	// For each 4-word group: pcmpeqw gives an "== 0" mask, pxor with all-ones inverts it
	// to "!= 0", pmovzxwd widens the mask to dwords, and pand applies the xmm2 constant,
	// so zero inputs yield 0 and non-zero inputs yield (constant & 0x0000ffff).
 30247  	LONG $0x1c7e0ff3; BYTE $0x71               // movq    xmm3, qword [rcx + 2*rsi]
 30248  	LONG $0x647e0ff3; WORD $0x0871             // movq    xmm4, qword [rcx + 2*rsi + 8]
 30249  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 30250  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 30251  	LONG $0x33380f66; BYTE $0xdb               // pmovzxwd    xmm3, xmm3
 30252  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 30253  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 30254  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 30255  	LONG $0x33380f66; BYTE $0xe4               // pmovzxwd    xmm4, xmm4
 30256  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 30257  	LONG $0x7f0f41f3; WORD $0xb01c             // movdqu    oword [r8 + 4*rsi], xmm3
 30258  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm4
 30259  	LONG $0x5c7e0ff3; WORD $0x1071             // movq    xmm3, qword [rcx + 2*rsi + 16]
 30260  	LONG $0x647e0ff3; WORD $0x1871             // movq    xmm4, qword [rcx + 2*rsi + 24]
 30261  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 30262  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 30263  	LONG $0x33380f66; BYTE $0xdb               // pmovzxwd    xmm3, xmm3
 30264  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 30265  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 30266  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 30267  	LONG $0x33380f66; BYTE $0xe4               // pmovzxwd    xmm4, xmm4
 30268  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 30269  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm3
 30270  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm4
 30271  	LONG $0x10c68348                           // add    rsi, 16
 30272  	LONG $0x02c78348                           // add    rdi, 2
 30273  	JNE  LBB4_512
 30274  	JMP  LBB4_1113
 30275  
 30276  LBB4_513:
	// Setup for a 2x-unrolled SSE loop over 16-bit inputs with a three-way result.
	// edx = r10d rounded down to a multiple of 8; r9 = number of 8-element groups.
	// A single group (rdx - 8 == 0) bypasses the unrolled loop via LBB4_1117.
	// rdi = -(r9 & -2) counts up to zero by 2; rsi = element index.
	// xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 80[rbp] (.LCPI4_8, value not visible here).
 30277  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 30278  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
 30279  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 30280  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 30281  	LONG $0x03e9c149         // shr    r9, 3
 30282  	LONG $0x01c18349         // add    r9, 1
 30283  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 30284  	JE   LBB4_1117
 30285  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 30286  	LONG $0xfee78348         // and    rdi, -2
 30287  	WORD $0xf748; BYTE $0xdf // neg    rdi
 30288  	WORD $0xf631             // xor    esi, esi
 30289  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 30290  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 30291  	LONG $0x5065280f         // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 30292  
 30293  LBB4_515:
	// Loop body: 16 word inputs at [rcx + 2*rsi] -> 16 dword outputs at [r8 + 4*rsi].
	// xmm0/xmm1 = signed "> 0" masks (pcmpgtw) sign-extended to dwords.
	// xmm5/xmm6 = NOT(== 0) sign-extended to dwords: -1 for non-zero inputs, 0 for zero.
	// blendvps (implicit mask register xmm0) replaces lanes whose input was > 0 with the
	// xmm4 constant. Net per lane: > 0 -> constant, < 0 -> -1, == 0 -> 0.
	// NOTE(review): this is a signum if .LCPI4_8 is a vector of dword 1s - confirm.
 30294  	LONG $0x2c7e0ff3; BYTE $0x71   // movq    xmm5, qword [rcx + 2*rsi]
 30295  	LONG $0x747e0ff3; WORD $0x0871 // movq    xmm6, qword [rcx + 2*rsi + 8]
 30296  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30297  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2
 30298  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0
 30299  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 30300  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 30301  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 30302  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 30303  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 30304  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5
 30305  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 30306  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 30307  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 30308  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 30309  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 30310  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 30311  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 30312  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
 30313  	LONG $0x6c7e0ff3; WORD $0x1071 // movq    xmm5, qword [rcx + 2*rsi + 16]
 30314  	LONG $0x747e0ff3; WORD $0x1871 // movq    xmm6, qword [rcx + 2*rsi + 24]
 30315  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30316  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2
 30317  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0
 30318  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 30319  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 30320  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 30321  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 30322  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 30323  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5
 30324  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 30325  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 30326  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 30327  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 30328  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 30329  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 30330  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 30331  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
 30332  	LONG $0x10c68348               // add    rsi, 16
 30333  	LONG $0x02c78348               // add    rdi, 2
 30334  	JNE  LBB4_515
 30335  	JMP  LBB4_1118
 30336  
 30337  LBB4_516:
	// Setup for a 2x-unrolled SSE loop over 64-bit inputs with a three-way result.
	// edx = r10d rounded down to a multiple of 4; r9 = number of 4-element groups.
	// A single group (rdx - 4 == 0) bypasses the unrolled loop via LBB4_1123.
	// rdi = -(r9 & -2) counts up to zero by 2; rsi = element index.
	// xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 160[rbp] (.LCPI4_16, value not visible here).
 30338  	WORD $0x8944; BYTE $0xd2                   // mov    edx, r10d
 30339  	WORD $0xe283; BYTE $0xfc                   // and    edx, -4
 30340  	LONG $0xfc728d48                           // lea    rsi, [rdx - 4]
 30341  	WORD $0x8949; BYTE $0xf1                   // mov    r9, rsi
 30342  	LONG $0x02e9c149                           // shr    r9, 2
 30343  	LONG $0x01c18349                           // add    r9, 1
 30344  	WORD $0x8548; BYTE $0xf6                   // test    rsi, rsi
 30345  	JE   LBB4_1123
 30346  	WORD $0x894c; BYTE $0xcf                   // mov    rdi, r9
 30347  	LONG $0xfee78348                           // and    rdi, -2
 30348  	WORD $0xf748; BYTE $0xdf                   // neg    rdi
 30349  	WORD $0xf631                               // xor    esi, esi
 30350  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 30351  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 30352  	LONG $0xa0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 160[rbp] /* [rip + .LCPI4_16] */
 30353  
 30354  LBB4_518:
	// Loop body: 8 qword inputs at [rcx + 8*rsi] -> 8 dword outputs at [r8 + 4*rsi].
	// xmm0/xmm1 = signed "> 0" masks (pcmpgtq), packed to the low two dwords by pshufd 232.
	// xmm5/xmm6 = packed NOT(== 0) masks: -1 for non-zero inputs, 0 for zero.
	// blendvps (implicit mask register xmm0) substitutes the xmm4 constant where the
	// input was > 0; movlhps joins the two 2-lane halves into one 16-byte store.
	// Net per lane: > 0 -> constant, < 0 -> -1, == 0 -> 0.
 30355  	LONG $0x2c6f0ff3; BYTE $0xf1   // movdqu    xmm5, oword [rcx + 8*rsi]
 30356  	LONG $0x746f0ff3; WORD $0x10f1 // movdqu    xmm6, oword [rcx + 8*rsi + 16]
 30357  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30358  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
 30359  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
 30360  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 30361  	LONG $0x37380f66; BYTE $0xca   // pcmpgtq    xmm1, xmm2
 30362  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 30363  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2
 30364  	LONG $0xed700f66; BYTE $0xe8   // pshufd    xmm5, xmm5, 232
 30365  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 30366  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 30367  	LONG $0xf6700f66; BYTE $0xe8   // pshufd    xmm6, xmm6, 232
 30368  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 30369  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 30370  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 30371  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 30372  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6
 30373  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 30374  	LONG $0x6c6f0ff3; WORD $0x20f1 // movdqu    xmm5, oword [rcx + 8*rsi + 32]
 30375  	LONG $0x746f0ff3; WORD $0x30f1 // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 30376  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30377  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
 30378  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
 30379  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 30380  	LONG $0x37380f66; BYTE $0xca   // pcmpgtq    xmm1, xmm2
 30381  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 30382  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2
 30383  	LONG $0xed700f66; BYTE $0xe8   // pshufd    xmm5, xmm5, 232
 30384  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 30385  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 30386  	LONG $0xf6700f66; BYTE $0xe8   // pshufd    xmm6, xmm6, 232
 30387  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 30388  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 30389  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 30390  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 30391  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6
 30392  	LONG $0x6c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm5
 30393  	LONG $0x08c68348               // add    rsi, 8
 30394  	LONG $0x02c78348               // add    rdi, 2
 30395  	JNE  LBB4_518
 30396  	JMP  LBB4_1124
 30397  
 30398  LBB4_519:
	// Setup for a 2x-unrolled SSE loop over 32-bit lanes.
	// edx = eax rounded down to a multiple of 4; r9 = number of 4-element groups.
	// A single group (rdx - 4 == 0) bypasses the unrolled loop via LBB4_1129.
	// rdi = -(r9 & -2) counts up to zero by 2; rsi = element index.
	// xmm1 = 0; xmm2/xmm3/xmm4 = constants at 80/96/48[rbp] (.LCPI4_8/.LCPI4_10/.LCPI4_4,
	// values not visible in this chunk).
 30399  	WORD $0xc289                 // mov    edx, eax
 30400  	WORD $0xe283; BYTE $0xfc     // and    edx, -4
 30401  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 30402  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 30403  	LONG $0x02e9c149             // shr    r9, 2
 30404  	LONG $0x01c18349             // add    r9, 1
 30405  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 30406  	JE   LBB4_1129
 30407  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 30408  	LONG $0xfee78348             // and    rdi, -2
 30409  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 30410  	WORD $0xf631                 // xor    esi, esi
 30411  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 30412  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */
 30413  	LONG $0x605d280f             // movaps    xmm3, oword 96[rbp] /* [rip + .LCPI4_10] */
 30414  	LONG $0x3065280f             // movaps    xmm4, oword 48[rbp] /* [rip + .LCPI4_4] */
 30415  
 30416  LBB4_521:
	// Loop body: 8 dword lanes at [rcx + 4*rsi] -> 8 dword lanes at [r8 + 4*rsi].
	// Per 4-lane vector:
	//   psrad 31 smears each lane's top bit (-1 or 0), por merges in the xmm2 constant,
	//   and cvtdq2ps turns that integer into a float.
	//   The float is then converted back to an integer along two paths (direct truncation,
	//   and subtract-xmm3 / truncate / xor-xmm4); blendvps picks the direct path where
	//   the float is < xmm3.
	//   cmpneqps compares the ORIGINAL lanes as floats against 0 and andps zeroes the
	//   result for lanes that compared equal to zero.
	// NOTE(review): looks like sign(float32) cast to an unsigned 32-bit integer using the
	// usual float->uint32 sequence (xmm3 = 2^31, xmm4 = 0x80000000) - confirm against the
	// constant table.
 30417  	LONG $0x2c6f0ff3; BYTE $0xb1   // movdqu    xmm5, oword [rcx + 4*rsi]
 30418  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30419  	LONG $0xe0720f66; BYTE $0x1f   // psrad    xmm0, 31
 30420  	LONG $0xc2eb0f66               // por    xmm0, xmm2
 30421  	WORD $0x5b0f; BYTE $0xf0       // cvtdq2ps    xmm6, xmm0
 30422  	WORD $0x280f; BYTE $0xc6       // movaps    xmm0, xmm6
 30423  	LONG $0x01c3c20f               // cmpltps    xmm0, xmm3
 30424  	LONG $0xfe5b0ff3               // cvttps2dq    xmm7, xmm6
 30425  	WORD $0x5c0f; BYTE $0xf3       // subps    xmm6, xmm3
 30426  	LONG $0xf65b0ff3               // cvttps2dq    xmm6, xmm6
 30427  	WORD $0x570f; BYTE $0xf4       // xorps    xmm6, xmm4
 30428  	LONG $0x14380f66; BYTE $0xf7   // blendvps    xmm6, xmm7, xmm0
 30429  	LONG $0x04e9c20f               // cmpneqps    xmm5, xmm1
 30430  	WORD $0x540f; BYTE $0xee       // andps    xmm5, xmm6
 30431  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 30432  	LONG $0x6c6f0ff3; WORD $0x10b1 // movdqu    xmm5, oword [rcx + 4*rsi + 16]
 30433  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 30434  	LONG $0xe0720f66; BYTE $0x1f   // psrad    xmm0, 31
 30435  	LONG $0xc2eb0f66               // por    xmm0, xmm2
 30436  	WORD $0x5b0f; BYTE $0xf0       // cvtdq2ps    xmm6, xmm0
 30437  	WORD $0x280f; BYTE $0xc6       // movaps    xmm0, xmm6
 30438  	LONG $0x01c3c20f               // cmpltps    xmm0, xmm3
 30439  	LONG $0xfe5b0ff3               // cvttps2dq    xmm7, xmm6
 30440  	WORD $0x5c0f; BYTE $0xf3       // subps    xmm6, xmm3
 30441  	LONG $0xf65b0ff3               // cvttps2dq    xmm6, xmm6
 30442  	WORD $0x570f; BYTE $0xf4       // xorps    xmm6, xmm4
 30443  	LONG $0x14380f66; BYTE $0xf7   // blendvps    xmm6, xmm7, xmm0
 30444  	LONG $0x04e9c20f               // cmpneqps    xmm5, xmm1
 30445  	WORD $0x540f; BYTE $0xee       // andps    xmm5, xmm6
 30446  	LONG $0x6c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm5
 30447  	LONG $0x08c68348               // add    rsi, 8
 30448  	LONG $0x02c78348               // add    rdi, 2
 30449  	JNE  LBB4_521
 30450  	JMP  LBB4_1130
 30451  
 30452  LBB4_532:
	// Scalar 4x-unrolled loop: dword inputs at [rcx + 4*rsi] -> double outputs at [r8 + 8*rsi].
	// edx is rounded down in place to a multiple of 4 and is the loop bound; rsi = element index.
	// xmm0 = double constant at 272[rbp] (.LCPI4_2, value not visible in this chunk).
	// Each output is xmm0 when the input is non-zero and +0.0 when it is zero.
	// The SSE moves/xors between each cmp and its conditional jump leave EFLAGS untouched,
	// so every Jcc tests the immediately preceding cmp.
 30453  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30454  	WORD $0xf631             // xor    esi, esi
 30455  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 30456  	JMP  LBB4_534
 30457  
 30458  LBB4_533:
	// Loop latch: store the 4th result, advance by 4, leave through LBB4_101 when rsi == rdx.
 30459  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm1
 30460  	LONG $0x04c68348                           // add    rsi, 4
 30461  	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
 30462  	JE   LBB4_101
 30463  
 30464  LBB4_534:
	// Loop head: element 0. The labels below are the zero / non-zero variants for
	// elements 1..3; each one stores the previous element's result and tests the next.
 30465  	LONG $0x00b13c83               // cmp    dword [rcx + 4*rsi], 0
 30466  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30467  	JNE  LBB4_535
 30468  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 30469  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30470  	LONG $0x04b17c83; BYTE $0x00   // cmp    dword [rcx + 4*rsi + 4], 0
 30471  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30472  	JE   LBB4_539
 30473  
 30474  LBB4_536:
 30475  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30476  	LONG $0x08b17c83; BYTE $0x00               // cmp    dword [rcx + 4*rsi + 8], 0
 30477  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30478  	JNE  LBB4_537
 30479  
 30480  LBB4_540:
 30481  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30482  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30483  	LONG $0x0cb17c83; BYTE $0x00               // cmp    dword [rcx + 4*rsi + 12], 0
 30484  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30485  	JNE  LBB4_533
 30486  	JMP  LBB4_541
 30487  
 30488  LBB4_535:
 30489  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30490  	LONG $0x04b17c83; BYTE $0x00   // cmp    dword [rcx + 4*rsi + 4], 0
 30491  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30492  	JNE  LBB4_536
 30493  
 30494  LBB4_539:
 30495  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30496  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30497  	LONG $0x08b17c83; BYTE $0x00               // cmp    dword [rcx + 4*rsi + 8], 0
 30498  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30499  	JE   LBB4_540
 30500  
 30501  LBB4_537:
 30502  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30503  	LONG $0x0cb17c83; BYTE $0x00               // cmp    dword [rcx + 4*rsi + 12], 0
 30504  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30505  	JNE  LBB4_533
 30506  
 30507  LBB4_541:
 30508  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 30509  	JMP  LBB4_533
 30510  
 30511  LBB4_547:
	// Scalar 2x-unrolled loop: byte inputs at [rcx + rax] -> double outputs at [r8 + 8*rax].
	// esi = edx rounded down to a multiple of 2 (loop bound); rax = element index.
	// xmm0 = double at 288[rbp] (.LCPI4_13), xmm1 = double at 272[rbp] (.LCPI4_2);
	// neither value is visible in this chunk.
	// Per element, using the flags of a single signed "cmp byte, 0":
	//   > 0 -> xmm1,  < 0 -> xmm0,  == 0 -> +0.0
	// The SSE moves/xors between the cmp and the Jcc instructions do not modify EFLAGS.
	// NOTE(review): this is a signum if the constants are +1.0 and -1.0 - confirm.
 30512  	WORD $0xd689             // mov    esi, edx
 30513  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 30514  	WORD $0xc031             // xor    eax, eax
 30515  	QUAD $0x0000012085100ff2 // movsd    xmm0, qword 288[rbp] /* [rip + .LCPI4_13] */
 30516  	QUAD $0x000001108d100ff2 // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 30517  	JMP  LBB4_549
 30518  
 30519  LBB4_548:
	// Loop latch: store the 2nd result, advance by 2, leave through LBB4_120 when rax == rsi.
 30520  	LONG $0x110f41f2; WORD $0xc05c; BYTE $0x08 // movsd    qword [r8 + 8*rax + 8], xmm3
 30521  	LONG $0x02c08348                           // add    rax, 2
 30522  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 30523  	JE   LBB4_120
 30524  
 30525  LBB4_549:
	// Loop head: classify element 0 (xmm2 = non-positive candidate, xmm3 = value to store).
 30526  	LONG $0x00013c80 // cmp    byte [rcx + rax], 0
 30527  	LONG $0xd0280f66 // movapd    xmm2, xmm0
 30528  	JNE  LBB4_550
 30529  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30530  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30531  	JLE  LBB4_554
 30532  
 30533  LBB4_551:
 30534  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30535  	LONG $0x01017c80; BYTE $0x00   // cmp    byte [rcx + rax + 1], 0
 30536  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30537  	JNE  LBB4_552
 30538  
 30539  LBB4_555:
 30540  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30541  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30542  	JG   LBB4_548
 30543  	JMP  LBB4_556
 30544  
 30545  LBB4_550:
 30546  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30547  	JG   LBB4_551
 30548  
 30549  LBB4_554:
 30550  	LONG $0xda280f66               // movapd    xmm3, xmm2
 30551  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30552  	LONG $0x01017c80; BYTE $0x00   // cmp    byte [rcx + rax + 1], 0
 30553  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30554  	JE   LBB4_555
 30555  
 30556  LBB4_552:
 30557  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30558  	JG   LBB4_548
 30559  
 30560  LBB4_556:
 30561  	LONG $0xda280f66 // movapd    xmm3, xmm2
 30562  	JMP  LBB4_548
 30563  
 30564  LBB4_557:
	// Scalar 4x-unrolled loop: qword inputs at [rcx + 8*rsi] -> double outputs at [r8 + 8*rsi].
	// edx is rounded down in place to a multiple of 4 and is the loop bound; rsi = element index.
	// xmm0 = double constant at 272[rbp] (.LCPI4_2, value not visible in this chunk).
	// Each output is xmm0 when the input is non-zero and +0.0 when it is zero.
	// The SSE moves/xors between each cmp and its Jcc do not modify EFLAGS.
 30565  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30566  	WORD $0xf631             // xor    esi, esi
 30567  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 30568  	JMP  LBB4_559
 30569  
 30570  LBB4_558:
	// Loop latch: store the 4th result, advance by 4, leave through LBB4_130 when rsi == rdx.
 30571  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm1
 30572  	LONG $0x04c68348                           // add    rsi, 4
 30573  	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
 30574  	JE   LBB4_130
 30575  
 30576  LBB4_559:
	// Loop head: element 0. The labels below are the zero / non-zero variants for
	// elements 1..3; each one stores the previous element's result and tests the next.
 30577  	LONG $0xf13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rsi], 0
 30578  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30579  	JNE  LBB4_560
 30580  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 30581  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30582  	LONG $0xf17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rsi + 8], 0
 30583  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30584  	JE   LBB4_564
 30585  
 30586  LBB4_561:
 30587  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30588  	LONG $0xf17c8348; WORD $0x0010             // cmp    qword [rcx + 8*rsi + 16], 0
 30589  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30590  	JNE  LBB4_562
 30591  
 30592  LBB4_565:
 30593  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30594  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30595  	LONG $0xf17c8348; WORD $0x0018             // cmp    qword [rcx + 8*rsi + 24], 0
 30596  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30597  	JNE  LBB4_558
 30598  	JMP  LBB4_566
 30599  
 30600  LBB4_560:
 30601  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30602  	LONG $0xf17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rsi + 8], 0
 30603  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30604  	JNE  LBB4_561
 30605  
 30606  LBB4_564:
 30607  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30608  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30609  	LONG $0xf17c8348; WORD $0x0010             // cmp    qword [rcx + 8*rsi + 16], 0
 30610  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30611  	JE   LBB4_565
 30612  
 30613  LBB4_562:
 30614  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30615  	LONG $0xf17c8348; WORD $0x0018             // cmp    qword [rcx + 8*rsi + 24], 0
 30616  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30617  	JNE  LBB4_558
 30618  
 30619  LBB4_566:
 30620  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 30621  	JMP  LBB4_558
 30622  
 30623  LBB4_567:
	// Scalar 4x-unrolled loop: word inputs at [rcx + 2*rsi] -> double outputs at [r8 + 8*rsi].
	// edx is rounded down in place to a multiple of 4 and is the loop bound; rsi = element index.
	// xmm0 = double constant at 272[rbp] (.LCPI4_2, value not visible in this chunk).
	// Each output is xmm0 when the input is non-zero and +0.0 when it is zero.
	// The SSE moves/xors between each cmp and its Jcc do not modify EFLAGS.
 30624  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30625  	WORD $0xf631             // xor    esi, esi
 30626  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 30627  	JMP  LBB4_569
 30628  
 30629  LBB4_568:
	// Loop latch: store the 4th result, advance by 4, leave through LBB4_142 when rsi == rdx.
 30630  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm1
 30631  	LONG $0x04c68348                           // add    rsi, 4
 30632  	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
 30633  	JE   LBB4_142
 30634  
 30635  LBB4_569:
	// Loop head: element 0. The labels below are the zero / non-zero variants for
	// elements 1..3; each one stores the previous element's result and tests the next.
 30636  	LONG $0x713c8366; BYTE $0x00   // cmp    word [rcx + 2*rsi], 0
 30637  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30638  	JNE  LBB4_570
 30639  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 30640  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30641  	LONG $0x717c8366; WORD $0x0002 // cmp    word [rcx + 2*rsi + 2], 0
 30642  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30643  	JE   LBB4_574
 30644  
 30645  LBB4_571:
 30646  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30647  	LONG $0x717c8366; WORD $0x0004             // cmp    word [rcx + 2*rsi + 4], 0
 30648  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30649  	JNE  LBB4_572
 30650  
 30651  LBB4_575:
 30652  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30653  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30654  	LONG $0x717c8366; WORD $0x0006             // cmp    word [rcx + 2*rsi + 6], 0
 30655  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30656  	JNE  LBB4_568
 30657  	JMP  LBB4_576
 30658  
 30659  LBB4_570:
 30660  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30661  	LONG $0x717c8366; WORD $0x0002 // cmp    word [rcx + 2*rsi + 2], 0
 30662  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30663  	JNE  LBB4_571
 30664  
 30665  LBB4_574:
 30666  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30667  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30668  	LONG $0x717c8366; WORD $0x0004             // cmp    word [rcx + 2*rsi + 4], 0
 30669  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30670  	JE   LBB4_575
 30671  
 30672  LBB4_572:
 30673  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30674  	LONG $0x717c8366; WORD $0x0006             // cmp    word [rcx + 2*rsi + 6], 0
 30675  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30676  	JNE  LBB4_568
 30677  
 30678  LBB4_576:
 30679  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 30680  	JMP  LBB4_568
 30681  
 30682  LBB4_577:
	// Scalar 2x-unrolled loop: word inputs at [rcx + 2*rax] -> double outputs at [r8 + 8*rax].
	// esi = edx rounded down to a multiple of 2 (loop bound); rax = element index.
	// xmm0 = double at 288[rbp] (.LCPI4_13), xmm1 = double at 272[rbp] (.LCPI4_2);
	// neither value is visible in this chunk.
	// Per element, using the flags of a single signed "cmp word, 0":
	//   > 0 -> xmm1,  < 0 -> xmm0,  == 0 -> +0.0
	// The SSE moves/xors between the cmp and the Jcc instructions do not modify EFLAGS.
 30683  	WORD $0xd689             // mov    esi, edx
 30684  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 30685  	WORD $0xc031             // xor    eax, eax
 30686  	QUAD $0x0000012085100ff2 // movsd    xmm0, qword 288[rbp] /* [rip + .LCPI4_13] */
 30687  	QUAD $0x000001108d100ff2 // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 30688  	JMP  LBB4_579
 30689  
 30690  LBB4_578:
	// Loop latch: store the 2nd result, advance by 2, leave through LBB4_154 when rax == rsi.
 30691  	LONG $0x110f41f2; WORD $0xc05c; BYTE $0x08 // movsd    qword [r8 + 8*rax + 8], xmm3
 30692  	LONG $0x02c08348                           // add    rax, 2
 30693  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 30694  	JE   LBB4_154
 30695  
 30696  LBB4_579:
	// Loop head: classify element 0 (xmm2 = non-positive candidate, xmm3 = value to store).
 30697  	LONG $0x413c8366; BYTE $0x00 // cmp    word [rcx + 2*rax], 0
 30698  	LONG $0xd0280f66             // movapd    xmm2, xmm0
 30699  	JNE  LBB4_580
 30700  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 30701  	LONG $0xd9280f66             // movapd    xmm3, xmm1
 30702  	JLE  LBB4_584
 30703  
 30704  LBB4_581:
 30705  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30706  	LONG $0x417c8366; WORD $0x0002 // cmp    word [rcx + 2*rax + 2], 0
 30707  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30708  	JNE  LBB4_582
 30709  
 30710  LBB4_585:
 30711  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30712  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30713  	JG   LBB4_578
 30714  	JMP  LBB4_586
 30715  
 30716  LBB4_580:
 30717  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30718  	JG   LBB4_581
 30719  
 30720  LBB4_584:
 30721  	LONG $0xda280f66               // movapd    xmm3, xmm2
 30722  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30723  	LONG $0x417c8366; WORD $0x0002 // cmp    word [rcx + 2*rax + 2], 0
 30724  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30725  	JE   LBB4_585
 30726  
 30727  LBB4_582:
 30728  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30729  	JG   LBB4_578
 30730  
 30731  LBB4_586:
 30732  	LONG $0xda280f66 // movapd    xmm3, xmm2
 30733  	JMP  LBB4_578
 30734  
 30735  LBB4_587:
	// Scalar 2x-unrolled loop: qword inputs at [rcx + 8*rax] -> double outputs at [r8 + 8*rax].
	// esi = edx rounded down to a multiple of 2 (loop bound); rax = element index.
	// xmm0 = double at 288[rbp] (.LCPI4_13), xmm1 = double at 272[rbp] (.LCPI4_2);
	// neither value is visible in this chunk.
	// Per element, using the flags of a single signed "cmp qword, 0":
	//   > 0 -> xmm1,  < 0 -> xmm0,  == 0 -> +0.0
	// The SSE moves/xors between the cmp and the Jcc instructions do not modify EFLAGS.
 30736  	WORD $0xd689             // mov    esi, edx
 30737  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 30738  	WORD $0xc031             // xor    eax, eax
 30739  	QUAD $0x0000012085100ff2 // movsd    xmm0, qword 288[rbp] /* [rip + .LCPI4_13] */
 30740  	QUAD $0x000001108d100ff2 // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 30741  	JMP  LBB4_589
 30742  
 30743  LBB4_588:
	// Loop latch: store the 2nd result, advance by 2, leave through LBB4_164 when rax == rsi.
 30744  	LONG $0x110f41f2; WORD $0xc05c; BYTE $0x08 // movsd    qword [r8 + 8*rax + 8], xmm3
 30745  	LONG $0x02c08348                           // add    rax, 2
 30746  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 30747  	JE   LBB4_164
 30748  
 30749  LBB4_589:
	// Loop head: classify element 0 (xmm2 = non-positive candidate, xmm3 = value to store).
 30750  	LONG $0xc13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rax], 0
 30751  	LONG $0xd0280f66             // movapd    xmm2, xmm0
 30752  	JNE  LBB4_590
 30753  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 30754  	LONG $0xd9280f66             // movapd    xmm3, xmm1
 30755  	JLE  LBB4_594
 30756  
 30757  LBB4_591:
 30758  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30759  	LONG $0xc17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rax + 8], 0
 30760  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30761  	JNE  LBB4_592
 30762  
 30763  LBB4_595:
 30764  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30765  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30766  	JG   LBB4_588
 30767  	JMP  LBB4_596
 30768  
 30769  LBB4_590:
 30770  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30771  	JG   LBB4_591
 30772  
 30773  LBB4_594:
 30774  	LONG $0xda280f66               // movapd    xmm3, xmm2
 30775  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30776  	LONG $0xc17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rax + 8], 0
 30777  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30778  	JE   LBB4_595
 30779  
 30780  LBB4_592:
 30781  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30782  	JG   LBB4_588
 30783  
 30784  LBB4_596:
 30785  	LONG $0xda280f66 // movapd    xmm3, xmm2
 30786  	JMP  LBB4_588
 30787  
 30788  LBB4_597:
	// Scalar 2x-unrolled loop: float32 inputs at [rcx + 4*rax] -> double outputs at [r8 + 8*rax].
	// esi = edx rounded down to a multiple of 2 (loop bound); rax = element index; xmm0 = 0.0f.
	// Per element: ucomiss against 0.0f; when JE is taken the output is +0.0. ucomiss also
	// sets ZF for unordered operands, so NaN inputs take this path as well as +/-0.
	// Otherwise the sign bit (movmskps bit 0) is mapped to the integer +1 or -1
	// via neg / or 1, then converted int -> float -> double.
 30789  	WORD $0xd689             // mov    esi, edx
 30790  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 30791  	WORD $0xc031             // xor    eax, eax
 30792  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 30793  	JMP  LBB4_599
 30794  
 30795  LBB4_598:
	// Loop latch: store the 2nd result (xmm1), advance by 2, leave through LBB4_174 when rax == rsi.
 30796  	LONG $0x110f41f2; WORD $0xc04c; BYTE $0x08 // movsd    qword [r8 + 8*rax + 8], xmm1
 30797  	LONG $0x02c08348                           // add    rax, 2
 30798  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 30799  	JE   LBB4_174
 30800  
 30801  LBB4_599:
	// Element 0 -> xmm3. xmm1 is pre-zeroed here so the second element's
	// "equal to zero" path can jump straight to the latch.
 30802  	LONG $0x14100ff3; BYTE $0x81 // movss    xmm2, dword [rcx + 4*rax]
 30803  	LONG $0xc9570f66             // xorpd    xmm1, xmm1
 30804  	WORD $0x2e0f; BYTE $0xc2     // ucomiss    xmm0, xmm2
 30805  	LONG $0xdb570f66             // xorpd    xmm3, xmm3
 30806  	JE   LBB4_601
 30807  	WORD $0x500f; BYTE $0xfa     // movmskps    edi, xmm2
 30808  	WORD $0xe783; BYTE $0x01     // and    edi, 1
 30809  	WORD $0xdff7                 // neg    edi
 30810  	WORD $0xcf83; BYTE $0x01     // or    edi, 1
 30811  	WORD $0x570f; BYTE $0xd2     // xorps    xmm2, xmm2
 30812  	LONG $0xd72a0ff3             // cvtsi2ss    xmm2, edi
 30813  	WORD $0x570f; BYTE $0xdb     // xorps    xmm3, xmm3
 30814  	LONG $0xda5a0ff3             // cvtss2sd    xmm3, xmm2
 30815  
 30816  LBB4_601:
	// Store element 0's result, then compute element 1 into xmm1.
 30817  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30818  	LONG $0x54100ff3; WORD $0x0481 // movss    xmm2, dword [rcx + 4*rax + 4]
 30819  	WORD $0x2e0f; BYTE $0xc2       // ucomiss    xmm0, xmm2
 30820  	JE   LBB4_598
 30821  	WORD $0x500f; BYTE $0xfa       // movmskps    edi, xmm2
 30822  	WORD $0xe783; BYTE $0x01       // and    edi, 1
 30823  	WORD $0xdff7                   // neg    edi
 30824  	WORD $0xcf83; BYTE $0x01       // or    edi, 1
 30825  	WORD $0x570f; BYTE $0xc9       // xorps    xmm1, xmm1
 30826  	LONG $0xcf2a0ff3               // cvtsi2ss    xmm1, edi
 30827  	LONG $0xc95a0ff3               // cvtss2sd    xmm1, xmm1
 30828  	JMP  LBB4_598
 30829  
 // Scalar loop, four elements per iteration: reads bytes from [rcx + rsi] and
 // writes doubles to [r8 + 8*rsi]. A nonzero byte yields the constant held in
 // xmm0; a zero byte yields 0.0.
 // NOTE(review): xmm0 is loaded from 272[rbp] (.LCPI4_2); its value is not
 // visible here - presumably 1.0, TODO confirm.
 //
 // Setup: round the count in edx down to a multiple of 4, clear the index.
 30830  LBB4_603:
 30831  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30832  	WORD $0xf631             // xor    esi, esi
 30833  	QUAD $0x0000011085100ff2 // movsd    xmm0, qword 272[rbp] /* [rip + .LCPI4_2] */
 30834  	JMP  LBB4_605
 30835  
 // Loop latch: store the fourth result, advance by four, exit when rsi == rdx.
 30836  LBB4_604:
 30837  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x18 // movsd    qword [r8 + 8*rsi + 24], xmm1
 30838  	LONG $0x04c68348                           // add    rsi, 4
 30839  	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
 30840  	JE   LBB4_185
 30841  
 // Element 0. Each step preloads xmm1 with the nonzero result before branching;
 // movapd/xorpd leave the flags from the preceding cmp untouched.
 30842  LBB4_605:
 30843  	LONG $0x00313c80               // cmp    byte [rcx + rsi], 0
 30844  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30845  	JNE  LBB4_606
 // Element 0 is zero: store 0.0, then test element 1.
 30846  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 30847  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30848  	LONG $0x01317c80; BYTE $0x00   // cmp    byte [rcx + rsi + 1], 0
 30849  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30850  	JE   LBB4_610
 30851  
 // Element 1 is nonzero: store the constant, then test element 2.
 30852  LBB4_607:
 30853  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30854  	LONG $0x02317c80; BYTE $0x00               // cmp    byte [rcx + rsi + 2], 0
 30855  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30856  	JNE  LBB4_608
 30857  
 // Element 2 is zero: store 0.0, then test element 3.
 30858  LBB4_611:
 30859  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30860  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30861  	LONG $0x03317c80; BYTE $0x00               // cmp    byte [rcx + rsi + 3], 0
 30862  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30863  	JNE  LBB4_604
 30864  	JMP  LBB4_612
 30865  
 // Element 0 is nonzero: store the constant, then test element 1.
 30866  LBB4_606:
 30867  	LONG $0x110f41f2; WORD $0xf00c // movsd    qword [r8 + 8*rsi], xmm1
 30868  	LONG $0x01317c80; BYTE $0x00   // cmp    byte [rcx + rsi + 1], 0
 30869  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 30870  	JNE  LBB4_607
 30871  
 // Element 1 is zero: store 0.0, then test element 2.
 30872  LBB4_610:
 30873  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 30874  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x08 // movsd    qword [r8 + 8*rsi + 8], xmm1
 30875  	LONG $0x02317c80; BYTE $0x00               // cmp    byte [rcx + rsi + 2], 0
 30876  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30877  	JE   LBB4_611
 30878  
 // Element 2 is nonzero: store the constant, then test element 3.
 30879  LBB4_608:
 30880  	LONG $0x110f41f2; WORD $0xf04c; BYTE $0x10 // movsd    qword [r8 + 8*rsi + 16], xmm1
 30881  	LONG $0x03317c80; BYTE $0x00               // cmp    byte [rcx + rsi + 3], 0
 30882  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 30883  	JNE  LBB4_604
 30884  
 // Element 3 is zero: the latch will store 0.0.
 30885  LBB4_612:
 30886  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 30887  	JMP  LBB4_604
 30888  
 // Scalar loop, two elements per iteration: reads 32-bit integers from
 // [rcx + 4*rax], compares them with 0 using signed conditions (JG/JLE), and
 // writes doubles to [r8 + 8*rax]:
 //   value > 0  -> constant in xmm1
 //   value == 0 -> 0.0
 //   value < 0  -> constant in xmm0
 // NOTE(review): xmm0 comes from 288[rbp] (.LCPI4_13) and xmm1 from 272[rbp]
 // (.LCPI4_2); presumably -1.0 and 1.0 respectively - TODO confirm.
 // movapd/xorpd do not modify flags, so every conditional jump below still
 // tests the result of the most recent cmp.
 //
 // Setup: esi = count in edx rounded down to even, rax = element index 0.
 30889  LBB4_613:
 30890  	WORD $0xd689             // mov    esi, edx
 30891  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 30892  	WORD $0xc031             // xor    eax, eax
 30893  	QUAD $0x0000012085100ff2 // movsd    xmm0, qword 288[rbp] /* [rip + .LCPI4_13] */
 30894  	QUAD $0x000001108d100ff2 // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 30895  	JMP  LBB4_615
 30896  
 // Loop latch: store the second result (xmm3), advance by two, exit at rsi.
 30897  LBB4_614:
 30898  	LONG $0x110f41f2; WORD $0xc05c; BYTE $0x08 // movsd    qword [r8 + 8*rax + 8], xmm3
 30899  	LONG $0x02c08348                           // add    rax, 2
 30900  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 30901  	JE   LBB4_197
 30902  
 // First element: xmm2 holds the "not positive" result (xmm0 if nonzero, else
 // 0.0), xmm3 the "positive" candidate.
 30903  LBB4_615:
 30904  	LONG $0x00813c83 // cmp    dword [rcx + 4*rax], 0
 30905  	LONG $0xd0280f66 // movapd    xmm2, xmm0
 30906  	JNE  LBB4_616
 // Equal to zero: the JLE below is therefore always taken on this path.
 30907  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30908  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30909  	JLE  LBB4_620
 30910  
 // First element was positive: store xmm3 (= xmm1), then test the second.
 30911  LBB4_617:
 30912  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30913  	LONG $0x04817c83; BYTE $0x00   // cmp    dword [rcx + 4*rax + 4], 0
 30914  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30915  	JNE  LBB4_618
 30916  
 // Second element is zero: xmm2 = 0.0; JG cannot be taken here, so control
 // reaches LBB4_622 and the latch stores 0.0.
 30917  LBB4_621:
 30918  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 30919  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30920  	JG   LBB4_614
 30921  	JMP  LBB4_622
 30922  
 // First element is nonzero: positive goes to LBB4_617, negative falls through.
 30923  LBB4_616:
 30924  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30925  	JG   LBB4_617
 30926  
 // First element is zero or negative: store xmm2, then test the second.
 30927  LBB4_620:
 30928  	LONG $0xda280f66               // movapd    xmm3, xmm2
 30929  	LONG $0x110f41f2; WORD $0xc01c // movsd    qword [r8 + 8*rax], xmm3
 30930  	LONG $0x04817c83; BYTE $0x00   // cmp    dword [rcx + 4*rax + 4], 0
 30931  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 30932  	JE   LBB4_621
 30933  
 // Second element is nonzero: positive keeps xmm3 = xmm1 and goes to the latch.
 30934  LBB4_618:
 30935  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 30936  	JG   LBB4_614
 30937  
 // Second element is zero or negative: the latch stores xmm2.
 30938  LBB4_622:
 30939  	LONG $0xda280f66 // movapd    xmm3, xmm2
 30940  	JMP  LBB4_614
 30941  
 // Vector loop setup: 32-bit elements at [rcx + 4*rsi] are mapped to 64-bit
 // results at [r8 + 8*rsi]; a nonzero input selects the bits of the constant
 // in xmm2, a zero input yields 0.
 //   edx = count in eax rounded down to a multiple of 4
 //   r9  = number of 4-element groups ((rdx - 4) >> 2) + 1
 // If there is only one group (rdx - 4 == 0) the unrolled loop is skipped.
 // rdi = -(r9 & -2) counts up by 2 towards zero, one step per unrolled
 // iteration (two groups = 8 elements).
 // NOTE(review): xmm2 is loaded from 144[rbp] (.LCPI4_15); its value is not
 // visible here - presumably the 64-bit integer 1 in each lane, TODO confirm.
 30942  LBB4_673:
 30943  	WORD $0xc289             // mov    edx, eax
 30944  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 30945  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 30946  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 30947  	LONG $0x02e9c149         // shr    r9, 2
 30948  	LONG $0x01c18349         // add    r9, 1
 30949  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 30950  	JE   LBB4_999
 30951  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 30952  	LONG $0xfee78348         // and    rdi, -2
 30953  	WORD $0xf748; BYTE $0xdf // neg    rdi
 30954  	WORD $0xf631             // xor    esi, esi
 30955  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 30956  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 30957  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */
 30958  
 // Loop body. For each pair of dwords: pcmpeqd against zero then pxor with
 // all-ones gives an all-ones mask for nonzero lanes; pmovzxdq widens the two
 // low dwords to qwords; pand keeps the constant's bits where the mask is set.
 30959  LBB4_675:
 30960  	LONG $0x1c7e0ff3; BYTE $0xb1               // movq    xmm3, qword [rcx + 4*rsi]
 30961  	LONG $0x647e0ff3; WORD $0x08b1             // movq    xmm4, qword [rcx + 4*rsi + 8]
 30962  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 30963  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 30964  	LONG $0x35380f66; BYTE $0xdb               // pmovzxdq    xmm3, xmm3
 30965  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 30966  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 30967  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 30968  	LONG $0x35380f66; BYTE $0xe4               // pmovzxdq    xmm4, xmm4
 30969  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 30970  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3
 30971  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
 // Second group of four elements (unroll factor 2).
 30972  	LONG $0x5c7e0ff3; WORD $0x10b1             // movq    xmm3, qword [rcx + 4*rsi + 16]
 30973  	LONG $0x647e0ff3; WORD $0x18b1             // movq    xmm4, qword [rcx + 4*rsi + 24]
 30974  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 30975  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 30976  	LONG $0x35380f66; BYTE $0xdb               // pmovzxdq    xmm3, xmm3
 30977  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 30978  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 30979  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 30980  	LONG $0x35380f66; BYTE $0xe4               // pmovzxdq    xmm4, xmm4
 30981  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 30982  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 30983  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
 30984  	LONG $0x08c68348                           // add    rsi, 8
 30985  	LONG $0x02c78348                           // add    rdi, 2
 30986  	JNE  LBB4_675
 30987  	JMP  LBB4_1000
 30988  
 // Vector loop setup: doubles at [rcx + 8*rdi] are mapped to 64-bit integer
 // results at [r8 + 8*rdi].
 //   esi = count in r10d rounded down to even
 //   r9  = number of 2-element groups ((rsi - 2) >> 1) + 1
 // If there is only one group the unrolled loop is skipped. r14 = -(r9 & -2)
 // counts up by 2 towards zero; each iteration handles two groups (4 doubles).
 // NOTE(review): the constants are not visible here. Presumably xmm1
 // (.LCPI4_0) is a sign-bit mask, xmm2 (.LCPI4_1) is 1.0 so that
 // (x & xmm1) | xmm2 is copysign(1.0, x), and xmm3 (.LCPI4_6) is 2^63 -
 // TODO confirm. r11 is set outside this block; presumably
 // 0x8000000000000000 - TODO confirm.
 30989  LBB4_676:
 30990  	WORD $0x8944; BYTE $0xd6     // mov    esi, r10d
 30991  	WORD $0xe683; BYTE $0xfe     // and    esi, -2
 30992  	LONG $0xfe468d48             // lea    rax, [rsi - 2]
 30993  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
 30994  	WORD $0xd149; BYTE $0xe9     // shr    r9, 1
 30995  	LONG $0x01c18349             // add    r9, 1
 30996  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
 30997  	JE   LBB4_1004
 30998  	WORD $0x894d; BYTE $0xce     // mov    r14, r9
 30999  	LONG $0xfee68349             // and    r14, -2
 31000  	WORD $0xf749; BYTE $0xde     // neg    r14
 31001  	WORD $0xff31                 // xor    edi, edi
 31002  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 31003  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 31004  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI4_1] */
 31005  	QUAD $0x000001189d100ff2     // movsd    xmm3, qword 280[rbp] /* [rip + .LCPI4_6] */
 31006  
 // Loop body, first pair. xmm5 = (x & xmm1) | xmm2 for both lanes. Each lane
 // is then converted to a 64-bit integer with the two-path sequence
 //   t = cvttsd2si(v - xmm3) ^ r11;  r = cvttsd2si(v);  if v >= xmm3: r = t
 // pshufd 238 copies the high qword into the low qword so the scalar
 // conversion can be applied to the second lane.
 31007  LBB4_678:
 31008  	LONG $0x24100f66; BYTE $0xf9               // movupd    xmm4, oword [rcx + 8*rdi]
 31009  	LONG $0xec280f66                           // movapd    xmm5, xmm4
 31010  	LONG $0xe9540f66                           // andpd    xmm5, xmm1
 31011  	LONG $0xea560f66                           // orpd    xmm5, xmm2
 31012  	LONG $0xf5280f66                           // movapd    xmm6, xmm5
 31013  	LONG $0xf35c0ff2                           // subsd    xmm6, xmm3
 31014  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 31015  	WORD $0x314c; BYTE $0xdb                   // xor    rbx, r11
 31016  	LONG $0x2c0f48f2; BYTE $0xd5               // cvttsd2si    rdx, xmm5
 31017  	LONG $0xeb2e0f66                           // ucomisd    xmm5, xmm3
 31018  	LONG $0xd3430f48                           // cmovae    rdx, rbx
 31019  	LONG $0xed700f66; BYTE $0xee               // pshufd    xmm5, xmm5, 238
 31020  	LONG $0xf56f0f66                           // movdqa    xmm6, xmm5
 31021  	LONG $0xf35c0ff2                           // subsd    xmm6, xmm3
 31022  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 31023  	WORD $0x314c; BYTE $0xdb                   // xor    rbx, r11
 31024  	LONG $0x2c0f48f2; BYTE $0xc5               // cvttsd2si    rax, xmm5
 31025  	LONG $0xeb2e0f66                           // ucomisd    xmm5, xmm3
 31026  	LONG $0xc3430f48                           // cmovae    rax, rbx
 // Repack the two integers into one vector, then zero the lanes whose input
 // compared equal to 0.0 (cmpneqpd mask AND result) and store.
 31027  	LONG $0x6e0f4866; BYTE $0xea               // movq    xmm5, rdx
 31028  	LONG $0x6e0f4866; BYTE $0xf0               // movq    xmm6, rax
 31029  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 31030  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 31031  	LONG $0xe5540f66                           // andpd    xmm4, xmm5
 31032  	LONG $0x110f4166; WORD $0xf824             // movupd    oword [r8 + 8*rdi], xmm4
 // Second pair (unroll factor 2): same sequence with different scratch
 // registers.
 31033  	LONG $0x64100f66; WORD $0x10f9             // movupd    xmm4, oword [rcx + 8*rdi + 16]
 31034  	LONG $0xec280f66                           // movapd    xmm5, xmm4
 31035  	LONG $0xe9540f66                           // andpd    xmm5, xmm1
 31036  	LONG $0xea560f66                           // orpd    xmm5, xmm2
 31037  	LONG $0xf5280f66                           // movapd    xmm6, xmm5
 31038  	LONG $0xf35c0ff2                           // subsd    xmm6, xmm3
 31039  	LONG $0x2c0f48f2; BYTE $0xc6               // cvttsd2si    rax, xmm6
 31040  	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
 31041  	LONG $0x2c0f48f2; BYTE $0xd5               // cvttsd2si    rdx, xmm5
 31042  	LONG $0xeb2e0f66                           // ucomisd    xmm5, xmm3
 31043  	LONG $0xd0430f48                           // cmovae    rdx, rax
 31044  	LONG $0xed700f66; BYTE $0xee               // pshufd    xmm5, xmm5, 238
 31045  	LONG $0xf56f0f66                           // movdqa    xmm6, xmm5
 31046  	LONG $0xf35c0ff2                           // subsd    xmm6, xmm3
 31047  	LONG $0x2c0f48f2; BYTE $0xc6               // cvttsd2si    rax, xmm6
 31048  	WORD $0x314c; BYTE $0xd8                   // xor    rax, r11
 31049  	LONG $0x2c0f48f2; BYTE $0xdd               // cvttsd2si    rbx, xmm5
 31050  	LONG $0xeb2e0f66                           // ucomisd    xmm5, xmm3
 31051  	LONG $0xd8430f48                           // cmovae    rbx, rax
 31052  	LONG $0x6e0f4866; BYTE $0xea               // movq    xmm5, rdx
 31053  	LONG $0x6e0f4866; BYTE $0xf3               // movq    xmm6, rbx
 31054  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 31055  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 31056  	LONG $0xe5540f66                           // andpd    xmm4, xmm5
 31057  	LONG $0x110f4166; WORD $0xf864; BYTE $0x10 // movupd    oword [r8 + 8*rdi + 16], xmm4
 31058  	LONG $0x04c78348                           // add    rdi, 4
 31059  	LONG $0x02c68349                           // add    r14, 2
 31060  	JNE  LBB4_678
 31061  	JMP  LBB4_1005
 31062  
 // Vector loop setup: 16-bit elements at [rcx + 2*rsi] are mapped to 64-bit
 // results at [r8 + 8*rsi]; a nonzero input selects the bits of the constant
 // in xmm2, a zero input yields 0.
 //   edx = count in eax rounded down to a multiple of 4
 //   r9  = number of 4-element groups ((rdx - 4) >> 2) + 1
 // With a single group the unrolled loop is skipped. rdi = -(r9 & -2) counts
 // up by 2 towards zero; each iteration handles two groups (8 elements).
 // NOTE(review): xmm2 is loaded from 144[rbp] (.LCPI4_15); its value is not
 // visible here - presumably the 64-bit integer 1 in each lane, TODO confirm.
 31063  LBB4_689:
 31064  	WORD $0xc289             // mov    edx, eax
 31065  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31066  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31067  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31068  	LONG $0x02e9c149         // shr    r9, 2
 31069  	LONG $0x01c18349         // add    r9, 1
 31070  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31071  	JE   LBB4_1010
 31072  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31073  	LONG $0xfee78348         // and    rdi, -2
 31074  	WORD $0xf748; BYTE $0xdf // neg    rdi
 31075  	WORD $0xf631             // xor    esi, esi
 31076  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 31077  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 31078  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */
 31079  
 // Loop body. Each movd loads two 16-bit elements; pcmpeqw against zero then
 // pxor with all-ones gives an all-ones mask for nonzero lanes; pmovzxwq
 // widens the two low words to qwords; pand keeps the constant's bits where
 // the mask is set.
 31080  LBB4_691:
 31081  	LONG $0x1c6e0f66; BYTE $0x71               // movd    xmm3, dword [rcx + 2*rsi]
 31082  	LONG $0x646e0f66; WORD $0x0471             // movd    xmm4, dword [rcx + 2*rsi + 4]
 31083  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 31084  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31085  	LONG $0x34380f66; BYTE $0xdb               // pmovzxwq    xmm3, xmm3
 31086  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31087  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 31088  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31089  	LONG $0x34380f66; BYTE $0xe4               // pmovzxwq    xmm4, xmm4
 31090  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31091  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3
 31092  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
 // Second group of four elements (unroll factor 2).
 31093  	LONG $0x5c6e0f66; WORD $0x0871             // movd    xmm3, dword [rcx + 2*rsi + 8]
 31094  	LONG $0x646e0f66; WORD $0x0c71             // movd    xmm4, dword [rcx + 2*rsi + 12]
 31095  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 31096  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31097  	LONG $0x34380f66; BYTE $0xdb               // pmovzxwq    xmm3, xmm3
 31098  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31099  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 31100  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31101  	LONG $0x34380f66; BYTE $0xe4               // pmovzxwq    xmm4, xmm4
 31102  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31103  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 31104  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
 31105  	LONG $0x08c68348                           // add    rsi, 8
 31106  	LONG $0x02c78348                           // add    rdi, 2
 31107  	JNE  LBB4_691
 31108  	JMP  LBB4_1011
 31109  
 // Vector loop setup: 16-bit elements at [rcx + 2*rsi], compared as signed
 // words, are mapped to 64-bit results at [r8 + 8*rsi]:
 //   value > 0  -> constant in xmm4
 //   value == 0 -> 0
 //   value < 0  -> all-ones qword (-1 as a signed 64-bit integer)
 //   edx = count in r10d rounded down to a multiple of 4
 //   r9  = number of 4-element groups ((rdx - 4) >> 2) + 1
 // With a single group the unrolled loop is skipped. rdi = -(r9 & -2) counts
 // up by 2 towards zero; each iteration handles two groups (8 elements).
 // NOTE(review): xmm4 is loaded from 144[rbp] (.LCPI4_15); its value is not
 // visible here - presumably the 64-bit integer 1 in each lane, TODO confirm.
 31110  LBB4_692:
 31111  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 31112  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31113  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31114  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31115  	LONG $0x02e9c149         // shr    r9, 2
 31116  	LONG $0x01c18349         // add    r9, 1
 31117  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31118  	JE   LBB4_1015
 31119  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31120  	LONG $0xfee78348         // and    rdi, -2
 31121  	WORD $0xf748; BYTE $0xdf // neg    rdi
 31122  	WORD $0xf631             // xor    esi, esi
 31123  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 31124  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 31125  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 31126  
 // Loop body. For each pair of words:
 //   xmm0/xmm1 = (x > 0) mask, sign-extended to qwords (pcmpgtw + pmovsxwq)
 //   xmm5/xmm6 = (x != 0) mask, sign-extended to qwords, i.e. -1 or 0
 // blendvpd uses xmm0 as its implicit selector and replaces the lanes where
 // x > 0 with the constant in xmm4; xmm1 is copied into xmm0 before the
 // second blend for that reason.
 31127  LBB4_694:
 31128  	LONG $0x2c6e0f66; BYTE $0x71               // movd    xmm5, dword [rcx + 2*rsi]
 31129  	LONG $0x746e0f66; WORD $0x0471             // movd    xmm6, dword [rcx + 2*rsi + 4]
 31130  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31131  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 31132  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0
 31133  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31134  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 31135  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 31136  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 31137  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31138  	LONG $0x24380f66; BYTE $0xed               // pmovsxwq    xmm5, xmm5
 31139  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 31140  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31141  	LONG $0x24380f66; BYTE $0xf6               // pmovsxwq    xmm6, xmm6
 31142  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 31143  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31144  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 31145  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5
 31146  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
 // Second group of four elements (unroll factor 2).
 31147  	LONG $0x6c6e0f66; WORD $0x0871             // movd    xmm5, dword [rcx + 2*rsi + 8]
 31148  	LONG $0x746e0f66; WORD $0x0c71             // movd    xmm6, dword [rcx + 2*rsi + 12]
 31149  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31150  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 31151  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0
 31152  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31153  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 31154  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 31155  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 31156  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31157  	LONG $0x24380f66; BYTE $0xed               // pmovsxwq    xmm5, xmm5
 31158  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 31159  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31160  	LONG $0x24380f66; BYTE $0xf6               // pmovsxwq    xmm6, xmm6
 31161  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 31162  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31163  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 31164  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 31165  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
 31166  	LONG $0x08c68348                           // add    rsi, 8
 31167  	LONG $0x02c78348                           // add    rdi, 2
 31168  	JNE  LBB4_694
 31169  	JMP  LBB4_1016
 31170  
 // Scalar loop, two elements per iteration: reads 32-bit floats from
 // [rcx + 4*rax] and writes 64-bit integers to [r8 + 8*rax]. An element that
 // compares equal to 0.0 yields 0; any other element is first replaced by
 // +1.0 or -1.0 according to its sign bit and then converted to a 64-bit
 // integer with the two-path sequence
 //   t = cvttss2si(v - xmm1) ^ r9;  r = cvttss2si(v);  if v >= xmm1: r = t
 // Setup: esi = count in r10d rounded down to even, rax = 0, xmm0 = 0.0,
 // r9 = 0x8000000000000000.
 // NOTE(review): xmm1 is loaded from 300[rbp] (.LCPI4_9); its value is not
 // visible here - presumably 2^63 as a float, TODO confirm.
 // NOTE(review): ucomiss sets ZF on an unordered compare, so a NaN input
 // appears to take the "equal" path and produce 0 - confirm this is intended.
 31171  LBB4_700:
 31172  	WORD $0x8944; BYTE $0xd6               // mov    esi, r10d
 31173  	WORD $0xe683; BYTE $0xfe               // and    esi, -2
 31174  	WORD $0xc031                           // xor    eax, eax
 31175  	WORD $0x570f; BYTE $0xc0               // xorps    xmm0, xmm0
 31176  	QUAD $0x0000012c8d100ff3               // movss    xmm1, dword 300[rbp] /* [rip + .LCPI4_9] */
 31177  	QUAD $0x000000000000b949; WORD $0x8000 // mov    r9, -9223372036854775808
 31178  	JMP  LBB4_703
 31179  
 // Second element, nonzero case: build +/-1.0 from the sign bit, convert,
 // store at index rax + 1, advance by two and leave the loop at rsi.
 31180  LBB4_701:
 31181  	WORD $0x500f; BYTE $0xd2     // movmskps    edx, xmm2
 31182  	WORD $0xe283; BYTE $0x01     // and    edx, 1
 31183  	WORD $0xdaf7                 // neg    edx
 31184  	WORD $0xca83; BYTE $0x01     // or    edx, 1
 31185  	WORD $0x570f; BYTE $0xd2     // xorps    xmm2, xmm2
 31186  	LONG $0xd22a0ff3             // cvtsi2ss    xmm2, edx
 31187  	WORD $0x280f; BYTE $0xda     // movaps    xmm3, xmm2
 31188  	LONG $0xd95c0ff3             // subss    xmm3, xmm1
 31189  	LONG $0x2c0f48f3; BYTE $0xfb // cvttss2si    rdi, xmm3
 31190  	WORD $0x314c; BYTE $0xcf     // xor    rdi, r9
 31191  	LONG $0x2c0f48f3; BYTE $0xd2 // cvttss2si    rdx, xmm2
 31192  	WORD $0x2e0f; BYTE $0xd1     // ucomiss    xmm2, xmm1
 31193  	LONG $0xd7430f48             // cmovae    rdx, rdi
 31194  	LONG $0xc0548949; BYTE $0x08 // mov    qword [r8 + 8*rax + 8], rdx
 31195  	LONG $0x02c08348             // add    rax, 2
 31196  	WORD $0x3948; BYTE $0xc6     // cmp    rsi, rax
 31197  	JE   LBB4_290
 31198  
 // Loop entry / first element: equal to 0.0 -> result 0.
 31199  LBB4_703:
 31200  	LONG $0x14100ff3; BYTE $0x81 // movss    xmm2, dword [rcx + 4*rax]
 31201  	WORD $0x2e0f; BYTE $0xc2     // ucomiss    xmm0, xmm2
 31202  	JNE  LBB4_705
 31203  	WORD $0xd231                 // xor    edx, edx
 31204  	JMP  LBB4_706
 31205  
 // First element, nonzero case: same +/-1.0 construction and conversion as in
 // LBB4_701, result left in rdx.
 31206  LBB4_705:
 31207  	WORD $0x500f; BYTE $0xd2     // movmskps    edx, xmm2
 31208  	WORD $0xe283; BYTE $0x01     // and    edx, 1
 31209  	WORD $0xdaf7                 // neg    edx
 31210  	WORD $0xca83; BYTE $0x01     // or    edx, 1
 31211  	WORD $0x570f; BYTE $0xd2     // xorps    xmm2, xmm2
 31212  	LONG $0xd22a0ff3             // cvtsi2ss    xmm2, edx
 31213  	WORD $0x280f; BYTE $0xda     // movaps    xmm3, xmm2
 31214  	LONG $0xd95c0ff3             // subss    xmm3, xmm1
 31215  	LONG $0x2c0f48f3; BYTE $0xfb // cvttss2si    rdi, xmm3
 31216  	WORD $0x314c; BYTE $0xcf     // xor    rdi, r9
 31217  	LONG $0x2c0f48f3; BYTE $0xd2 // cvttss2si    rdx, xmm2
 31218  	WORD $0x2e0f; BYTE $0xd1     // ucomiss    xmm2, xmm1
 31219  	LONG $0xd7430f48             // cmovae    rdx, rdi
 31220  
 // Store the first result, then test the second element; the zero case is
 // handled inline (store 0, advance, loop), the nonzero case in LBB4_701.
 31221  LBB4_706:
 31222  	LONG $0xc0148949               // mov    qword [r8 + 8*rax], rdx
 31223  	LONG $0x54100ff3; WORD $0x0481 // movss    xmm2, dword [rcx + 4*rax + 4]
 31224  	WORD $0x2e0f; BYTE $0xc2       // ucomiss    xmm0, xmm2
 31225  	JNE  LBB4_701
 31226  	WORD $0xd231                   // xor    edx, edx
 31227  	LONG $0xc0548949; BYTE $0x08   // mov    qword [r8 + 8*rax + 8], rdx
 31228  	LONG $0x02c08348               // add    rax, 2
 31229  	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
 31230  	JNE  LBB4_703
 31231  
 // Tail: if the count in r10 is odd, one element remains at index rax. A value
 // equal to 0.0 stores 0 here; the nonzero case continues at LBB4_993.
 31232  LBB4_290:
 31233  	LONG $0x01c2f641             // test    r10b, 1
 31234  	JE   LBB4_1655
 31235  	LONG $0x04100ff3; BYTE $0x81 // movss    xmm0, dword [rcx + 4*rax]
 31236  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 31237  	WORD $0x2e0f; BYTE $0xc8     // ucomiss    xmm1, xmm0
 31238  	JNE  LBB4_993
 31239  	WORD $0xc931                 // xor    ecx, ecx
 31240  	LONG $0xc00c8949             // mov    qword [r8 + 8*rax], rcx
 31241  	JMP  LBB4_1655
 31242  
 // Vector loop setup: 32-bit elements at [rcx + 4*rsi], compared as signed
 // dwords, are mapped to 64-bit results at [r8 + 8*rsi]:
 //   value > 0  -> constant in xmm4
 //   value == 0 -> 0
 //   value < 0  -> all-ones qword (-1 as a signed 64-bit integer)
 //   edx = count in r10d rounded down to a multiple of 4
 //   r9  = number of 4-element groups ((rdx - 4) >> 2) + 1
 // With a single group the unrolled loop is skipped. rdi = -(r9 & -2) counts
 // up by 2 towards zero; each iteration handles two groups (8 elements).
 // NOTE(review): xmm4 is loaded from 144[rbp] (.LCPI4_15); its value is not
 // visible here - presumably the 64-bit integer 1 in each lane, TODO confirm.
 31243  LBB4_713:
 31244  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 31245  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31246  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31247  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31248  	LONG $0x02e9c149         // shr    r9, 2
 31249  	LONG $0x01c18349         // add    r9, 1
 31250  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31251  	JE   LBB4_1021
 31252  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31253  	LONG $0xfee78348         // and    rdi, -2
 31254  	WORD $0xf748; BYTE $0xdf // neg    rdi
 31255  	WORD $0xf631             // xor    esi, esi
 31256  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 31257  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 31258  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 31259  
 // Loop body. For each pair of dwords:
 //   xmm0/xmm1 = (x > 0) mask, sign-extended to qwords (pcmpgtd + pmovsxdq)
 //   xmm5/xmm6 = (x != 0) mask, sign-extended to qwords, i.e. -1 or 0
 // blendvpd uses xmm0 as its implicit selector and replaces the lanes where
 // x > 0 with the constant in xmm4; xmm1 is copied into xmm0 before the
 // second blend for that reason.
 31260  LBB4_715:
 31261  	LONG $0x2c7e0ff3; BYTE $0xb1               // movq    xmm5, qword [rcx + 4*rsi]
 31262  	LONG $0x747e0ff3; WORD $0x08b1             // movq    xmm6, qword [rcx + 4*rsi + 8]
 31263  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31264  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 31265  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 31266  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31267  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 31268  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 31269  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 31270  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31271  	LONG $0x25380f66; BYTE $0xed               // pmovsxdq    xmm5, xmm5
 31272  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 31273  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31274  	LONG $0x25380f66; BYTE $0xf6               // pmovsxdq    xmm6, xmm6
 31275  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 31276  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31277  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 31278  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5
 31279  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
 // Second group of four elements (unroll factor 2).
 31280  	LONG $0x6c7e0ff3; WORD $0x10b1             // movq    xmm5, qword [rcx + 4*rsi + 16]
 31281  	LONG $0x747e0ff3; WORD $0x18b1             // movq    xmm6, qword [rcx + 4*rsi + 24]
 31282  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31283  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 31284  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 31285  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31286  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 31287  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 31288  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 31289  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31290  	LONG $0x25380f66; BYTE $0xed               // pmovsxdq    xmm5, xmm5
 31291  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 31292  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31293  	LONG $0x25380f66; BYTE $0xf6               // pmovsxdq    xmm6, xmm6
 31294  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 31295  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31296  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 31297  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 31298  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
 31299  	LONG $0x08c68348                           // add    rsi, 8
 31300  	LONG $0x02c78348                           // add    rdi, 2
 31301  	JNE  LBB4_715
 31302  	JMP  LBB4_1022
 31303  
 // Vector loop setup: 32-bit elements at [rcx + 4*rsi] are mapped to 16-bit
 // results at [r8 + 2*rsi]; a nonzero input selects the bits of the constant
 // in xmm2, a zero input yields 0.
 //   edx = count in eax rounded down to a multiple of 8
 //   r9  = number of 8-element groups ((rdx - 8) >> 3) + 1
 // With a single group the unrolled loop is skipped. rdi = -(r9 & -2) counts
 // up by 2 towards zero; each iteration handles two groups (16 elements).
 // NOTE(review): xmm2 is loaded from 112[rbp] (.LCPI4_11); its value is not
 // visible here - presumably the 16-bit integer 1 in each lane, TODO confirm.
 31304  LBB4_716:
 31305  	WORD $0xc289                 // mov    edx, eax
 31306  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
 31307  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 31308  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 31309  	LONG $0x03e9c149             // shr    r9, 3
 31310  	LONG $0x01c18349             // add    r9, 1
 31311  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 31312  	JE   LBB4_1137
 31313  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 31314  	LONG $0xfee78348             // and    rdi, -2
 31315  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 31316  	WORD $0xf631                 // xor    esi, esi
 31317  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 31318  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 31319  	LONG $0x556f0f66; BYTE $0x70 // movdqa    xmm2, oword 112[rbp] /* [rip + .LCPI4_11] */
 31320  
 // Loop body. For each vector of four dwords: pcmpeqd against zero then pxor
 // with all-ones gives an all-ones mask for nonzero lanes; packssdw narrows
 // the mask to words (all-ones and zero are preserved by the saturation);
 // pand keeps the constant's bits where the mask is set. punpcklqdq joins the
 // two 4-word halves into one 8-word vector for a single 16-byte store.
 31321  LBB4_718:
 31322  	LONG $0x1c6f0ff3; BYTE $0xb1               // movdqu    xmm3, oword [rcx + 4*rsi]
 31323  	LONG $0x646f0ff3; WORD $0x10b1             // movdqu    xmm4, oword [rcx + 4*rsi + 16]
 31324  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 31325  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31326  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31327  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31328  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 31329  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31330  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31331  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31332  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 31333  	LONG $0x7f0f41f3; WORD $0x701c             // movdqu    oword [r8 + 2*rsi], xmm3
 // Second group of eight elements (unroll factor 2).
 31334  	LONG $0x5c6f0ff3; WORD $0x20b1             // movdqu    xmm3, oword [rcx + 4*rsi + 32]
 31335  	LONG $0x646f0ff3; WORD $0x30b1             // movdqu    xmm4, oword [rcx + 4*rsi + 48]
 31336  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 31337  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31338  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31339  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31340  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 31341  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31342  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31343  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31344  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 31345  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
 31346  	LONG $0x10c68348                           // add    rsi, 16
 31347  	LONG $0x02c78348                           // add    rdi, 2
 31348  	JNE  LBB4_718
 31349  	JMP  LBB4_1138
 31350  
 // Vector loop setup: 32-bit elements at [rcx + 4*rsi] are mapped to 16-bit
 // results at [r8 + 2*rsi]; a nonzero input selects the bits of the constant
 // in xmm2, a zero input yields 0. The instruction sequence matches the one at
 // LBB4_716/LBB4_718; only the branch targets (LBB4_1142, LBB4_1143) differ.
 //   edx = count in eax rounded down to a multiple of 8
 //   r9  = number of 8-element groups ((rdx - 8) >> 3) + 1
 // With a single group the unrolled loop is skipped. rdi = -(r9 & -2) counts
 // up by 2 towards zero; each iteration handles two groups (16 elements).
 // NOTE(review): xmm2 is loaded from 112[rbp] (.LCPI4_11); its value is not
 // visible here - presumably the 16-bit integer 1 in each lane, TODO confirm.
 31351  LBB4_719:
 31352  	WORD $0xc289                 // mov    edx, eax
 31353  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
 31354  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 31355  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 31356  	LONG $0x03e9c149             // shr    r9, 3
 31357  	LONG $0x01c18349             // add    r9, 1
 31358  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 31359  	JE   LBB4_1142
 31360  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 31361  	LONG $0xfee78348             // and    rdi, -2
 31362  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 31363  	WORD $0xf631                 // xor    esi, esi
 31364  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 31365  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 31366  	LONG $0x556f0f66; BYTE $0x70 // movdqa    xmm2, oword 112[rbp] /* [rip + .LCPI4_11] */
 31367  
 // Loop body: build a "!= 0" mask per dword, narrow it to words with
 // packssdw, AND it with the constant, and join two 4-word halves with
 // punpcklqdq for each 16-byte store.
 31368  LBB4_721:
 31369  	LONG $0x1c6f0ff3; BYTE $0xb1               // movdqu    xmm3, oword [rcx + 4*rsi]
 31370  	LONG $0x646f0ff3; WORD $0x10b1             // movdqu    xmm4, oword [rcx + 4*rsi + 16]
 31371  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 31372  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31373  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31374  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31375  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 31376  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31377  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31378  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31379  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 31380  	LONG $0x7f0f41f3; WORD $0x701c             // movdqu    oword [r8 + 2*rsi], xmm3
 // Second group of eight elements (unroll factor 2).
 31381  	LONG $0x5c6f0ff3; WORD $0x20b1             // movdqu    xmm3, oword [rcx + 4*rsi + 32]
 31382  	LONG $0x646f0ff3; WORD $0x30b1             // movdqu    xmm4, oword [rcx + 4*rsi + 48]
 31383  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 31384  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31385  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31386  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31387  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 31388  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31389  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31390  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31391  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 31392  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
 31393  	LONG $0x10c68348                           // add    rsi, 16
 31394  	LONG $0x02c78348                           // add    rdi, 2
 31395  	JNE  LBB4_721
 31396  	JMP  LBB4_1143
 31397  
 31398  LBB4_722:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_724, which reads 8-byte
        	// floating-point elements at [rcx + 8*rdi] and writes 2-byte results at [r8 + 2*rdi].
        	// NOTE(review): eax is loaded outside this chunk - presumably the element count; confirm.
        	// esi = eax rounded down to a multiple of 4; r9 = number of 4-element groups.
 31399  	WORD $0xc689                 // mov    esi, eax
 31400  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 31401  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 31402  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 31403  	LONG $0x02e9c149             // shr    r9, 2
 31404  	LONG $0x01c18349             // add    r9, 1
        	// rsi - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31405  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 31406  	JE   LBB4_1147
        	// rdx = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31407  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 31408  	LONG $0xfee28348             // and    rdx, -2
 31409  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = element index; xmm2 = 0.0; xmm3 / xmm4 = 16-byte constants at rbp+0 / rbp+16.
        	// NOTE(review): constants are defined outside this chunk - presumably a sign-bit mask and 1.0,
        	// so that (x & xmm3) | xmm4 yields +/-1.0 with the sign of x; confirm.
 31410  	WORD $0xff31                 // xor    edi, edi
 31411  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 31412  	LONG $0x5d280f66; BYTE $0x00 // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */
 31413  	LONG $0x65280f66; BYTE $0x10 // movapd    xmm4, oword 16[rbp] /* [rip + .LCPI4_1] */
 31414  
 31415  LBB4_724:
        	// One pass handles 8 doubles (two 4-element groups).
        	// xmm0 / xmm1 = (x == 0.0) masks, narrowed from 64-bit to 16-bit lanes by two packssdw.
 31416  	LONG $0x2c100f66; BYTE $0xf9               // movupd    xmm5, oword [rcx + 8*rdi]
 31417  	LONG $0x74100f66; WORD $0x10f9             // movupd    xmm6, oword [rcx + 8*rdi + 16]
 31418  	LONG $0xc5280f66                           // movapd    xmm0, xmm5
 31419  	LONG $0xc2c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm2
 31420  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31421  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31422  	LONG $0xce280f66                           // movapd    xmm1, xmm6
 31423  	LONG $0xcac20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm2
 31424  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31425  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
        	// x = (x & xmm3) | xmm4, then truncate to int32; pshuflw 232 moves the low
        	// 16 bits of both int32 values into the low dword.
 31426  	LONG $0xeb540f66                           // andpd    xmm5, xmm3
 31427  	LONG $0xec560f66                           // orpd    xmm5, xmm4
 31428  	LONG $0xf3540f66                           // andpd    xmm6, xmm3
 31429  	LONG $0xf4560f66                           // orpd    xmm6, xmm4
 31430  	LONG $0xede60f66                           // cvttpd2dq    xmm5, xmm5
 31431  	LONG $0xed700ff2; BYTE $0xe8               // pshuflw    xmm5, xmm5, 232
 31432  	LONG $0xf6e60f66                           // cvttpd2dq    xmm6, xmm6
 31433  	LONG $0xf6700ff2; BYTE $0xe8               // pshuflw    xmm6, xmm6, 232
        	// pblendvb uses xmm0 as its implicit mask: lanes where x == 0.0 are replaced by xmm2 (zero).
        	// Each movd then stores two 16-bit results.
 31434  	LONG $0x10380f66; BYTE $0xea               // pblendvb    xmm5, xmm2, xmm0
 31435  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31436  	LONG $0x10380f66; BYTE $0xf2               // pblendvb    xmm6, xmm2, xmm0
 31437  	LONG $0x7e0f4166; WORD $0x782c             // movd    dword [r8 + 2*rdi], xmm5
 31438  	LONG $0x7e0f4166; WORD $0x7874; BYTE $0x04 // movd    dword [r8 + 2*rdi + 4], xmm6
        	// Second unrolled half: same sequence on the next 4 doubles.
 31439  	LONG $0x6c100f66; WORD $0x20f9             // movupd    xmm5, oword [rcx + 8*rdi + 32]
 31440  	LONG $0x74100f66; WORD $0x30f9             // movupd    xmm6, oword [rcx + 8*rdi + 48]
 31441  	LONG $0xc5280f66                           // movapd    xmm0, xmm5
 31442  	LONG $0xc2c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm2
 31443  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31444  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31445  	LONG $0xce280f66                           // movapd    xmm1, xmm6
 31446  	LONG $0xcac20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm2
 31447  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31448  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31449  	LONG $0xeb540f66                           // andpd    xmm5, xmm3
 31450  	LONG $0xec560f66                           // orpd    xmm5, xmm4
 31451  	LONG $0xf3540f66                           // andpd    xmm6, xmm3
 31452  	LONG $0xf4560f66                           // orpd    xmm6, xmm4
 31453  	LONG $0xede60f66                           // cvttpd2dq    xmm5, xmm5
 31454  	LONG $0xed700ff2; BYTE $0xe8               // pshuflw    xmm5, xmm5, 232
 31455  	LONG $0xf6e60f66                           // cvttpd2dq    xmm6, xmm6
 31456  	LONG $0xf6700ff2; BYTE $0xe8               // pshuflw    xmm6, xmm6, 232
 31457  	LONG $0x10380f66; BYTE $0xea               // pblendvb    xmm5, xmm2, xmm0
 31458  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31459  	LONG $0x10380f66; BYTE $0xf2               // pblendvb    xmm6, xmm2, xmm0
 31460  	LONG $0x7e0f4166; WORD $0x786c; BYTE $0x08 // movd    dword [r8 + 2*rdi + 8], xmm5
 31461  	LONG $0x7e0f4166; WORD $0x7874; BYTE $0x0c // movd    dword [r8 + 2*rdi + 12], xmm6
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31462  	LONG $0x08c78348                           // add    rdi, 8
 31463  	LONG $0x02c28348                           // add    rdx, 2
 31464  	JNE  LBB4_724
        	// NOTE(review): LBB4_1148 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31465  	JMP  LBB4_1148
 31466  
 31467  LBB4_725:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_727. The instruction sequence matches
        	// LBB4_722 / LBB4_724; only the exit labels (LBB4_1153 / LBB4_1154) differ.
        	// Reads 8-byte floating-point elements at [rcx + 8*rdi], writes 2-byte results at [r8 + 2*rdi].
        	// NOTE(review): eax is loaded outside this chunk - presumably the element count; confirm.
        	// esi = eax rounded down to a multiple of 4; r9 = number of 4-element groups.
 31468  	WORD $0xc689                 // mov    esi, eax
 31469  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 31470  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 31471  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 31472  	LONG $0x02e9c149             // shr    r9, 2
 31473  	LONG $0x01c18349             // add    r9, 1
        	// rsi - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31474  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 31475  	JE   LBB4_1153
        	// rdx = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31476  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 31477  	LONG $0xfee28348             // and    rdx, -2
 31478  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = element index; xmm2 = 0.0; xmm3 / xmm4 = 16-byte constants at rbp+0 / rbp+16.
        	// NOTE(review): constants are defined outside this chunk - presumably a sign-bit mask and 1.0; confirm.
 31479  	WORD $0xff31                 // xor    edi, edi
 31480  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 31481  	LONG $0x5d280f66; BYTE $0x00 // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */
 31482  	LONG $0x65280f66; BYTE $0x10 // movapd    xmm4, oword 16[rbp] /* [rip + .LCPI4_1] */
 31483  
 31484  LBB4_727:
        	// One pass handles 8 doubles (two 4-element groups).
        	// xmm0 / xmm1 = (x == 0.0) masks, narrowed from 64-bit to 16-bit lanes by two packssdw.
 31485  	LONG $0x2c100f66; BYTE $0xf9               // movupd    xmm5, oword [rcx + 8*rdi]
 31486  	LONG $0x74100f66; WORD $0x10f9             // movupd    xmm6, oword [rcx + 8*rdi + 16]
 31487  	LONG $0xc5280f66                           // movapd    xmm0, xmm5
 31488  	LONG $0xc2c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm2
 31489  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31490  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31491  	LONG $0xce280f66                           // movapd    xmm1, xmm6
 31492  	LONG $0xcac20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm2
 31493  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31494  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
        	// x = (x & xmm3) | xmm4, then truncate to int32; pshuflw 232 moves the low
        	// 16 bits of both int32 values into the low dword.
 31495  	LONG $0xeb540f66                           // andpd    xmm5, xmm3
 31496  	LONG $0xec560f66                           // orpd    xmm5, xmm4
 31497  	LONG $0xf3540f66                           // andpd    xmm6, xmm3
 31498  	LONG $0xf4560f66                           // orpd    xmm6, xmm4
 31499  	LONG $0xede60f66                           // cvttpd2dq    xmm5, xmm5
 31500  	LONG $0xed700ff2; BYTE $0xe8               // pshuflw    xmm5, xmm5, 232
 31501  	LONG $0xf6e60f66                           // cvttpd2dq    xmm6, xmm6
 31502  	LONG $0xf6700ff2; BYTE $0xe8               // pshuflw    xmm6, xmm6, 232
        	// pblendvb uses xmm0 as its implicit mask: lanes where x == 0.0 are replaced by xmm2 (zero).
        	// Each movd then stores two 16-bit results.
 31503  	LONG $0x10380f66; BYTE $0xea               // pblendvb    xmm5, xmm2, xmm0
 31504  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31505  	LONG $0x10380f66; BYTE $0xf2               // pblendvb    xmm6, xmm2, xmm0
 31506  	LONG $0x7e0f4166; WORD $0x782c             // movd    dword [r8 + 2*rdi], xmm5
 31507  	LONG $0x7e0f4166; WORD $0x7874; BYTE $0x04 // movd    dword [r8 + 2*rdi + 4], xmm6
        	// Second unrolled half: same sequence on the next 4 doubles.
 31508  	LONG $0x6c100f66; WORD $0x20f9             // movupd    xmm5, oword [rcx + 8*rdi + 32]
 31509  	LONG $0x74100f66; WORD $0x30f9             // movupd    xmm6, oword [rcx + 8*rdi + 48]
 31510  	LONG $0xc5280f66                           // movapd    xmm0, xmm5
 31511  	LONG $0xc2c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm2
 31512  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31513  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31514  	LONG $0xce280f66                           // movapd    xmm1, xmm6
 31515  	LONG $0xcac20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm2
 31516  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31517  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31518  	LONG $0xeb540f66                           // andpd    xmm5, xmm3
 31519  	LONG $0xec560f66                           // orpd    xmm5, xmm4
 31520  	LONG $0xf3540f66                           // andpd    xmm6, xmm3
 31521  	LONG $0xf4560f66                           // orpd    xmm6, xmm4
 31522  	LONG $0xede60f66                           // cvttpd2dq    xmm5, xmm5
 31523  	LONG $0xed700ff2; BYTE $0xe8               // pshuflw    xmm5, xmm5, 232
 31524  	LONG $0xf6e60f66                           // cvttpd2dq    xmm6, xmm6
 31525  	LONG $0xf6700ff2; BYTE $0xe8               // pshuflw    xmm6, xmm6, 232
 31526  	LONG $0x10380f66; BYTE $0xea               // pblendvb    xmm5, xmm2, xmm0
 31527  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31528  	LONG $0x10380f66; BYTE $0xf2               // pblendvb    xmm6, xmm2, xmm0
 31529  	LONG $0x7e0f4166; WORD $0x786c; BYTE $0x08 // movd    dword [r8 + 2*rdi + 8], xmm5
 31530  	LONG $0x7e0f4166; WORD $0x7874; BYTE $0x0c // movd    dword [r8 + 2*rdi + 12], xmm6
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31531  	LONG $0x08c78348                           // add    rdi, 8
 31532  	LONG $0x02c28348                           // add    rdx, 2
 31533  	JNE  LBB4_727
        	// NOTE(review): LBB4_1154 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31534  	JMP  LBB4_1154
 31535  
 31536  LBB4_738:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_740, which reads 8-byte integer
        	// elements at [rcx + 8*rsi] and writes 2-byte results at [r8 + 2*rsi].
        	// NOTE(review): eax is loaded outside this chunk - presumably the element count; confirm.
        	// edx = eax rounded down to a multiple of 4; r9 = number of 4-element groups.
 31537  	WORD $0xc289             // mov    edx, eax
 31538  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31539  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31540  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31541  	LONG $0x02e9c149         // shr    r9, 2
 31542  	LONG $0x01c18349         // add    r9, 1
        	// rdx - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31543  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31544  	JE   LBB4_1027
        	// rdi = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31545  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31546  	LONG $0xfee78348         // and    rdi, -2
 31547  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rsi = element index; xmm0 = all zeros; xmm1 = all ones; xmm2 = 16-byte constant at rbp+176.
        	// NOTE(review): the constant is defined outside this chunk - presumably 1 in every 16-bit lane; confirm.
 31548  	WORD $0xf631             // xor    esi, esi
 31549  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 31550  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 31551  	QUAD $0x000000b0956f0f66 // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI4_17] */
 31552  
 31553  LBB4_740:
        	// One pass handles 8 elements (two 4-element groups).
        	// For each qword x: mask = ~(x == 0), narrowed from 64-bit to 16-bit lanes by
        	// two packssdw, then ANDed with xmm2. Each movd stores two 16-bit results.
 31554  	LONG $0x1c6f0ff3; BYTE $0xf1               // movdqu    xmm3, oword [rcx + 8*rsi]
 31555  	LONG $0x646f0ff3; WORD $0x10f1             // movdqu    xmm4, oword [rcx + 8*rsi + 16]
 31556  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 31557  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31558  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31559  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31560  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31561  	LONG $0x29380f66; BYTE $0xe0               // pcmpeqq    xmm4, xmm0
 31562  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31563  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31564  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31565  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31566  	LONG $0x7e0f4166; WORD $0x701c             // movd    dword [r8 + 2*rsi], xmm3
 31567  	LONG $0x7e0f4166; WORD $0x7064; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm4
        	// Second unrolled half: same sequence on the next 4 elements.
 31568  	LONG $0x5c6f0ff3; WORD $0x20f1             // movdqu    xmm3, oword [rcx + 8*rsi + 32]
 31569  	LONG $0x646f0ff3; WORD $0x30f1             // movdqu    xmm4, oword [rcx + 8*rsi + 48]
 31570  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 31571  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31572  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31573  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31574  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31575  	LONG $0x29380f66; BYTE $0xe0               // pcmpeqq    xmm4, xmm0
 31576  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31577  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31578  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31579  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31580  	LONG $0x7e0f4166; WORD $0x705c; BYTE $0x08 // movd    dword [r8 + 2*rsi + 8], xmm3
 31581  	LONG $0x7e0f4166; WORD $0x7064; BYTE $0x0c // movd    dword [r8 + 2*rsi + 12], xmm4
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31582  	LONG $0x08c68348                           // add    rsi, 8
 31583  	LONG $0x02c78348                           // add    rdi, 2
 31584  	JNE  LBB4_740
        	// NOTE(review): LBB4_1028 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31585  	JMP  LBB4_1028
 31586  
 31587  LBB4_741:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_743. The instruction sequence matches
        	// LBB4_738 / LBB4_740; only the exit labels (LBB4_1032 / LBB4_1033) differ.
        	// Reads 8-byte integer elements at [rcx + 8*rsi], writes 2-byte results at [r8 + 2*rsi].
        	// NOTE(review): eax is loaded outside this chunk - presumably the element count; confirm.
        	// edx = eax rounded down to a multiple of 4; r9 = number of 4-element groups.
 31588  	WORD $0xc289             // mov    edx, eax
 31589  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31590  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31591  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31592  	LONG $0x02e9c149         // shr    r9, 2
 31593  	LONG $0x01c18349         // add    r9, 1
        	// rdx - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31594  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31595  	JE   LBB4_1032
        	// rdi = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31596  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31597  	LONG $0xfee78348         // and    rdi, -2
 31598  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rsi = element index; xmm0 = all zeros; xmm1 = all ones; xmm2 = 16-byte constant at rbp+176.
        	// NOTE(review): the constant is defined outside this chunk - presumably 1 in every 16-bit lane; confirm.
 31599  	WORD $0xf631             // xor    esi, esi
 31600  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 31601  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 31602  	QUAD $0x000000b0956f0f66 // movdqa    xmm2, oword 176[rbp] /* [rip + .LCPI4_17] */
 31603  
 31604  LBB4_743:
        	// One pass handles 8 elements (two 4-element groups).
        	// For each qword x: mask = ~(x == 0), narrowed from 64-bit to 16-bit lanes by
        	// two packssdw, then ANDed with xmm2. Each movd stores two 16-bit results.
 31605  	LONG $0x1c6f0ff3; BYTE $0xf1               // movdqu    xmm3, oword [rcx + 8*rsi]
 31606  	LONG $0x646f0ff3; WORD $0x10f1             // movdqu    xmm4, oword [rcx + 8*rsi + 16]
 31607  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 31608  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31609  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31610  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31611  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31612  	LONG $0x29380f66; BYTE $0xe0               // pcmpeqq    xmm4, xmm0
 31613  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31614  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31615  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31616  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31617  	LONG $0x7e0f4166; WORD $0x701c             // movd    dword [r8 + 2*rsi], xmm3
 31618  	LONG $0x7e0f4166; WORD $0x7064; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm4
        	// Second unrolled half: same sequence on the next 4 elements.
 31619  	LONG $0x5c6f0ff3; WORD $0x20f1             // movdqu    xmm3, oword [rcx + 8*rsi + 32]
 31620  	LONG $0x646f0ff3; WORD $0x30f1             // movdqu    xmm4, oword [rcx + 8*rsi + 48]
 31621  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 31622  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 31623  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31624  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31625  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 31626  	LONG $0x29380f66; BYTE $0xe0               // pcmpeqq    xmm4, xmm0
 31627  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 31628  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31629  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 31630  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 31631  	LONG $0x7e0f4166; WORD $0x705c; BYTE $0x08 // movd    dword [r8 + 2*rsi + 8], xmm3
 31632  	LONG $0x7e0f4166; WORD $0x7064; BYTE $0x0c // movd    dword [r8 + 2*rsi + 12], xmm4
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31633  	LONG $0x08c68348                           // add    rsi, 8
 31634  	LONG $0x02c78348                           // add    rdi, 2
 31635  	JNE  LBB4_743
        	// NOTE(review): LBB4_1033 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31636  	JMP  LBB4_1033
 31637  
 31638  LBB4_764:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_766, which reads 8-byte integer
        	// elements at [rcx + 8*rsi] (compared as signed) and writes 2-byte results at [r8 + 2*rsi].
        	// NOTE(review): r10d is loaded outside this chunk - presumably the element count; confirm.
        	// edx = r10d rounded down to a multiple of 4; r9 = number of 4-element groups.
 31639  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 31640  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31641  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31642  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31643  	LONG $0x02e9c149         // shr    r9, 2
 31644  	LONG $0x01c18349         // add    r9, 1
        	// rdx - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31645  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31646  	JE   LBB4_1037
        	// rdi = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31647  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31648  	LONG $0xfee78348         // and    rdi, -2
 31649  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rsi = element index; xmm2 = all zeros; xmm3 = all ones; xmm4 = 16-byte constant at rbp+176.
        	// NOTE(review): the constant is defined outside this chunk - presumably 1 in every 16-bit lane,
        	// which would make the result sign(x) in {-1, 0, 1}; confirm.
 31650  	WORD $0xf631             // xor    esi, esi
 31651  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 31652  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 31653  	QUAD $0x000000b0a56f0f66 // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */
 31654  
 31655  LBB4_766:
        	// One pass handles 8 elements (two 4-element groups).
        	// xmm0 / xmm1 = (x > 0) masks from the signed compare pcmpgtq, narrowed to 16-bit lanes.
 31656  	LONG $0x2c6f0ff3; BYTE $0xf1               // movdqu    xmm5, oword [rcx + 8*rsi]
 31657  	LONG $0x746f0ff3; WORD $0x10f1             // movdqu    xmm6, oword [rcx + 8*rsi + 16]
 31658  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31659  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
 31660  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31661  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31662  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31663  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 31664  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31665  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
        	// xmm5 / xmm6 = ~(x == 0) narrowed to 16-bit lanes: 0xFFFF (-1 as int16) for non-zero x, 0 for zero.
 31666  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 31667  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31668  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31669  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31670  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 31671  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31672  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31673  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
        	// pblendvb uses xmm0 as its implicit mask: lanes where x > 0 are replaced by xmm4.
        	// Each movd then stores two 16-bit results.
 31674  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31675  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31676  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31677  	LONG $0x7e0f4166; WORD $0x702c             // movd    dword [r8 + 2*rsi], xmm5
 31678  	LONG $0x7e0f4166; WORD $0x7074; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm6
        	// Second unrolled half: same sequence on the next 4 elements.
 31679  	LONG $0x6c6f0ff3; WORD $0x20f1             // movdqu    xmm5, oword [rcx + 8*rsi + 32]
 31680  	LONG $0x746f0ff3; WORD $0x30f1             // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 31681  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31682  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
 31683  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31684  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31685  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31686  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 31687  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31688  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31689  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 31690  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31691  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31692  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31693  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 31694  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31695  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31696  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31697  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31698  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31699  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31700  	LONG $0x7e0f4166; WORD $0x706c; BYTE $0x08 // movd    dword [r8 + 2*rsi + 8], xmm5
 31701  	LONG $0x7e0f4166; WORD $0x7074; BYTE $0x0c // movd    dword [r8 + 2*rsi + 12], xmm6
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31702  	LONG $0x08c68348                           // add    rsi, 8
 31703  	LONG $0x02c78348                           // add    rdi, 2
 31704  	JNE  LBB4_766
        	// NOTE(review): LBB4_1038 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31705  	JMP  LBB4_1038
 31706  
 31707  LBB4_767:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_769. The instruction sequence matches
        	// LBB4_764 / LBB4_766; only the exit labels (LBB4_1159 / LBB4_1160) differ.
        	// Reads 8-byte integer elements at [rcx + 8*rsi] (compared as signed), writes 2-byte results at [r8 + 2*rsi].
        	// NOTE(review): r10d is loaded outside this chunk - presumably the element count; confirm.
        	// edx = r10d rounded down to a multiple of 4; r9 = number of 4-element groups.
 31708  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 31709  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 31710  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 31711  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 31712  	LONG $0x02e9c149         // shr    r9, 2
 31713  	LONG $0x01c18349         // add    r9, 1
        	// rdx - 4 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31714  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 31715  	JE   LBB4_1159
        	// rdi = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31716  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 31717  	LONG $0xfee78348         // and    rdi, -2
 31718  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rsi = element index; xmm2 = all zeros; xmm3 = all ones; xmm4 = 16-byte constant at rbp+176.
        	// NOTE(review): the constant is defined outside this chunk - presumably 1 in every 16-bit lane; confirm.
 31719  	WORD $0xf631             // xor    esi, esi
 31720  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 31721  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 31722  	QUAD $0x000000b0a56f0f66 // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */
 31723  
 31724  LBB4_769:
        	// One pass handles 8 elements (two 4-element groups).
        	// xmm0 / xmm1 = (x > 0) masks from the signed compare pcmpgtq, narrowed to 16-bit lanes.
 31725  	LONG $0x2c6f0ff3; BYTE $0xf1               // movdqu    xmm5, oword [rcx + 8*rsi]
 31726  	LONG $0x746f0ff3; WORD $0x10f1             // movdqu    xmm6, oword [rcx + 8*rsi + 16]
 31727  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31728  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
 31729  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31730  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31731  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31732  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 31733  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31734  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
        	// xmm5 / xmm6 = ~(x == 0) narrowed to 16-bit lanes: 0xFFFF (-1 as int16) for non-zero x, 0 for zero.
 31735  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 31736  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31737  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31738  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31739  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 31740  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31741  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31742  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
        	// pblendvb uses xmm0 as its implicit mask: lanes where x > 0 are replaced by xmm4.
        	// Each movd then stores two 16-bit results.
 31743  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31744  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31745  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31746  	LONG $0x7e0f4166; WORD $0x702c             // movd    dword [r8 + 2*rsi], xmm5
 31747  	LONG $0x7e0f4166; WORD $0x7074; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm6
        	// Second unrolled half: same sequence on the next 4 elements.
 31748  	LONG $0x6c6f0ff3; WORD $0x20f1             // movdqu    xmm5, oword [rcx + 8*rsi + 32]
 31749  	LONG $0x746f0ff3; WORD $0x30f1             // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 31750  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31751  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
 31752  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31753  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31754  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31755  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 31756  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31757  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31758  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 31759  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31760  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31761  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31762  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 31763  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31764  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31765  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31766  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31767  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31768  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31769  	LONG $0x7e0f4166; WORD $0x706c; BYTE $0x08 // movd    dword [r8 + 2*rsi + 8], xmm5
 31770  	LONG $0x7e0f4166; WORD $0x7074; BYTE $0x0c // movd    dword [r8 + 2*rsi + 12], xmm6
        	// Advance the index by 8 elements and the negated group counter by 2; loop until it hits zero.
 31771  	LONG $0x08c68348                           // add    rsi, 8
 31772  	LONG $0x02c78348                           // add    rdi, 2
 31773  	JNE  LBB4_769
        	// NOTE(review): LBB4_1160 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31774  	JMP  LBB4_1160
 31775  
 31776  LBB4_770:
        	// Set-up for the 2x-unrolled SSE loop at LBB4_772, which reads 4-byte
        	// floating-point elements at [rcx + 4*rdi] and writes 2-byte results at [r8 + 2*rdi].
        	// NOTE(review): eax is loaded outside this chunk - presumably the element count; confirm.
        	// esi = eax rounded down to a multiple of 8; r9 = number of 8-element groups.
 31777  	WORD $0xc689                 // mov    esi, eax
 31778  	WORD $0xe683; BYTE $0xf8     // and    esi, -8
 31779  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 31780  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 31781  	LONG $0x03e9c149             // shr    r9, 3
 31782  	LONG $0x01c18349             // add    r9, 1
        	// rsi - 8 == 0 means there is exactly one group, so the unrolled loop is bypassed.
 31783  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 31784  	JE   LBB4_1165
        	// rdx = -(r9 & -2): negated even group count, stepped by +2 until it reaches zero.
 31785  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 31786  	LONG $0xfee28348             // and    rdx, -2
 31787  	WORD $0xf748; BYTE $0xda     // neg    rdx
        	// rdi = element index; xmm4 = 0.0; xmm8 = all ones (-1 per dword); xmm6 = 16-byte constant at rbp+112.
        	// NOTE(review): the constant is defined outside this chunk - presumably 1 in every 16-bit lane; confirm.
 31788  	WORD $0xff31                 // xor    edi, edi
 31789  	WORD $0x570f; BYTE $0xe4     // xorps    xmm4, xmm4
 31790  	LONG $0x760f4566; BYTE $0xc0 // pcmpeqd    xmm8, xmm8
 31791  	LONG $0x756f0f66; BYTE $0x70 // movdqa    xmm6, oword 112[rbp] /* [rip + .LCPI4_11] */
 31792  
 31793  LBB4_772:
        	// One pass handles 16 floats (two 8-element groups).
        	// xmm2 / xmm3 = (x == 0.0) masks narrowed to 16-bit lanes.
 31794  	LONG $0xb904100f                           // movups    xmm0, oword [rcx + 4*rdi]
 31795  	LONG $0xb94c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rdi + 16]
 31796  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 31797  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 31798  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 31799  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 31800  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 31801  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
        	// Integer compare of the raw float bits against -1: true exactly when the sign bit is clear.
 31802  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 31803  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31804  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 31805  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
        	// Start from all ones (-1 per int16 lane); pblendvb (implicit mask xmm0) substitutes
        	// xmm6 where the sign bit is clear, then substitutes zero where x == 0.0.
 31806  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 31807  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 31808  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 31809  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31810  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 31811  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 31812  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 31813  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 31814  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
        	// Merge the two 4-lane halves and store eight 16-bit results.
 31815  	LONG $0xfd6c0f66                           // punpcklqdq    xmm7, xmm5
 31816  	LONG $0x7f0f41f3; WORD $0x783c             // movdqu    oword [r8 + 2*rdi], xmm7
        	// Second unrolled half: same computation on the next 8 floats, with the roles of xmm5 and xmm7 swapped.
 31817  	LONG $0xb944100f; BYTE $0x20               // movups    xmm0, oword [rcx + 4*rdi + 32]
 31818  	LONG $0xb94c100f; BYTE $0x30               // movups    xmm1, oword [rcx + 4*rdi + 48]
 31819  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 31820  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 31821  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 31822  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 31823  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 31824  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31825  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 31826  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31827  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 31828  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 31829  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 31830  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31831  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 31832  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31833  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 31834  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 31835  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31836  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 31837  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 31838  	LONG $0xef6c0f66                           // punpcklqdq    xmm5, xmm7
 31839  	LONG $0x7f0f41f3; WORD $0x786c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm5
        	// Advance the index by 16 elements and the negated group counter by 2; loop until it hits zero.
 31840  	LONG $0x10c78348                           // add    rdi, 16
 31841  	LONG $0x02c28348                           // add    rdx, 2
 31842  	JNE  LBB4_772
        	// NOTE(review): LBB4_1166 is outside this chunk - presumably handles an odd leftover group and the scalar tail; confirm.
 31843  	JMP  LBB4_1166
 31844  
 31845  LBB4_773: // 4-byte float input -> 16-bit output, three-way select (0 / constant / -1) per element; presumably sign() - confirm against the C source
 31846  	WORD $0xc689                 // mov    esi, eax ; eax holds the count, set before this label
 31847  	WORD $0xe683; BYTE $0xf8     // and    esi, -8 ; round down to a multiple of 8
 31848  	LONG $0xf8568d48             // lea    rdx, [rsi - 8]
 31849  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 31850  	LONG $0x03e9c149             // shr    r9, 3
 31851  	LONG $0x01c18349             // add    r9, 1 ; r9 = number of 8-element groups
 31852  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 31853  	JE   LBB4_1171 // exactly one group: bypass the 2x-unrolled loop below
 31854  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 31855  	LONG $0xfee28348             // and    rdx, -2 ; even number of groups handled by the loop
 31856  	WORD $0xf748; BYTE $0xda     // neg    rdx ; negative counter, stepped by +2 until it reaches zero
 31857  	WORD $0xff31                 // xor    edi, edi ; rdi = element index, starts at 0
 31858  	WORD $0x570f; BYTE $0xe4     // xorps    xmm4, xmm4 ; xmm4 = all zero
 31859  	LONG $0x760f4566; BYTE $0xc0 // pcmpeqd    xmm8, xmm8 ; xmm8 = all bits set (-1 in every dword)
 31860  	LONG $0x756f0f66; BYTE $0x70 // movdqa    xmm6, oword 112[rbp] /* [rip + .LCPI4_11] */ ; constant defined outside this chunk, presumably 1 per 16-bit lane - confirm
 31861  
 31862  LBB4_775: // loop body: two groups of 8 elements per iteration
 31863  	LONG $0xb904100f                           // movups    xmm0, oword [rcx + 4*rdi] ; elements rdi .. rdi+3
 31864  	LONG $0xb94c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rdi + 16] ; elements rdi+4 .. rdi+7
 31865  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 31866  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4 ; mask: element == 0.0
 31867  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2 ; narrow the 32-bit masks to 16-bit lanes
 31868  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 31869  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 31870  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31871  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8 ; mask: raw bits > -1 as signed dword, i.e. sign bit clear
 31872  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31873  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 31874  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31875  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7 ; start from -1 in every lane
 31876  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0 ; sign bit clear -> constant from xmm6 (xmm0 is the blend mask)
 31877  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 31878  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31879  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 31880  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 31881  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0 ; element == 0.0 -> 0
 31882  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 31883  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31884  	LONG $0xfd6c0f66                           // punpcklqdq    xmm7, xmm5 ; join the two 4-lane halves into 8 words
 31885  	LONG $0x7f0f41f3; WORD $0x783c             // movdqu    oword [r8 + 2*rdi], xmm7 ; store 8 16-bit results
 31886  	LONG $0xb944100f; BYTE $0x20               // movups    xmm0, oword [rcx + 4*rdi + 32] ; second group (elements rdi+8 .. rdi+15), same computation
 31887  	LONG $0xb94c100f; BYTE $0x30               // movups    xmm1, oword [rcx + 4*rdi + 48]
 31888  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 31889  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 31890  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 31891  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 31892  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 31893  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 31894  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 31895  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31896  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 31897  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 31898  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 31899  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31900  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 31901  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31902  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 31903  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 31904  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31905  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 31906  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 31907  	LONG $0xef6c0f66                           // punpcklqdq    xmm5, xmm7
 31908  	LONG $0x7f0f41f3; WORD $0x786c; BYTE $0x10 // movdqu    oword [r8 + 2*rdi + 16], xmm5 ; store results for the second group
 31909  	LONG $0x10c78348                           // add    rdi, 16 ; 16 elements consumed per iteration
 31910  	LONG $0x02c28348                           // add    rdx, 2 ; two groups done; sets ZF when the counter reaches 0
 31911  	JNE  LBB4_775
 31912  	JMP  LBB4_1172 // continuation is outside this chunk
 31913  
 31914  LBB4_786: // signed 32-bit input -> 16-bit output, three-way select (0 / constant / -1) per element; presumably sign() - confirm against the C source
 31915  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d ; r10d holds the count, set before this label
 31916  	WORD $0xe283; BYTE $0xf8     // and    edx, -8 ; round down to a multiple of 8
 31917  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 31918  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 31919  	LONG $0x03e9c149             // shr    r9, 3
 31920  	LONG $0x01c18349             // add    r9, 1 ; r9 = number of 8-element groups
 31921  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 31922  	JE   LBB4_1043 // exactly one group: bypass the 2x-unrolled loop below
 31923  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 31924  	LONG $0xfee78348             // and    rdi, -2 ; even number of groups handled by the loop
 31925  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 31926  	WORD $0xf631                 // xor    esi, esi ; rsi = element index, starts at 0
 31927  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2 ; xmm2 = all zero
 31928  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3 ; xmm3 = all bits set
 31929  	LONG $0x656f0f66; BYTE $0x70 // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */ ; constant defined outside this chunk, presumably 1 per 16-bit lane - confirm
 31930  
 31931  LBB4_788: // loop body: two groups of 8 elements per iteration
 31932  	LONG $0x2c6f0ff3; BYTE $0xb1               // movdqu    xmm5, oword [rcx + 4*rsi] ; elements rsi .. rsi+3
 31933  	LONG $0x746f0ff3; WORD $0x10b1             // movdqu    xmm6, oword [rcx + 4*rsi + 16] ; elements rsi+4 .. rsi+7
 31934  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31935  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2 ; mask: element > 0 (signed)
 31936  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 ; narrow the 32-bit masks to 16-bit lanes
 31937  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31938  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 31939  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31940  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2 ; mask: element == 0
 31941  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 ; invert: -1 where element != 0, 0 where it is 0
 31942  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31943  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 31944  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31945  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31946  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0 ; element > 0 -> constant from xmm4 (xmm0 is the blend mask)
 31947  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31948  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31949  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6 ; join the two 4-lane halves into 8 words
 31950  	LONG $0x7f0f41f3; WORD $0x702c             // movdqu    oword [r8 + 2*rsi], xmm5 ; store 8 16-bit results
 31951  	LONG $0x6c6f0ff3; WORD $0x20b1             // movdqu    xmm5, oword [rcx + 4*rsi + 32] ; second group (elements rsi+8 .. rsi+15), same computation
 31952  	LONG $0x746f0ff3; WORD $0x30b1             // movdqu    xmm6, oword [rcx + 4*rsi + 48]
 31953  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31954  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 31955  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 31956  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31957  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 31958  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 31959  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 31960  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 31961  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 31962  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 31963  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 31964  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 31965  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 31966  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 31967  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 31968  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 31969  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm5 ; store results for the second group
 31970  	LONG $0x10c68348                           // add    rsi, 16 ; 16 elements consumed per iteration
 31971  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 31972  	JNE  LBB4_788
 31973  	JMP  LBB4_1044 // continuation is outside this chunk
 31974  
 31975  LBB4_789: // signed 32-bit input -> 16-bit output, three-way select (0 / constant / -1) per element; instruction sequence matches LBB4_786/LBB4_788, only the exit labels differ
 31976  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d ; r10d holds the count, set before this label
 31977  	WORD $0xe283; BYTE $0xf8     // and    edx, -8 ; round down to a multiple of 8
 31978  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 31979  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 31980  	LONG $0x03e9c149             // shr    r9, 3
 31981  	LONG $0x01c18349             // add    r9, 1 ; r9 = number of 8-element groups
 31982  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 31983  	JE   LBB4_1049 // exactly one group: bypass the 2x-unrolled loop below
 31984  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 31985  	LONG $0xfee78348             // and    rdi, -2 ; even number of groups handled by the loop
 31986  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 31987  	WORD $0xf631                 // xor    esi, esi ; rsi = element index, starts at 0
 31988  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2 ; xmm2 = all zero
 31989  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3 ; xmm3 = all bits set
 31990  	LONG $0x656f0f66; BYTE $0x70 // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */ ; constant defined outside this chunk, presumably 1 per 16-bit lane - confirm
 31991  
 31992  LBB4_791: // loop body: two groups of 8 elements per iteration
 31993  	LONG $0x2c6f0ff3; BYTE $0xb1               // movdqu    xmm5, oword [rcx + 4*rsi] ; elements rsi .. rsi+3
 31994  	LONG $0x746f0ff3; WORD $0x10b1             // movdqu    xmm6, oword [rcx + 4*rsi + 16] ; elements rsi+4 .. rsi+7
 31995  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 31996  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2 ; mask: element > 0 (signed)
 31997  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 ; narrow the 32-bit masks to 16-bit lanes
 31998  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 31999  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 32000  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 32001  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2 ; mask: element == 0
 32002  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 ; invert: -1 where element != 0, 0 where it is 0
 32003  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 32004  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 32005  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32006  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 32007  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0 ; element > 0 -> constant from xmm4 (xmm0 is the blend mask)
 32008  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32009  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 32010  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6 ; join the two 4-lane halves into 8 words
 32011  	LONG $0x7f0f41f3; WORD $0x702c             // movdqu    oword [r8 + 2*rsi], xmm5 ; store 8 16-bit results
 32012  	LONG $0x6c6f0ff3; WORD $0x20b1             // movdqu    xmm5, oword [rcx + 4*rsi + 32] ; second group (elements rsi+8 .. rsi+15), same computation
 32013  	LONG $0x746f0ff3; WORD $0x30b1             // movdqu    xmm6, oword [rcx + 4*rsi + 48]
 32014  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 32015  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 32016  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 32017  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 32018  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 32019  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 32020  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 32021  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 32022  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 32023  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 32024  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32025  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 32026  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 32027  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32028  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 32029  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 32030  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm5 ; store results for the second group
 32031  	LONG $0x10c68348                           // add    rsi, 16 ; 16 elements consumed per iteration
 32032  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 32033  	JNE  LBB4_791
 32034  	JMP  LBB4_1050 // continuation is outside this chunk
 32035  
 32036  LBB4_792: // 32-bit input -> 64-bit output: each element becomes (element != 0) AND constant; 4 elements per group
 32037  	WORD $0xc289             // mov    edx, eax ; eax holds the count, set before this label
 32038  	WORD $0xe283; BYTE $0xfc // and    edx, -4 ; round down to a multiple of 4
 32039  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 32040  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32041  	LONG $0x02e9c149         // shr    r9, 2
 32042  	LONG $0x01c18349         // add    r9, 1 ; r9 = number of 4-element groups
 32043  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32044  	JE   LBB4_1177 // exactly one group: bypass the 2x-unrolled loop below
 32045  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32046  	LONG $0xfee78348         // and    rdi, -2 ; even number of groups handled by the loop
 32047  	WORD $0xf748; BYTE $0xdf // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 32048  	WORD $0xf631             // xor    esi, esi ; rsi = element index, starts at 0
 32049  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 ; xmm0 = all zero
 32050  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 ; xmm1 = all bits set
 32051  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */ ; constant defined outside this chunk, presumably 1 per 64-bit lane - confirm
 32052  
 32053  LBB4_794: // loop body: two groups of 4 elements per iteration
 32054  	LONG $0x1c7e0ff3; BYTE $0xb1               // movq    xmm3, qword [rcx + 4*rsi] ; elements rsi, rsi+1
 32055  	LONG $0x647e0ff3; WORD $0x08b1             // movq    xmm4, qword [rcx + 4*rsi + 8] ; elements rsi+2, rsi+3
 32056  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0 ; mask: element == 0
 32057  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1 ; invert: all ones where element != 0
 32058  	LONG $0x35380f66; BYTE $0xdb               // pmovzxdq    xmm3, xmm3 ; zero-extend the two 32-bit masks to 64-bit lanes
 32059  	LONG $0xdadb0f66                           // pand    xmm3, xmm2 ; keep only the constant's bits
 32060  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 32061  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32062  	LONG $0x35380f66; BYTE $0xe4               // pmovzxdq    xmm4, xmm4
 32063  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32064  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3 ; store two 64-bit results
 32065  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
 32066  	LONG $0x5c7e0ff3; WORD $0x10b1             // movq    xmm3, qword [rcx + 4*rsi + 16] ; second group (elements rsi+4 .. rsi+7), same computation
 32067  	LONG $0x647e0ff3; WORD $0x18b1             // movq    xmm4, qword [rcx + 4*rsi + 24]
 32068  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 32069  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 32070  	LONG $0x35380f66; BYTE $0xdb               // pmovzxdq    xmm3, xmm3
 32071  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 32072  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 32073  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32074  	LONG $0x35380f66; BYTE $0xe4               // pmovzxdq    xmm4, xmm4
 32075  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32076  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 32077  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
 32078  	LONG $0x08c68348                           // add    rsi, 8 ; 8 elements consumed per iteration
 32079  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 32080  	JNE  LBB4_794
 32081  	JMP  LBB4_1178 // continuation is outside this chunk
 32082  
 32083  LBB4_795: // 32-bit input -> 32-bit output: each element becomes constant where element != 0, else 0; 8 elements per group
 32084  	WORD $0xc289             // mov    edx, eax ; eax holds the count, set before this label
 32085  	WORD $0xe283; BYTE $0xf8 // and    edx, -8 ; round down to a multiple of 8
 32086  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 32087  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32088  	LONG $0x03e9c149         // shr    r9, 3
 32089  	LONG $0x01c18349         // add    r9, 1 ; r9 = number of 8-element groups
 32090  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32091  	JE   LBB4_1182 // exactly one group: bypass the 2x-unrolled loop below
 32092  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32093  	LONG $0xfee78348         // and    rdi, -2 ; even number of groups handled by the loop
 32094  	WORD $0xf748; BYTE $0xdf // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 32095  	WORD $0xf631             // xor    esi, esi ; rsi = element index, starts at 0
 32096  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 ; xmm0 = all zero
 32097  	QUAD $0x000000d08d6f0f66 // movdqa    xmm1, oword 208[rbp] /* [rip + .LCPI4_19] */ ; constant defined outside this chunk, presumably 1 per 32-bit lane - confirm
 32098  
 32099  LBB4_797: // loop body: two groups of 8 elements per iteration
 32100  	LONG $0x146f0ff3; BYTE $0xb1               // movdqu    xmm2, oword [rcx + 4*rsi] ; elements rsi .. rsi+3
 32101  	LONG $0x5c6f0ff3; WORD $0x10b1             // movdqu    xmm3, oword [rcx + 4*rsi + 16] ; elements rsi+4 .. rsi+7
 32102  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0 ; mask: element == 0
 32103  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1 ; (NOT mask) AND constant: constant where element != 0, else 0
 32104  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 32105  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 32106  	LONG $0x7f0f41f3; WORD $0xb014             // movdqu    oword [r8 + 4*rsi], xmm2 ; store four 32-bit results
 32107  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm3
 32108  	LONG $0x546f0ff3; WORD $0x20b1             // movdqu    xmm2, oword [rcx + 4*rsi + 32] ; second group (elements rsi+8 .. rsi+15), same computation
 32109  	LONG $0x5c6f0ff3; WORD $0x30b1             // movdqu    xmm3, oword [rcx + 4*rsi + 48]
 32110  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0
 32111  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 32112  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 32113  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 32114  	LONG $0x7f0f41f3; WORD $0xb054; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm2
 32115  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm3
 32116  	LONG $0x10c68348                           // add    rsi, 16 ; 16 elements consumed per iteration
 32117  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 32118  	JNE  LBB4_797
 32119  	JMP  LBB4_1183 // continuation is outside this chunk
 32120  
 32121  LBB4_798: // 8-byte double input -> 64-bit integer output: bit-merge with two constants, truncate to int64, force 0 where the input equals 0.0; 4 elements per group
 32122  	WORD $0xc289                 // mov    edx, eax ; eax holds the count, set before this label
 32123  	WORD $0xe283; BYTE $0xfc     // and    edx, -4 ; round down to a multiple of 4
 32124  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 32125  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 32126  	LONG $0x02e9c149             // shr    r9, 2
 32127  	LONG $0x01c18349             // add    r9, 1 ; r9 = number of 4-element groups
 32128  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 32129  	JE   LBB4_1190 // exactly one group: bypass the 2x-unrolled loop below
 32130  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 32131  	LONG $0xfee78348             // and    rdi, -2 ; even number of groups handled by the loop
 32132  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 32133  	WORD $0xf631                 // xor    esi, esi ; rsi = element index, starts at 0
 32134  	LONG $0xc0570f66             // xorpd    xmm0, xmm0 ; xmm0 = 0.0 in both lanes
 32135  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */ ; AND mask, defined outside this chunk; presumably the sign-bit mask - confirm
 32136  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI4_1] */ ; OR constant, defined outside this chunk; presumably 1.0 - confirm
 32137  
 32138  LBB4_800: // loop body: two groups of 4 doubles per iteration; rbx is used as scratch
 32139  	LONG $0x1c100f66; BYTE $0xf1               // movupd    xmm3, oword [rcx + 8*rsi] ; elements rsi, rsi+1
 32140  	LONG $0x64100f66; WORD $0x10f1             // movupd    xmm4, oword [rcx + 8*rsi + 16] ; elements rsi+2, rsi+3
 32141  	LONG $0xeb280f66                           // movapd    xmm5, xmm3
 32142  	LONG $0xe9540f66                           // andpd    xmm5, xmm1 ; keep only the bits selected by the mask
 32143  	LONG $0xea560f66                           // orpd    xmm5, xmm2 ; merge in the constant (presumably yields +/-1.0 - confirm)
 32144  	LONG $0xf4280f66                           // movapd    xmm6, xmm4
 32145  	LONG $0xf1540f66                           // andpd    xmm6, xmm1
 32146  	LONG $0xf2560f66                           // orpd    xmm6, xmm2
 32147  	LONG $0x2c0f48f2; BYTE $0xdd               // cvttsd2si    rbx, xmm5 ; low lane -> int64 (truncating)
 32148  	LONG $0x6e0f4866; BYTE $0xfb               // movq    xmm7, rbx
 32149  	LONG $0xed700f66; BYTE $0xee               // pshufd    xmm5, xmm5, 238 ; move the high 64-bit lane down to the low lane
 32150  	LONG $0x2c0f48f2; BYTE $0xdd               // cvttsd2si    rbx, xmm5 ; high lane -> int64
 32151  	LONG $0x6e0f4866; BYTE $0xeb               // movq    xmm5, rbx
 32152  	LONG $0xfd6c0f66                           // punpcklqdq    xmm7, xmm5 ; xmm7 = both converted integers
 32153  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 32154  	LONG $0x6e0f4866; BYTE $0xeb               // movq    xmm5, rbx
 32155  	LONG $0xf6700f66; BYTE $0xee               // pshufd    xmm6, xmm6, 238
 32156  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 32157  	LONG $0x6e0f4866; BYTE $0xf3               // movq    xmm6, rbx
 32158  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 32159  	LONG $0xd8c20f66; BYTE $0x04               // cmpneqpd    xmm3, xmm0 ; mask: input != 0.0
 32160  	LONG $0xdf540f66                           // andpd    xmm3, xmm7 ; zero the result where the input was 0.0
 32161  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 32162  	LONG $0xe5540f66                           // andpd    xmm4, xmm5
 32163  	LONG $0x110f4166; WORD $0xf01c             // movupd    oword [r8 + 8*rsi], xmm3 ; store two 64-bit results
 32164  	LONG $0x110f4166; WORD $0xf064; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm4
 32165  	LONG $0x5c100f66; WORD $0x20f1             // movupd    xmm3, oword [rcx + 8*rsi + 32] ; second group (elements rsi+4 .. rsi+7), same computation
 32166  	LONG $0x64100f66; WORD $0x30f1             // movupd    xmm4, oword [rcx + 8*rsi + 48]
 32167  	LONG $0xeb280f66                           // movapd    xmm5, xmm3
 32168  	LONG $0xe9540f66                           // andpd    xmm5, xmm1
 32169  	LONG $0xea560f66                           // orpd    xmm5, xmm2
 32170  	LONG $0xf4280f66                           // movapd    xmm6, xmm4
 32171  	LONG $0xf1540f66                           // andpd    xmm6, xmm1
 32172  	LONG $0xf2560f66                           // orpd    xmm6, xmm2
 32173  	LONG $0x2c0f48f2; BYTE $0xdd               // cvttsd2si    rbx, xmm5
 32174  	LONG $0x6e0f4866; BYTE $0xfb               // movq    xmm7, rbx
 32175  	LONG $0xed700f66; BYTE $0xee               // pshufd    xmm5, xmm5, 238
 32176  	LONG $0x2c0f48f2; BYTE $0xdd               // cvttsd2si    rbx, xmm5
 32177  	LONG $0x6e0f4866; BYTE $0xeb               // movq    xmm5, rbx
 32178  	LONG $0xfd6c0f66                           // punpcklqdq    xmm7, xmm5
 32179  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 32180  	LONG $0x6e0f4866; BYTE $0xeb               // movq    xmm5, rbx
 32181  	LONG $0xf6700f66; BYTE $0xee               // pshufd    xmm6, xmm6, 238
 32182  	LONG $0x2c0f48f2; BYTE $0xde               // cvttsd2si    rbx, xmm6
 32183  	LONG $0x6e0f4866; BYTE $0xf3               // movq    xmm6, rbx
 32184  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 32185  	LONG $0xd8c20f66; BYTE $0x04               // cmpneqpd    xmm3, xmm0
 32186  	LONG $0xdf540f66                           // andpd    xmm3, xmm7
 32187  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 32188  	LONG $0xe5540f66                           // andpd    xmm4, xmm5
 32189  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm3
 32190  	LONG $0x110f4166; WORD $0xf064; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm4
 32191  	LONG $0x08c68348                           // add    rsi, 8 ; 8 elements consumed per iteration
 32192  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 32193  	JNE  LBB4_800
 32194  	JMP  LBB4_1191 // continuation is outside this chunk
 32195  
 32196  LBB4_801: // 8-byte double input -> 4-byte float output: convert to float, bit-merge with a constant under a mask, force 0 where the input equals 0.0; 4 elements per group
 32197  	WORD $0xc289                 // mov    edx, eax ; eax holds the count, set before this label
 32198  	WORD $0xe283; BYTE $0xfc     // and    edx, -4 ; round down to a multiple of 4
 32199  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 32200  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 32201  	LONG $0x02e9c149             // shr    r9, 2
 32202  	LONG $0x01c18349             // add    r9, 1 ; r9 = number of 4-element groups
 32203  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 32204  	JE   LBB4_1196 // exactly one group: bypass the 2x-unrolled loop below
 32205  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 32206  	LONG $0xfee78348             // and    rdi, -2 ; even number of groups handled by the loop
 32207  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; negative counter, stepped by +2 until it reaches zero
 32208  	WORD $0xf631                 // xor    esi, esi ; rsi = element index, starts at 0
 32209  	LONG $0x570f4566; BYTE $0xc0 // xorpd    xmm8, xmm8 ; xmm8 = 0.0 in both lanes
 32210  	LONG $0x4d5a0f66; BYTE $0x10 // cvtpd2ps    xmm1, oword 16[rbp] /* [rip + .LCPI4_1] */ ; two double constants narrowed to floats (values defined outside this chunk)
 32211  	LONG $0x4d280f44; BYTE $0x20 // movaps    xmm9, oword 32[rbp] /* [rip + .LCPI4_3] */ ; bit mask, defined outside this chunk; presumably 0x7fffffff per lane - confirm
 32212  	LONG $0xd9160ff3             // movshdup    xmm3, xmm1 ; xmm3 lane 0 = second converted constant
 32213  	LONG $0xd9540f41             // andps    xmm3, xmm9 ; masked constant used for odd-indexed elements
 32214  	LONG $0xc9540f41             // andps    xmm1, xmm9 ; masked constant used for even-indexed elements
 32215  
 32216  LBB4_803: // loop body: two groups of 4 doubles per iteration
 32217  	LONG $0x24100f66; BYTE $0xf1   // movupd    xmm4, oword [rcx + 8*rsi] ; elements rsi, rsi+1
 32218  	LONG $0x74100f66; WORD $0x10f1 // movupd    xmm6, oword [rcx + 8*rsi + 16] ; elements rsi+2, rsi+3
 32219  	WORD $0x570f; BYTE $0xed       // xorps    xmm5, xmm5
 32220  	LONG $0xec5a0ff2               // cvtsd2ss    xmm5, xmm4 ; element rsi as float
 32221  	LONG $0xc20f4166; WORD $0x00e0 // cmpeqpd    xmm4, xmm8 ; 64-bit masks: input == 0.0
 32222  	LONG $0xe8e4c60f               // shufps    xmm4, xmm4, 232 ; pack the two 64-bit masks into 32-bit lanes 0 and 1
 32223  	WORD $0x570f; BYTE $0xff       // xorps    xmm7, xmm7
 32224  	LONG $0xfe5a0ff2               // cvtsd2ss    xmm7, xmm6 ; element rsi+2 as float
 32225  	LONG $0xc20f4166; WORD $0x00f0 // cmpeqpd    xmm6, xmm8
 32226  	LONG $0xe8f6c60f               // shufps    xmm6, xmm6, 232
 32227  	LONG $0x44100ff2; WORD $0x08f1 // movsd    xmm0, qword [rcx + 8*rsi + 8] ; reload element rsi+1 as a scalar
 32228  	LONG $0xc05a0ff2               // cvtsd2ss    xmm0, xmm0
 32229  	LONG $0xd1280f41               // movaps    xmm2, xmm9
 32230  	WORD $0x550f; BYTE $0xd0       // andnps    xmm2, xmm0 ; bits of the value outside the mask
 32231  	WORD $0x560f; BYTE $0xd3       // orps    xmm2, xmm3 ; merged with the masked constant
 32232  	LONG $0xc1280f41               // movaps    xmm0, xmm9
 32233  	WORD $0x550f; BYTE $0xc5       // andnps    xmm0, xmm5
 32234  	WORD $0x560f; BYTE $0xc1       // orps    xmm0, xmm1
 32235  	WORD $0x140f; BYTE $0xc2       // unpcklps    xmm0, xmm2 ; interleave results for elements rsi and rsi+1
 32236  	WORD $0x550f; BYTE $0xe0       // andnps    xmm4, xmm0 ; zero the result where the input was 0.0
 32237  	LONG $0x44100ff2; WORD $0x18f1 // movsd    xmm0, qword [rcx + 8*rsi + 24] ; element rsi+3
 32238  	LONG $0xc05a0ff2               // cvtsd2ss    xmm0, xmm0
 32239  	LONG $0xd1280f41               // movaps    xmm2, xmm9
 32240  	WORD $0x550f; BYTE $0xd0       // andnps    xmm2, xmm0
 32241  	WORD $0x560f; BYTE $0xd3       // orps    xmm2, xmm3
 32242  	LONG $0xc1280f41               // movaps    xmm0, xmm9
 32243  	WORD $0x550f; BYTE $0xc7       // andnps    xmm0, xmm7
 32244  	WORD $0x560f; BYTE $0xc1       // orps    xmm0, xmm1
 32245  	WORD $0x140f; BYTE $0xc2       // unpcklps    xmm0, xmm2
 32246  	WORD $0x550f; BYTE $0xf0       // andnps    xmm6, xmm0
 32247  	WORD $0x160f; BYTE $0xe6       // movlhps    xmm4, xmm6 ; combine into four float results
 32248  	LONG $0x24110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm4 ; store four 32-bit results
 32249  	LONG $0x64100f66; WORD $0x20f1 // movupd    xmm4, oword [rcx + 8*rsi + 32] ; second group (elements rsi+4 .. rsi+7), same computation with different scratch registers
 32250  	LONG $0x44100f66; WORD $0x30f1 // movupd    xmm0, oword [rcx + 8*rsi + 48]
 32251  	WORD $0x570f; BYTE $0xd2       // xorps    xmm2, xmm2
 32252  	LONG $0xd45a0ff2               // cvtsd2ss    xmm2, xmm4
 32253  	LONG $0xc20f4166; WORD $0x00e0 // cmpeqpd    xmm4, xmm8
 32254  	LONG $0xe8e4c60f               // shufps    xmm4, xmm4, 232
 32255  	WORD $0x570f; BYTE $0xed       // xorps    xmm5, xmm5
 32256  	LONG $0xe85a0ff2               // cvtsd2ss    xmm5, xmm0
 32257  	LONG $0xc20f4166; WORD $0x00c0 // cmpeqpd    xmm0, xmm8
 32258  	LONG $0x74100ff2; WORD $0x28f1 // movsd    xmm6, qword [rcx + 8*rsi + 40]
 32259  	LONG $0xf65a0ff2               // cvtsd2ss    xmm6, xmm6
 32260  	LONG $0xe8c0c60f               // shufps    xmm0, xmm0, 232
 32261  	LONG $0xf9280f41               // movaps    xmm7, xmm9
 32262  	WORD $0x550f; BYTE $0xfe       // andnps    xmm7, xmm6
 32263  	WORD $0x560f; BYTE $0xfb       // orps    xmm7, xmm3
 32264  	LONG $0xf1280f41               // movaps    xmm6, xmm9
 32265  	WORD $0x550f; BYTE $0xf2       // andnps    xmm6, xmm2
 32266  	WORD $0x560f; BYTE $0xf1       // orps    xmm6, xmm1
 32267  	WORD $0x140f; BYTE $0xf7       // unpcklps    xmm6, xmm7
 32268  	WORD $0x550f; BYTE $0xe6       // andnps    xmm4, xmm6
 32269  	LONG $0x54100ff2; WORD $0x38f1 // movsd    xmm2, qword [rcx + 8*rsi + 56]
 32270  	LONG $0xd25a0ff2               // cvtsd2ss    xmm2, xmm2
 32271  	LONG $0xf1280f41               // movaps    xmm6, xmm9
 32272  	WORD $0x550f; BYTE $0xf2       // andnps    xmm6, xmm2
 32273  	WORD $0x560f; BYTE $0xf3       // orps    xmm6, xmm3
 32274  	LONG $0xd1280f41               // movaps    xmm2, xmm9
 32275  	WORD $0x550f; BYTE $0xd5       // andnps    xmm2, xmm5
 32276  	WORD $0x560f; BYTE $0xd1       // orps    xmm2, xmm1
 32277  	WORD $0x140f; BYTE $0xd6       // unpcklps    xmm2, xmm6
 32278  	WORD $0x550f; BYTE $0xc2       // andnps    xmm0, xmm2
 32279  	WORD $0x160f; BYTE $0xe0       // movlhps    xmm4, xmm0
 32280  	LONG $0x64110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm4
 32281  	LONG $0x08c68348               // add    rsi, 8 ; 8 elements consumed per iteration
 32282  	LONG $0x02c78348               // add    rdi, 2 ; two groups done; sets ZF when the counter reaches 0
 32283  	JNE  LBB4_803
 32284  	JMP  LBB4_1197 // continuation is outside this chunk
 32285  
 32286  LBB4_819: // scalar loop, 4 elements per pass: 8-byte input compared against 0, 4-byte float output = constant if nonzero, else 0.0
 32287  	WORD $0xe283; BYTE $0xfc // and    edx, -4 ; edx (set before this label) rounded down to a multiple of 4 = loop bound
 32288  	WORD $0xf631             // xor    esi, esi ; rsi = element index, starts at 0
 32289  	QUAD $0x0000012885100ff3 // movss    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */ ; float constant defined outside this chunk, presumably 1.0 - confirm
 32290  	JMP  LBB4_821 // enter the loop at its first element
 32291  
 32292  LBB4_820: // tail of one pass: store element 3, advance, test the bound
 32293  	LONG $0x110f41f3; WORD $0xb04c; BYTE $0x0c // movss    dword [r8 + 4*rsi + 12], xmm1 ; result for element rsi+3
 32294  	LONG $0x04c68348                           // add    rsi, 4
 32295  	WORD $0x3948; BYTE $0xf2                   // cmp    rdx, rsi
 32296  	JE   LBB4_387 // all full groups done; continuation is outside this chunk
 32297  
 32298  LBB4_821: // element rsi
 32299  	LONG $0xf13c8348; BYTE $0x00   // cmp    qword [rcx + 8*rsi], 0
 32300  	LONG $0xc8280f66               // movapd    xmm1, xmm0 ; preload the constant; the move leaves the flags from cmp intact
 32301  	JNE  LBB4_822 // nonzero: store the constant
 32302  	LONG $0xc9570f66               // xorpd    xmm1, xmm1 ; zero input -> result 0.0
 32303  	LONG $0x110f41f3; WORD $0xb00c // movss    dword [r8 + 4*rsi], xmm1
 32304  	LONG $0xf17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rsi + 8], 0 ; element rsi+1
 32305  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 32306  	JE   LBB4_826
 32307  
 32308  LBB4_823: // element rsi+1 is nonzero: store the constant, then test element rsi+2
 32309  	LONG $0x110f41f3; WORD $0xb04c; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm1
 32310  	LONG $0xf17c8348; WORD $0x0010             // cmp    qword [rcx + 8*rsi + 16], 0
 32311  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 32312  	JNE  LBB4_824
 32313  
 32314  LBB4_827: // element rsi+2 is zero: store 0.0, then test element rsi+3
 32315  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 32316  	LONG $0x110f41f3; WORD $0xb04c; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm1
 32317  	LONG $0xf17c8348; WORD $0x0018             // cmp    qword [rcx + 8*rsi + 24], 0
 32318  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 32319  	JNE  LBB4_820
 32320  	JMP  LBB4_828
 32321  
 32322  LBB4_822: // element rsi is nonzero: store the constant, then test element rsi+1
 32323  	LONG $0x110f41f3; WORD $0xb00c // movss    dword [r8 + 4*rsi], xmm1
 32324  	LONG $0xf17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rsi + 8], 0
 32325  	LONG $0xc8280f66               // movapd    xmm1, xmm0
 32326  	JNE  LBB4_823
 32327  
 32328  LBB4_826: // element rsi+1 is zero: store 0.0, then test element rsi+2
 32329  	LONG $0xc9570f66                           // xorpd    xmm1, xmm1
 32330  	LONG $0x110f41f3; WORD $0xb04c; BYTE $0x04 // movss    dword [r8 + 4*rsi + 4], xmm1
 32331  	LONG $0xf17c8348; WORD $0x0010             // cmp    qword [rcx + 8*rsi + 16], 0
 32332  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 32333  	JE   LBB4_827
 32334  
 32335  LBB4_824: // element rsi+2 is nonzero: store the constant, then test element rsi+3
 32336  	LONG $0x110f41f3; WORD $0xb04c; BYTE $0x08 // movss    dword [r8 + 4*rsi + 8], xmm1
 32337  	LONG $0xf17c8348; WORD $0x0018             // cmp    qword [rcx + 8*rsi + 24], 0
 32338  	LONG $0xc8280f66                           // movapd    xmm1, xmm0
 32339  	JNE  LBB4_820
 32340  
 32341  LBB4_828: // element rsi+3 is zero: result 0.0, stored by LBB4_820
 32342  	LONG $0xc9570f66 // xorpd    xmm1, xmm1
 32343  	JMP  LBB4_820
 32344  
 32345  LBB4_829:
        	// Set-up for a 2x-unrolled SSE4.1 loop that reads 16-bit elements from
        	// rcx and writes 64-bit elements to r8, four elements per half-step.
        	//   rdx = eax & -4            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 4) >> 2) + 1 = number of 4-element groups
        	//   rdx - 4 == 0 (a single group) -> LBB4_1055, skipping the loop
        	//   rdi = -(r9 & -2)          negative count of groups for the loop
        	//   rsi = 0                   element index
        	//   xmm0 = 0, xmm1 = all ones, xmm2 = 16-byte constant at 144[rbp]
        	//   (.LCPI4_15; value not visible here, presumably 1 in each qword lane).
 32346  	WORD $0xc289             // mov    edx, eax
 32347  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 32348  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 32349  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32350  	LONG $0x02e9c149         // shr    r9, 2
 32351  	LONG $0x01c18349         // add    r9, 1
 32352  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32353  	JE   LBB4_1055
 32354  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32355  	LONG $0xfee78348         // and    rdi, -2
 32356  	WORD $0xf748; BYTE $0xdf // neg    rdi
 32357  	WORD $0xf631             // xor    esi, esi
 32358  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 32359  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 32360  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */
 32361    
 32362  LBB4_831:
        	// Loop body: 8 elements per iteration (two groups of 4).
        	// Per pair of words: pcmpeqw against 0 then pxor with all-ones yields a
        	// "word != 0" mask; pmovzxwq widens the two low words to qwords; pand
        	// with xmm2 keeps xmm2's bits only in lanes whose input was non-zero.
        	// Results are stored unaligned to r8 + 8*rsi.
 32363  	LONG $0x1c6e0f66; BYTE $0x71               // movd    xmm3, dword [rcx + 2*rsi]
 32364  	LONG $0x646e0f66; WORD $0x0471             // movd    xmm4, dword [rcx + 2*rsi + 4]
 32365  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 32366  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 32367  	LONG $0x34380f66; BYTE $0xdb               // pmovzxwq    xmm3, xmm3
 32368  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 32369  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 32370  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32371  	LONG $0x34380f66; BYTE $0xe4               // pmovzxwq    xmm4, xmm4
 32372  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32373  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3
 32374  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
        	// Second group of 4 elements (unrolled copy of the above).
 32375  	LONG $0x5c6e0f66; WORD $0x0871             // movd    xmm3, dword [rcx + 2*rsi + 8]
 32376  	LONG $0x646e0f66; WORD $0x0c71             // movd    xmm4, dword [rcx + 2*rsi + 12]
 32377  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 32378  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 32379  	LONG $0x34380f66; BYTE $0xdb               // pmovzxwq    xmm3, xmm3
 32380  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 32381  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 32382  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32383  	LONG $0x34380f66; BYTE $0xe4               // pmovzxwq    xmm4, xmm4
 32384  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32385  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 32386  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
        	// Advance by 8 elements; rdi counts groups up towards zero, and the
        	// JNE tests the flags of that final add.
 32387  	LONG $0x08c68348                           // add    rsi, 8
 32388  	LONG $0x02c78348                           // add    rdi, 2
 32389  	JNE  LBB4_831
 32390  	JMP  LBB4_1056
 32391  
 32392  LBB4_832:
        	// Set-up for a 2x-unrolled SSE4.1 loop that reads 16-bit elements from
        	// rcx and writes packed single-precision floats to r8, eight elements
        	// per half-step.
        	//   rdx = eax & -8            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 8) >> 3) + 1 = number of 8-element groups
        	//   rdx - 8 == 0 (a single group) -> LBB4_1204, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm0 = 0, xmm1 = all ones, xmm2 = 16-byte constant at 80[rbp]
        	//   (.LCPI4_8; value not visible here, presumably integer 1 per dword).
 32393  	WORD $0xc289                 // mov    edx, eax
 32394  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
 32395  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 32396  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 32397  	LONG $0x03e9c149             // shr    r9, 3
 32398  	LONG $0x01c18349             // add    r9, 1
 32399  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 32400  	JE   LBB4_1204
 32401  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 32402  	LONG $0xfee78348             // and    rdi, -2
 32403  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 32404  	WORD $0xf631                 // xor    esi, esi
 32405  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 32406  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 32407  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */
 32408  
 32409  LBB4_834:
        	// Loop body: 16 elements per iteration (two groups of 8).
        	// Per 4 words: build a "word != 0" mask (pcmpeqw + pxor all-ones),
        	// zero-extend to dwords (pmovzxwd), mask xmm2 with it (pand), then
        	// convert the resulting integers to float (cvtdq2ps) and store them
        	// unaligned at r8 + 4*rsi.
 32410  	LONG $0x1c7e0ff3; BYTE $0x71   // movq    xmm3, qword [rcx + 2*rsi]
 32411  	LONG $0x647e0ff3; WORD $0x0871 // movq    xmm4, qword [rcx + 2*rsi + 8]
 32412  	LONG $0xd8750f66               // pcmpeqw    xmm3, xmm0
 32413  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 32414  	LONG $0x33380f66; BYTE $0xdb   // pmovzxwd    xmm3, xmm3
 32415  	LONG $0xdadb0f66               // pand    xmm3, xmm2
 32416  	WORD $0x5b0f; BYTE $0xdb       // cvtdq2ps    xmm3, xmm3
 32417  	LONG $0xe0750f66               // pcmpeqw    xmm4, xmm0
 32418  	LONG $0xe1ef0f66               // pxor    xmm4, xmm1
 32419  	LONG $0x33380f66; BYTE $0xe4   // pmovzxwd    xmm4, xmm4
 32420  	LONG $0xe2db0f66               // pand    xmm4, xmm2
 32421  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4
 32422  	LONG $0x1c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm3
 32423  	LONG $0x64110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm4
        	// Second group of 8 elements (unrolled copy of the above).
 32424  	LONG $0x5c7e0ff3; WORD $0x1071 // movq    xmm3, qword [rcx + 2*rsi + 16]
 32425  	LONG $0x647e0ff3; WORD $0x1871 // movq    xmm4, qword [rcx + 2*rsi + 24]
 32426  	LONG $0xd8750f66               // pcmpeqw    xmm3, xmm0
 32427  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 32428  	LONG $0x33380f66; BYTE $0xdb   // pmovzxwd    xmm3, xmm3
 32429  	LONG $0xdadb0f66               // pand    xmm3, xmm2
 32430  	WORD $0x5b0f; BYTE $0xdb       // cvtdq2ps    xmm3, xmm3
 32431  	LONG $0xe0750f66               // pcmpeqw    xmm4, xmm0
 32432  	LONG $0xe1ef0f66               // pxor    xmm4, xmm1
 32433  	LONG $0x33380f66; BYTE $0xe4   // pmovzxwd    xmm4, xmm4
 32434  	LONG $0xe2db0f66               // pand    xmm4, xmm2
 32435  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4
 32436  	LONG $0x5c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm3
 32437  	LONG $0x64110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm4
        	// Advance by 16 elements; JNE tests the flags of "add rdi, 2".
 32438  	LONG $0x10c68348               // add    rsi, 16
 32439  	LONG $0x02c78348               // add    rdi, 2
 32440  	JNE  LBB4_834
 32441  	JMP  LBB4_1205
 32442  
 32443  LBB4_835:
        	// Set-up for a 2x-unrolled SSE4.1 loop: signed 16-bit inputs at rcx,
        	// 64-bit outputs at r8, selecting one of three values per element.
        	//   rdx = r10d & -4           (r10d presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 4) >> 2) + 1 = number of 4-element groups
        	//   rdx - 4 == 0 (a single group) -> LBB4_1212, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 144[rbp]
        	//   (.LCPI4_15; value not visible here, presumably 1 in each qword lane).
 32444  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 32445  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 32446  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 32447  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32448  	LONG $0x02e9c149         // shr    r9, 2
 32449  	LONG $0x01c18349         // add    r9, 1
 32450  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32451  	JE   LBB4_1212
 32452  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32453  	LONG $0xfee78348         // and    rdi, -2
 32454  	WORD $0xf748; BYTE $0xdf // neg    rdi
 32455  	WORD $0xf631             // xor    esi, esi
 32456  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 32457  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 32458  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 32459  
 32460  LBB4_837:
        	// Loop body: 8 elements per iteration (two groups of 4).
        	// Per pair of words x:
        	//   xmm0/xmm1 = (x > 0) mask, sign-extended to qwords (pcmpgtw, pmovsxwq)
        	//   xmm5/xmm6 = (x != 0) mask, sign-extended to qwords, i.e. -1 for
        	//               non-zero lanes and 0 for zero lanes
        	//   blendvpd replaces lanes where the x > 0 mask is set with xmm4.
        	// Net per lane: x > 0 -> xmm4's lane, x < 0 -> -1, x == 0 -> 0.
        	// blendvpd takes its selector implicitly from xmm0, hence the
        	// "movdqa xmm0, xmm1" before the second blend.
 32461  	LONG $0x2c6e0f66; BYTE $0x71               // movd    xmm5, dword [rcx + 2*rsi]
 32462  	LONG $0x746e0f66; WORD $0x0471             // movd    xmm6, dword [rcx + 2*rsi + 4]
 32463  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 32464  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 32465  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0
 32466  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 32467  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 32468  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 32469  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 32470  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 32471  	LONG $0x24380f66; BYTE $0xed               // pmovsxwq    xmm5, xmm5
 32472  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 32473  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32474  	LONG $0x24380f66; BYTE $0xf6               // pmovsxwq    xmm6, xmm6
 32475  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 32476  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32477  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 32478  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5
 32479  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
        	// Second group of 4 elements (unrolled copy of the above).
 32480  	LONG $0x6c6e0f66; WORD $0x0871             // movd    xmm5, dword [rcx + 2*rsi + 8]
 32481  	LONG $0x746e0f66; WORD $0x0c71             // movd    xmm6, dword [rcx + 2*rsi + 12]
 32482  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 32483  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 32484  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0
 32485  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 32486  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 32487  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 32488  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 32489  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 32490  	LONG $0x24380f66; BYTE $0xed               // pmovsxwq    xmm5, xmm5
 32491  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 32492  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32493  	LONG $0x24380f66; BYTE $0xf6               // pmovsxwq    xmm6, xmm6
 32494  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 32495  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32496  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 32497  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 32498  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
        	// Advance by 8 elements; JNE tests the flags of "add rdi, 2".
 32499  	LONG $0x08c68348                           // add    rsi, 8
 32500  	LONG $0x02c78348                           // add    rdi, 2
 32501  	JNE  LBB4_837
 32502  	JMP  LBB4_1213
 32503  
 32504  LBB4_838:
        	// Set-up for a 2x-unrolled SSE4.1 loop: signed 16-bit inputs at rcx,
        	// single-precision float outputs at r8, selecting one of three values
        	// per element.
        	//   rdx = eax & -8            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 8) >> 3) + 1 = number of 8-element groups
        	//   rdx - 8 == 0 (a single group) -> LBB4_1218, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 208[rbp]
        	//   (.LCPI4_19; value not visible here, presumably 1.0f in each lane).
 32505  	WORD $0xc289                               // mov    edx, eax
 32506  	WORD $0xe283; BYTE $0xf8                   // and    edx, -8
 32507  	LONG $0xf8728d48                           // lea    rsi, [rdx - 8]
 32508  	WORD $0x8949; BYTE $0xf1                   // mov    r9, rsi
 32509  	LONG $0x03e9c149                           // shr    r9, 3
 32510  	LONG $0x01c18349                           // add    r9, 1
 32511  	WORD $0x8548; BYTE $0xf6                   // test    rsi, rsi
 32512  	JE   LBB4_1218
 32513  	WORD $0x894c; BYTE $0xcf                   // mov    rdi, r9
 32514  	LONG $0xfee78348                           // and    rdi, -2
 32515  	WORD $0xf748; BYTE $0xdf                   // neg    rdi
 32516  	WORD $0xf631                               // xor    esi, esi
 32517  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 32518  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 32519  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */
 32520  
 32521  LBB4_840:
        	// Loop body: 16 elements per iteration (two groups of 8).
        	// Per 4 words x:
        	//   xmm0/xmm1 = (x > 0) mask, sign-extended to dwords (pcmpgtw, pmovsxwd)
        	//   xmm5/xmm6 = (x != 0) mask as dwords (-1 or 0), converted to float
        	//               by cvtdq2ps, giving -1.0f or 0.0f
        	//   blendvps replaces lanes where the x > 0 mask is set with xmm4.
        	// Net per lane: x > 0 -> xmm4's lane, x < 0 -> -1.0f, x == 0 -> 0.0f.
        	// blendvps takes its selector implicitly from xmm0, hence the
        	// "movdqa xmm0, xmm1" before the second blend.
 32522  	LONG $0x2c7e0ff3; BYTE $0x71   // movq    xmm5, qword [rcx + 2*rsi]
 32523  	LONG $0x747e0ff3; WORD $0x0871 // movq    xmm6, qword [rcx + 2*rsi + 8]
 32524  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32525  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2
 32526  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0
 32527  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32528  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 32529  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 32530  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 32531  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 32532  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5
 32533  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 32534  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 32535  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32536  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 32537  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 32538  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 32539  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 32540  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32541  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 32542  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
        	// Second group of 8 elements (unrolled copy of the above).
 32543  	LONG $0x6c7e0ff3; WORD $0x1071 // movq    xmm5, qword [rcx + 2*rsi + 16]
 32544  	LONG $0x747e0ff3; WORD $0x1871 // movq    xmm6, qword [rcx + 2*rsi + 24]
 32545  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32546  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2
 32547  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0
 32548  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32549  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 32550  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 32551  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 32552  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 32553  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5
 32554  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 32555  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 32556  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32557  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 32558  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 32559  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 32560  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 32561  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32562  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 32563  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
        	// Advance by 16 elements; JNE tests the flags of "add rdi, 2".
 32564  	LONG $0x10c68348               // add    rsi, 16
 32565  	LONG $0x02c78348               // add    rdi, 2
 32566  	JNE  LBB4_840
 32567  	JMP  LBB4_1219
 32568  
 32569  LBB4_846:
        	// Scalar loop, unrolled by 2: signed 64-bit inputs at rcx, 32-bit
        	// scalar outputs (movss) at r8.
        	//   rsi = edx & -2            even part of the count in edx
        	//   rax = 0                   element index
        	//   xmm0 = scalar at 304[rbp] (.LCPI4_14), xmm1 = scalar at 296[rbp]
        	//   (.LCPI4_5); values not visible here, presumably -1.0f and 1.0f.
        	// Per element x the value stored is:
        	//   x > 0 -> xmm1,  x < 0 -> xmm0,  x == 0 -> 0 (xorpd).
        	// movapd/xorpd/movss do not modify EFLAGS, so the JNE/JLE/JG/JE
        	// branches below all test the most recent cmp against 0.
 32570  	WORD $0xd689             // mov    esi, edx
 32571  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 32572  	WORD $0xc031             // xor    eax, eax
 32573  	QUAD $0x0000013085100ff3 // movss    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */
 32574  	QUAD $0x000001288d100ff3 // movss    xmm1, dword 296[rbp] /* [rip + .LCPI4_5] */
 32575  	JMP  LBB4_848
 32576  
 32577  LBB4_847:
        	// Loop latch: store the second element's result (in xmm3), advance
        	// by 2 and exit to LBB4_410 once rax reaches rsi.
 32578  	LONG $0x110f41f3; WORD $0x805c; BYTE $0x04 // movss    dword [r8 + 4*rax + 4], xmm3
 32579  	LONG $0x02c08348                           // add    rax, 2
 32580  	WORD $0x3948; BYTE $0xc6                   // cmp    rsi, rax
 32581  	JE   LBB4_410
 32582  
 32583  LBB4_848:
        	// First element: xmm2 becomes the "not positive" candidate
        	// (xmm0 when non-zero, 0 when zero).
 32584  	LONG $0xc13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rax], 0
 32585  	LONG $0xd0280f66             // movapd    xmm2, xmm0
 32586  	JNE  LBB4_849
 32587  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 32588  	LONG $0xd9280f66             // movapd    xmm3, xmm1
        	// Flags still say "equal" here, so this JLE is always taken.
 32589  	JLE  LBB4_853
 32590  
 32591  LBB4_850:
        	// First element is positive: store xmm1 (already in xmm3), then
        	// classify the second element.
 32592  	LONG $0x110f41f3; WORD $0x801c // movss    dword [r8 + 4*rax], xmm3
 32593  	LONG $0xc17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rax + 8], 0
 32594  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 32595  	JNE  LBB4_851
 32596  
 32597  LBB4_854:
        	// Second element is zero: xmm2 = 0; the JG is never taken with
        	// "equal" flags, so control reaches LBB4_855.
 32598  	LONG $0xd2570f66 // xorpd    xmm2, xmm2
 32599  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 32600  	JG   LBB4_847
 32601  	JMP  LBB4_855
 32602  
 32603  LBB4_849:
        	// First element is non-zero: positive -> LBB4_850, negative falls
        	// through with xmm2 = xmm0.
 32604  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 32605  	JG   LBB4_850
 32606  
 32607  LBB4_853:
        	// First element is zero or negative: store xmm2, then classify the
        	// second element.
 32608  	LONG $0xda280f66               // movapd    xmm3, xmm2
 32609  	LONG $0x110f41f3; WORD $0x801c // movss    dword [r8 + 4*rax], xmm3
 32610  	LONG $0xc17c8348; WORD $0x0008 // cmp    qword [rcx + 8*rax + 8], 0
 32611  	LONG $0xd0280f66               // movapd    xmm2, xmm0
 32612  	JE   LBB4_854
 32613  
 32614  LBB4_851:
        	// Second element is non-zero: positive keeps xmm3 = xmm1.
 32615  	LONG $0xd9280f66 // movapd    xmm3, xmm1
 32616  	JG   LBB4_847
 32617  
 32618  LBB4_855:
        	// Second element is zero or negative: result is xmm2.
 32619  	LONG $0xda280f66 // movapd    xmm3, xmm2
 32620  	JMP  LBB4_847
 32621  
 32622  LBB4_856:
        	// Scalar loop, unrolled by 2: single-precision inputs at rcx, 64-bit
        	// integer outputs at r8.
        	//   rsi = edx & -2 (even part of the count in edx), rax = 0, xmm0 = 0.0f
        	// Per element x:
        	//   x compares equal to 0.0f (ucomiss sets ZF) -> store 0
        	//   otherwise edi = -(signbit(x)) | 1, i.e. -1 when the sign bit is set
        	//   and +1 when clear; the value goes through float (cvtsi2ss) and back
        	//   to a 64-bit integer (cvttss2si) before the store.
        	// NOTE(review): only JNE follows ucomiss (no parity check), so an
        	// unordered compare (NaN) takes the "equal" path and stores 0.
 32623  	WORD $0xd689             // mov    esi, edx
 32624  	WORD $0xe683; BYTE $0xfe // and    esi, -2
 32625  	WORD $0xc031             // xor    eax, eax
 32626  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 32627  	JMP  LBB4_859
 32628  
 32629  LBB4_857:
        	// Second element is non-zero: compute +/-1, store at +8, advance by
        	// 2 and leave the loop when rax reaches rsi.
 32630  	WORD $0x500f; BYTE $0xf9     // movmskps    edi, xmm1
 32631  	WORD $0xe783; BYTE $0x01     // and    edi, 1
 32632  	WORD $0xdff7                 // neg    edi
 32633  	WORD $0xcf83; BYTE $0x01     // or    edi, 1
 32634  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 32635  	LONG $0xcf2a0ff3             // cvtsi2ss    xmm1, edi
 32636  	LONG $0x2c0f48f3; BYTE $0xf9 // cvttss2si    rdi, xmm1
 32637  	LONG $0xc07c8949; BYTE $0x08 // mov    qword [r8 + 8*rax + 8], rdi
 32638  	LONG $0x02c08348             // add    rax, 2
 32639  	WORD $0x3948; BYTE $0xc6     // cmp    rsi, rax
 32640  	JE   LBB4_416
 32641  
 32642  LBB4_859:
        	// First element: zero test.
 32643  	LONG $0x0c100ff3; BYTE $0x81 // movss    xmm1, dword [rcx + 4*rax]
 32644  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1
 32645  	JNE  LBB4_861
 32646  	WORD $0xff31                 // xor    edi, edi
 32647  	JMP  LBB4_862
 32648  
 32649  LBB4_861:
        	// First element is non-zero: rdi = +/-1 taken from the sign bit.
 32650  	WORD $0x500f; BYTE $0xf9     // movmskps    edi, xmm1
 32651  	WORD $0xe783; BYTE $0x01     // and    edi, 1
 32652  	WORD $0xdff7                 // neg    edi
 32653  	WORD $0xcf83; BYTE $0x01     // or    edi, 1
 32654  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 32655  	LONG $0xcf2a0ff3             // cvtsi2ss    xmm1, edi
 32656  	LONG $0x2c0f48f3; BYTE $0xf9 // cvttss2si    rdi, xmm1
 32657  
 32658  LBB4_862:
        	// Store the first result, then handle the second element; its zero
        	// case is finished inline, its non-zero case in LBB4_857.
 32659  	LONG $0xc03c8949               // mov    qword [r8 + 8*rax], rdi
 32660  	LONG $0x4c100ff3; WORD $0x0481 // movss    xmm1, dword [rcx + 4*rax + 4]
 32661  	WORD $0x2e0f; BYTE $0xc1       // ucomiss    xmm0, xmm1
 32662  	JNE  LBB4_857
 32663  	WORD $0xff31                   // xor    edi, edi
 32664  	LONG $0xc07c8949; BYTE $0x08   // mov    qword [r8 + 8*rax + 8], rdi
 32665  	LONG $0x02c08348               // add    rax, 2
 32666  	WORD $0x3948; BYTE $0xc6       // cmp    rsi, rax
 32667  	JNE  LBB4_859
 32668  
 32669  LBB4_416:
        	// Odd-count tail: if bit 0 of dl is clear there is nothing left
        	// (-> LBB4_1655). Otherwise load the last float and start the same
        	// zero test; both outcomes continue outside this view (LBB4_1104 /
        	// LBB4_1105). The zero path clears ecx, so rcx no longer holds the
        	// input pointer from this point on.
 32670  	WORD $0xc2f6; BYTE $0x01     // test    dl, 1
 32671  	JE   LBB4_1655
 32672  	LONG $0x04100ff3; BYTE $0x81 // movss    xmm0, dword [rcx + 4*rax]
 32673  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 32674  	WORD $0x2e0f; BYTE $0xc8     // ucomiss    xmm1, xmm0
 32675  	JNE  LBB4_1104
 32676  	WORD $0xc931                 // xor    ecx, ecx
 32677  	JMP  LBB4_1105
 32678  
 32679  LBB4_884:
        	// Set-up for a 2x-unrolled SSE4.1 loop: signed 32-bit inputs at rcx,
        	// 64-bit outputs at r8, selecting one of three values per element.
        	//   rdx = r10d & -4           (r10d presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 4) >> 2) + 1 = number of 4-element groups
        	//   rdx - 4 == 0 (a single group) -> LBB4_1060, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 144[rbp]
        	//   (.LCPI4_15; value not visible here, presumably 1 in each qword lane).
 32680  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 32681  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 32682  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 32683  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32684  	LONG $0x02e9c149         // shr    r9, 2
 32685  	LONG $0x01c18349         // add    r9, 1
 32686  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32687  	JE   LBB4_1060
 32688  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32689  	LONG $0xfee78348         // and    rdi, -2
 32690  	WORD $0xf748; BYTE $0xdf // neg    rdi
 32691  	WORD $0xf631             // xor    esi, esi
 32692  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 32693  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 32694  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 32695  
 32696  LBB4_886:
        	// Loop body: 8 elements per iteration (two groups of 4).
        	// Per pair of dwords x:
        	//   xmm0/xmm1 = (x > 0) mask, sign-extended to qwords (pcmpgtd, pmovsxdq)
        	//   xmm5/xmm6 = (x != 0) mask, sign-extended to qwords (-1 or 0)
        	//   blendvpd replaces lanes where the x > 0 mask is set with xmm4.
        	// Net per lane: x > 0 -> xmm4's lane, x < 0 -> -1, x == 0 -> 0.
        	// blendvpd takes its selector implicitly from xmm0, hence the
        	// "movdqa xmm0, xmm1" before the second blend.
 32697  	LONG $0x2c7e0ff3; BYTE $0xb1               // movq    xmm5, qword [rcx + 4*rsi]
 32698  	LONG $0x747e0ff3; WORD $0x08b1             // movq    xmm6, qword [rcx + 4*rsi + 8]
 32699  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 32700  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 32701  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 32702  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 32703  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 32704  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 32705  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 32706  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 32707  	LONG $0x25380f66; BYTE $0xed               // pmovsxdq    xmm5, xmm5
 32708  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 32709  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32710  	LONG $0x25380f66; BYTE $0xf6               // pmovsxdq    xmm6, xmm6
 32711  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 32712  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32713  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 32714  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5
 32715  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
        	// Second group of 4 elements (unrolled copy of the above).
 32716  	LONG $0x6c7e0ff3; WORD $0x10b1             // movq    xmm5, qword [rcx + 4*rsi + 16]
 32717  	LONG $0x747e0ff3; WORD $0x18b1             // movq    xmm6, qword [rcx + 4*rsi + 24]
 32718  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 32719  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 32720  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 32721  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 32722  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 32723  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 32724  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 32725  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 32726  	LONG $0x25380f66; BYTE $0xed               // pmovsxdq    xmm5, xmm5
 32727  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 32728  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 32729  	LONG $0x25380f66; BYTE $0xf6               // pmovsxdq    xmm6, xmm6
 32730  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 32731  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 32732  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 32733  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 32734  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
        	// Advance by 8 elements; JNE tests the flags of "add rdi, 2".
 32735  	LONG $0x08c68348                           // add    rsi, 8
 32736  	LONG $0x02c78348                           // add    rdi, 2
 32737  	JNE  LBB4_886
 32738  	JMP  LBB4_1061
 32739  
 32740  LBB4_887:
        	// Set-up for a 2x-unrolled SSE4.1 loop: signed 32-bit inputs at rcx,
        	// single-precision float outputs at r8, selecting one of three values
        	// per element.
        	//   rdx = eax & -8            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 8) >> 3) + 1 = number of 8-element groups
        	//   rdx - 8 == 0 (a single group) -> LBB4_1066, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm2 = 0, xmm3 = all ones, xmm4 = 16-byte constant at 208[rbp]
        	//   (.LCPI4_19; value not visible here, presumably 1.0f in each lane).
 32741  	WORD $0xc289                               // mov    edx, eax
 32742  	WORD $0xe283; BYTE $0xf8                   // and    edx, -8
 32743  	LONG $0xf8728d48                           // lea    rsi, [rdx - 8]
 32744  	WORD $0x8949; BYTE $0xf1                   // mov    r9, rsi
 32745  	LONG $0x03e9c149                           // shr    r9, 3
 32746  	LONG $0x01c18349                           // add    r9, 1
 32747  	WORD $0x8548; BYTE $0xf6                   // test    rsi, rsi
 32748  	JE   LBB4_1066
 32749  	WORD $0x894c; BYTE $0xcf                   // mov    rdi, r9
 32750  	LONG $0xfee78348                           // and    rdi, -2
 32751  	WORD $0xf748; BYTE $0xdf                   // neg    rdi
 32752  	WORD $0xf631                               // xor    esi, esi
 32753  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 32754  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 32755  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */
 32756  
 32757  LBB4_889:
        	// Loop body: 16 elements per iteration (two groups of 8).
        	// Per 4 dwords x:
        	//   xmm0/xmm1 = (x > 0) mask (pcmpgtd)
        	//   xmm5/xmm6 = (x != 0) mask (-1 or 0) converted to float by
        	//               cvtdq2ps, giving -1.0f or 0.0f
        	//   blendvps replaces lanes where the x > 0 mask is set with xmm4.
        	// Net per lane: x > 0 -> xmm4's lane, x < 0 -> -1.0f, x == 0 -> 0.0f.
        	// blendvps takes its selector implicitly from xmm0, hence the
        	// "movdqa xmm0, xmm1" before the second blend.
 32758  	LONG $0x2c6f0ff3; BYTE $0xb1   // movdqu    xmm5, oword [rcx + 4*rsi]
 32759  	LONG $0x746f0ff3; WORD $0x10b1 // movdqu    xmm6, oword [rcx + 4*rsi + 16]
 32760  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32761  	LONG $0xc2660f66               // pcmpgtd    xmm0, xmm2
 32762  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32763  	LONG $0xca660f66               // pcmpgtd    xmm1, xmm2
 32764  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 32765  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 32766  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 32767  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 32768  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32769  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 32770  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 32771  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 32772  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32773  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 32774  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
        	// Second group of 8 elements (unrolled copy of the above).
 32775  	LONG $0x6c6f0ff3; WORD $0x20b1 // movdqu    xmm5, oword [rcx + 4*rsi + 32]
 32776  	LONG $0x746f0ff3; WORD $0x30b1 // movdqu    xmm6, oword [rcx + 4*rsi + 48]
 32777  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32778  	LONG $0xc2660f66               // pcmpgtd    xmm0, xmm2
 32779  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32780  	LONG $0xca660f66               // pcmpgtd    xmm1, xmm2
 32781  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 32782  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 32783  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 32784  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 32785  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32786  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 32787  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 32788  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 32789  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32790  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 32791  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
        	// Advance by 16 elements; JNE tests the flags of "add rdi, 2".
 32792  	LONG $0x10c68348               // add    rsi, 16
 32793  	LONG $0x02c78348               // add    rdi, 2
 32794  	JNE  LBB4_889
 32795  	JMP  LBB4_1067
 32796  
 32797  LBB4_945:
        	// Set-up for a 2x-unrolled SSE2 loop: double-precision inputs at rcx,
        	// 32-bit integer outputs at r8. Unlike the sibling loops, rdi is the
        	// element index here and rdx is the negative group counter.
        	//   rsi = eax & -4            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rsi - 4) >> 2) + 1 = number of 4-element groups
        	//   rsi - 4 == 0 (a single group) -> LBB4_1076, skipping the loop
        	//   rdx = -(r9 & -2), rdi = 0
        	//   xmm0 = 0.0, xmm1 = constant at 0[rbp] (.LCPI4_0), xmm2 = constant at
        	//   16[rbp] (.LCPI4_1); values not visible here, presumably a sign-bit
        	//   mask and 1.0 so that (x & xmm1) | xmm2 is copysign(1.0, x) --
        	//   TODO confirm against the constant pool.
 32798  	WORD $0xc689                 // mov    esi, eax
 32799  	WORD $0xe683; BYTE $0xfc     // and    esi, -4
 32800  	LONG $0xfc568d48             // lea    rdx, [rsi - 4]
 32801  	WORD $0x8949; BYTE $0xd1     // mov    r9, rdx
 32802  	LONG $0x02e9c149             // shr    r9, 2
 32803  	LONG $0x01c18349             // add    r9, 1
 32804  	WORD $0x8548; BYTE $0xd2     // test    rdx, rdx
 32805  	JE   LBB4_1076
 32806  	WORD $0x894c; BYTE $0xca     // mov    rdx, r9
 32807  	LONG $0xfee28348             // and    rdx, -2
 32808  	WORD $0xf748; BYTE $0xda     // neg    rdx
 32809  	WORD $0xff31                 // xor    edi, edi
 32810  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 32811  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 32812  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI4_1] */
 32813  
 32814  LBB4_947:
        	// Loop body: 8 elements per iteration (two groups of 4).
        	// Per pair of doubles x:
        	//   xmm5/xmm6 = (x == 0.0) mask; shufps 232 moves the low dword of each
        	//               qword mask into dword lanes 0 and 1
        	//   xmm3/xmm4 = (x & xmm1) | xmm2, truncated to int32 by cvttpd2dq
        	//               (two results in the low 64 bits, upper 64 bits zero)
        	//   andnps    = ~mask & value, forcing 0 where x == 0.0
        	//   movlhps   = packs the two 2-lane halves into one 4 x int32 vector
 32815  	LONG $0x1c100f66; BYTE $0xf9   // movupd    xmm3, oword [rcx + 8*rdi]
 32816  	LONG $0x64100f66; WORD $0x10f9 // movupd    xmm4, oword [rcx + 8*rdi + 16]
 32817  	LONG $0xeb280f66               // movapd    xmm5, xmm3
 32818  	LONG $0xe8c20f66; BYTE $0x00   // cmpeqpd    xmm5, xmm0
 32819  	LONG $0xe8edc60f               // shufps    xmm5, xmm5, 232
 32820  	LONG $0xf4280f66               // movapd    xmm6, xmm4
 32821  	LONG $0xf0c20f66; BYTE $0x00   // cmpeqpd    xmm6, xmm0
 32822  	LONG $0xe8f6c60f               // shufps    xmm6, xmm6, 232
 32823  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 32824  	LONG $0xda560f66               // orpd    xmm3, xmm2
 32825  	LONG $0xe1540f66               // andpd    xmm4, xmm1
 32826  	LONG $0xe2560f66               // orpd    xmm4, xmm2
 32827  	LONG $0xdbe60f66               // cvttpd2dq    xmm3, xmm3
 32828  	LONG $0xe4e60f66               // cvttpd2dq    xmm4, xmm4
 32829  	WORD $0x550f; BYTE $0xeb       // andnps    xmm5, xmm3
 32830  	WORD $0x550f; BYTE $0xf4       // andnps    xmm6, xmm4
 32831  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6
 32832  	LONG $0x2c110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm5
        	// Second group of 4 elements; same computation, but the second
        	// conversion reuses xmm3 as its destination.
 32833  	LONG $0x5c100f66; WORD $0x20f9 // movupd    xmm3, oword [rcx + 8*rdi + 32]
 32834  	LONG $0x64100f66; WORD $0x30f9 // movupd    xmm4, oword [rcx + 8*rdi + 48]
 32835  	LONG $0xeb280f66               // movapd    xmm5, xmm3
 32836  	LONG $0xe8c20f66; BYTE $0x00   // cmpeqpd    xmm5, xmm0
 32837  	LONG $0xe8edc60f               // shufps    xmm5, xmm5, 232
 32838  	LONG $0xf4280f66               // movapd    xmm6, xmm4
 32839  	LONG $0xf0c20f66; BYTE $0x00   // cmpeqpd    xmm6, xmm0
 32840  	LONG $0xe8f6c60f               // shufps    xmm6, xmm6, 232
 32841  	LONG $0xd9540f66               // andpd    xmm3, xmm1
 32842  	LONG $0xda560f66               // orpd    xmm3, xmm2
 32843  	LONG $0xe1540f66               // andpd    xmm4, xmm1
 32844  	LONG $0xe2560f66               // orpd    xmm4, xmm2
 32845  	LONG $0xdbe60f66               // cvttpd2dq    xmm3, xmm3
 32846  	WORD $0x550f; BYTE $0xeb       // andnps    xmm5, xmm3
 32847  	LONG $0xdce60f66               // cvttpd2dq    xmm3, xmm4
 32848  	WORD $0x550f; BYTE $0xf3       // andnps    xmm6, xmm3
 32849  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6
 32850  	LONG $0x6c110f41; WORD $0x10b8 // movups    oword [r8 + 4*rdi + 16], xmm5
        	// Advance by 8 elements; JNE tests the flags of "add rdx, 2".
 32851  	LONG $0x08c78348               // add    rdi, 8
 32852  	LONG $0x02c28348               // add    rdx, 2
 32853  	JNE  LBB4_947
 32854  	JMP  LBB4_1077
 32855  
 32856  LBB4_953:
        	// Set-up for a 2x-unrolled SSE4.1 loop: 64-bit inputs at rcx, 32-bit
        	// outputs at r8; each output is a constant when the input is non-zero
        	// and 0 when it is zero.
        	//   rdx = eax & -4            (eax presumably holds the element count --
        	//                              TODO confirm; it is set outside this view)
        	//   r9  = ((rdx - 4) >> 2) + 1 = number of 4-element groups
        	//   rdx - 4 == 0 (a single group) -> LBB4_1082, skipping the loop
        	//   rdi = -(r9 & -2), rsi = 0
        	//   xmm0 = 0, xmm1 = 16-byte constant at 160[rbp] (.LCPI4_16; value not
        	//   visible here, presumably integer 1 in the low dword lanes).
 32857  	WORD $0xc289             // mov    edx, eax
 32858  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 32859  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 32860  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32861  	LONG $0x02e9c149         // shr    r9, 2
 32862  	LONG $0x01c18349         // add    r9, 1
 32863  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32864  	JE   LBB4_1082
 32865  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32866  	LONG $0xfee78348         // and    rdi, -2
 32867  	WORD $0xf748; BYTE $0xdf // neg    rdi
 32868  	WORD $0xf631             // xor    esi, esi
 32869  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 32870  	QUAD $0x000000a08d6f0f66 // movdqa    xmm1, oword 160[rbp] /* [rip + .LCPI4_16] */
 32871  
 32872  LBB4_955:
        	// Loop body: 8 elements per iteration (two groups of 4).
        	// Per pair of qwords x:
        	//   pcmpeqq    -> (x == 0) mask per qword
        	//   pshufd 232 -> low dword of each qword mask moved to lanes 0 and 1
        	//   pandn      -> ~mask & xmm1: xmm1's bits where x != 0, else 0
        	//   punpcklqdq -> joins the two 2-lane halves into one 4 x 32-bit vector
 32873  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 32874  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 32875  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 32876  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 32877  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 32878  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 32879  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 32880  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 32881  	LONG $0xd36c0f66                           // punpcklqdq    xmm2, xmm3
 32882  	LONG $0x7f0f41f3; WORD $0xb014             // movdqu    oword [r8 + 4*rsi], xmm2
        	// Second group of 4 elements (unrolled copy of the above).
 32883  	LONG $0x546f0ff3; WORD $0x20f1             // movdqu    xmm2, oword [rcx + 8*rsi + 32]
 32884  	LONG $0x5c6f0ff3; WORD $0x30f1             // movdqu    xmm3, oword [rcx + 8*rsi + 48]
 32885  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 32886  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 32887  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 32888  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 32889  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 32890  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 32891  	LONG $0xd36c0f66                           // punpcklqdq    xmm2, xmm3
 32892  	LONG $0x7f0f41f3; WORD $0xb054; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm2
        	// Advance by 8 elements; JNE tests the flags of "add rdi, 2".
 32893  	LONG $0x08c68348                           // add    rsi, 8
 32894  	LONG $0x02c78348                           // add    rdi, 2
 32895  	JNE  LBB4_955
 32896  	JMP  LBB4_1083
 32897  
 32898  LBB4_956: // 16-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in != 0) ? (K & 0xFFFF) : 0, K = constant at 80[rbp] (presumably all 1s -- confirm against .LCPI4_8)
 32899  	WORD $0xc289                 // mov    edx, eax
 32900  	WORD $0xe283; BYTE $0xf8     // and    edx, -8  -- rdx = eax rounded down to a multiple of 8 (eax presumably the element count -- confirm)
 32901  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 32902  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 32903  	LONG $0x03e9c149             // shr    r9, 3
 32904  	LONG $0x01c18349             // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 32905  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 32906  	JE   LBB4_1087 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 32907  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 32908  	LONG $0xfee78348             // and    rdi, -2
 32909  	WORD $0xf748; BYTE $0xdf     // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 32910  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 32911  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0  -- xmm0 = 0
 32912  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1  -- xmm1 = all ones, used to invert compare masks
 32913  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm2 = K
 32914  
 32915  LBB4_958: // loop body: two groups of 8 elements (16 elements) per pass
 32916  	LONG $0x1c7e0ff3; BYTE $0x71               // movq    xmm3, qword [rcx + 2*rsi]  -- four 16-bit inputs
 32917  	LONG $0x647e0ff3; WORD $0x0871             // movq    xmm4, qword [rcx + 2*rsi + 8]
 32918  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0  -- word lanes = all ones where in == 0
 32919  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1  -- invert: all ones where in != 0
 32920  	LONG $0x33380f66; BYTE $0xdb               // pmovzxwd    xmm3, xmm3  -- zero-extend the four word masks to dwords
 32921  	LONG $0xdadb0f66                           // pand    xmm3, xmm2  -- keep K's low 16 bits where in != 0
 32922  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 32923  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32924  	LONG $0x33380f66; BYTE $0xe4               // pmovzxwd    xmm4, xmm4
 32925  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32926  	LONG $0x7f0f41f3; WORD $0xb01c             // movdqu    oword [r8 + 4*rsi], xmm3
 32927  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm4
 32928  	LONG $0x5c7e0ff3; WORD $0x1071             // movq    xmm3, qword [rcx + 2*rsi + 16]  -- second group of 8 (elements rsi+8 .. rsi+15)
 32929  	LONG $0x647e0ff3; WORD $0x1871             // movq    xmm4, qword [rcx + 2*rsi + 24]
 32930  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 32931  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 32932  	LONG $0x33380f66; BYTE $0xdb               // pmovzxwd    xmm3, xmm3
 32933  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 32934  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 32935  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 32936  	LONG $0x33380f66; BYTE $0xe4               // pmovzxwd    xmm4, xmm4
 32937  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 32938  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm3
 32939  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm4
 32940  	LONG $0x10c68348                           // add    rsi, 16
 32941  	LONG $0x02c78348                           // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 32942  	JNE  LBB4_958
 32943  	JMP  LBB4_1088 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 32944  
 32945  LBB4_959: // signed 16-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in > 0) ? K : ((in != 0) ? -1 : 0), K = constant at 80[rbp] (presumably all 1s, giving sign(in) -- confirm)
 32946  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 32947  	WORD $0xe283; BYTE $0xf8 // and    edx, -8  -- rdx = r10d rounded down to a multiple of 8 (r10d presumably the element count -- confirm)
 32948  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 32949  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 32950  	LONG $0x03e9c149         // shr    r9, 3
 32951  	LONG $0x01c18349         // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 32952  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 32953  	JE   LBB4_1092 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 32954  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 32955  	LONG $0xfee78348         // and    rdi, -2
 32956  	WORD $0xf748; BYTE $0xdf // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 32957  	WORD $0xf631             // xor    esi, esi  -- rsi = element index
 32958  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2  -- xmm2 = 0
 32959  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3  -- xmm3 = all ones, used to invert compare masks
 32960  	LONG $0x5065280f         // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm4 = K
 32961  
 32962  LBB4_961: // loop body: two groups of 8 elements (16 elements) per pass
 32963  	LONG $0x2c7e0ff3; BYTE $0x71   // movq    xmm5, qword [rcx + 2*rsi]  -- four 16-bit inputs
 32964  	LONG $0x747e0ff3; WORD $0x0871 // movq    xmm6, qword [rcx + 2*rsi + 8]
 32965  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32966  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2  -- signed compare: all ones where in > 0
 32967  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0  -- widen the mask to dwords; xmm0 is blendvps's implicit selector
 32968  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32969  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 32970  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 32971  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 32972  	LONG $0xebef0f66               // pxor    xmm5, xmm3  -- all ones where in != 0
 32973  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5  -- sign-extend: -1 where in != 0, else 0
 32974  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 32975  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32976  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 32977  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0  -- lanes with in > 0 take K; the rest keep -1 / 0
 32978  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  -- load the second vector's selector into xmm0
 32979  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32980  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 32981  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
 32982  	LONG $0x6c7e0ff3; WORD $0x1071 // movq    xmm5, qword [rcx + 2*rsi + 16]  -- second group of 8 (elements rsi+8 .. rsi+15)
 32983  	LONG $0x747e0ff3; WORD $0x1871 // movq    xmm6, qword [rcx + 2*rsi + 24]
 32984  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 32985  	LONG $0xc2650f66               // pcmpgtw    xmm0, xmm2
 32986  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0
 32987  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 32988  	LONG $0xca650f66               // pcmpgtw    xmm1, xmm2
 32989  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 32990  	LONG $0xea750f66               // pcmpeqw    xmm5, xmm2
 32991  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 32992  	LONG $0x23380f66; BYTE $0xed   // pmovsxwd    xmm5, xmm5
 32993  	LONG $0xf2750f66               // pcmpeqw    xmm6, xmm2
 32994  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 32995  	LONG $0x23380f66; BYTE $0xf6   // pmovsxwd    xmm6, xmm6
 32996  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 32997  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 32998  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 32999  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 33000  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
 33001  	LONG $0x10c68348               // add    rsi, 16
 33002  	LONG $0x02c78348               // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33003  	JNE  LBB4_961
 33004  	JMP  LBB4_1093 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33005  
 33006  LBB4_962: // signed 64-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in > 0) ? K : ((in != 0) ? -1 : 0), K = constant at 160[rbp] (presumably all 1s, giving sign(in) -- confirm)
 33007  	WORD $0x8944; BYTE $0xd2                   // mov    edx, r10d
 33008  	WORD $0xe283; BYTE $0xfc                   // and    edx, -4  -- rdx = r10d rounded down to a multiple of 4 (r10d presumably the element count -- confirm)
 33009  	LONG $0xfc728d48                           // lea    rsi, [rdx - 4]
 33010  	WORD $0x8949; BYTE $0xf1                   // mov    r9, rsi
 33011  	LONG $0x02e9c149                           // shr    r9, 2
 33012  	LONG $0x01c18349                           // add    r9, 1  -- r9 = ((rdx - 4) >> 2) + 1: number of 4-element groups (assumes rdx >= 4)
 33013  	WORD $0x8548; BYTE $0xf6                   // test    rsi, rsi
 33014  	JE   LBB4_1098 // rdx == 4 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33015  	WORD $0x894c; BYTE $0xcf                   // mov    rdi, r9
 33016  	LONG $0xfee78348                           // and    rdi, -2
 33017  	WORD $0xf748; BYTE $0xdf                   // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33018  	WORD $0xf631                               // xor    esi, esi  -- rsi = element index
 33019  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2  -- xmm2 = 0
 33020  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3  -- xmm3 = all ones, used to invert compare masks
 33021  	LONG $0xa0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 160[rbp] /* [rip + .LCPI4_16] */  -- xmm4 = K
 33022  
 33023  LBB4_964: // loop body: two groups of 4 elements (8 elements) per pass
 33024  	LONG $0x2c6f0ff3; BYTE $0xf1   // movdqu    xmm5, oword [rcx + 8*rsi]  -- two 64-bit inputs
 33025  	LONG $0x746f0ff3; WORD $0x10f1 // movdqu    xmm6, oword [rcx + 8*rsi + 16]
 33026  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 33027  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2  -- signed compare: all ones where in > 0
 33028  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232  -- imm 0xE8 moves dwords 0 and 2 into the low qword (one dword per 64-bit lane)
 33029  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 33030  	LONG $0x37380f66; BYTE $0xca   // pcmpgtq    xmm1, xmm2
 33031  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 33032  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2
 33033  	LONG $0xed700f66; BYTE $0xe8   // pshufd    xmm5, xmm5, 232
 33034  	LONG $0xebef0f66               // pxor    xmm5, xmm3  -- invert: -1 where in != 0, else 0
 33035  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 33036  	LONG $0xf6700f66; BYTE $0xe8   // pshufd    xmm6, xmm6, 232
 33037  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33038  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0  -- lanes with in > 0 take K; the rest keep -1 / 0
 33039  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  -- load the second vector's selector into xmm0
 33040  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 33041  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6  -- join the two low qwords into four 32-bit results
 33042  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 33043  	LONG $0x6c6f0ff3; WORD $0x20f1 // movdqu    xmm5, oword [rcx + 8*rsi + 32]  -- second group of 4 (elements rsi+4 .. rsi+7)
 33044  	LONG $0x746f0ff3; WORD $0x30f1 // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 33045  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 33046  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
 33047  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
 33048  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 33049  	LONG $0x37380f66; BYTE $0xca   // pcmpgtq    xmm1, xmm2
 33050  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 33051  	LONG $0x29380f66; BYTE $0xea   // pcmpeqq    xmm5, xmm2
 33052  	LONG $0xed700f66; BYTE $0xe8   // pshufd    xmm5, xmm5, 232
 33053  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 33054  	LONG $0x29380f66; BYTE $0xf2   // pcmpeqq    xmm6, xmm2
 33055  	LONG $0xf6700f66; BYTE $0xe8   // pshufd    xmm6, xmm6, 232
 33056  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33057  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 33058  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 33059  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 33060  	WORD $0x160f; BYTE $0xee       // movlhps    xmm5, xmm6
 33061  	LONG $0x6c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm5
 33062  	LONG $0x08c68348               // add    rsi, 8
 33063  	LONG $0x02c78348               // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33064  	JNE  LBB4_964
 33065  	JMP  LBB4_1099 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33066  
 33067  LBB4_965: // float32 inputs at [rcx] -> int32 outputs at [r8]: out = (in != 0) ? (sign bit set ? -1 : K) : 0, K = constant at 80[rbp] (presumably all 1s -- confirm); 8-wide loop followed by a scalar tail
 33068  	WORD $0xc289                 // mov    edx, eax
 33069  	WORD $0xe283; BYTE $0xf8     // and    edx, -8  -- rdx = eax rounded down to a multiple of 8: end index of the vector loop
 33070  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 33071  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0  -- xmm0 = 0.0
 33072  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm1 = K
 33073  
 33074  LBB4_966: // vector loop: 8 elements per pass (assumes rdx != 0 on entry, since the exit test is rsi == rdx -- confirm at the jump site)
 33075  	LONG $0x146f0ff3; BYTE $0xb1   // movdqu    xmm2, oword [rcx + 4*rsi]
 33076  	LONG $0x5c6f0ff3; WORD $0x10b1 // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 33077  	LONG $0xe26f0f66               // movdqa    xmm4, xmm2
 33078  	LONG $0xe4720f66; BYTE $0x1f   // psrad    xmm4, 31  -- broadcast each lane's sign bit: -1 if set, else 0
 33079  	LONG $0xe1eb0f66               // por    xmm4, xmm1  -- sign-set lanes stay -1; others become K
 33080  	LONG $0xeb6f0f66               // movdqa    xmm5, xmm3
 33081  	LONG $0xe5720f66; BYTE $0x1f   // psrad    xmm5, 31
 33082  	LONG $0xe9eb0f66               // por    xmm5, xmm1
 33083  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4  -- int32 -> float32 ...
 33084  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 33085  	LONG $0xe45b0ff3               // cvttps2dq    xmm4, xmm4  -- ... and back to int32 with truncation
 33086  	LONG $0xed5b0ff3               // cvttps2dq    xmm5, xmm5
 33087  	LONG $0x04d0c20f               // cmpneqps    xmm2, xmm0  -- all ones where in != 0.0 (predicate 4 is also true for NaN)
 33088  	WORD $0x540f; BYTE $0xd4       // andps    xmm2, xmm4  -- zero the result where in == 0.0
 33089  	LONG $0x04d8c20f               // cmpneqps    xmm3, xmm0
 33090  	WORD $0x540f; BYTE $0xdd       // andps    xmm3, xmm5
 33091  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 33092  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 33093  	LONG $0x08c68348               // add    rsi, 8
 33094  	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
 33095  	JNE  LBB4_966
 33096  	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax  -- rax is the total element count (it bounds the scalar tail below)
 33097  	JE   LBB4_1655 // no leftover elements; target is outside this chunk
 33098  
 33099  LBB4_968: // scalar tail setup; rdx holds the first index still to process
 33100  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0  -- xmm0 = 0.0
 33101  	JMP  LBB4_970
 33102  
 33103  LBB4_969: // store one scalar result and advance
 33104  	LONG $0x90348941         // mov    dword [r8 + 4*rdx], esi
 33105  	LONG $0x01c28348         // add    rdx, 1
 33106  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 33107  	JE   LBB4_1655 // all rax elements written; target is outside this chunk
 33108  
 33109  LBB4_970: // compute one scalar result in esi
 33110  	LONG $0x0c100ff3; BYTE $0x91 // movss    xmm1, dword [rcx + 4*rdx]
 33111  	WORD $0xf631                 // xor    esi, esi  -- default result 0
 33112  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1  -- compare 0.0 with in
 33113  	JE   LBB4_969 // ZF set: in == 0.0 -> store 0. NOTE(review): ucomiss also sets ZF for NaN, so NaN stores 0 here while the vector loop's cmpneqps treats NaN as nonzero -- confirm intended
 33114  	WORD $0x500f; BYTE $0xf1     // movmskps    esi, xmm1  -- gather lane sign bits
 33115  	WORD $0xe683; BYTE $0x01     // and    esi, 1  -- keep lane 0: 1 if the sign bit is set
 33116  	WORD $0xdef7                 // neg    esi  -- 0 or -1
 33117  	WORD $0xce83; BYTE $0x01     // or    esi, 1  -- 1 or -1
 33118  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 33119  	LONG $0xce2a0ff3             // cvtsi2ss    xmm1, esi  -- int32 -> float32 ...
 33120  	LONG $0xf12c0ff3             // cvttss2si    esi, xmm1  -- ... and back to int32 with truncation
 33121  	JMP  LBB4_969
 33122  
 33123  LBB4_496: // 32-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in != 0) ? K : 0, K = constant at 80[rbp] (presumably all 1s -- confirm against .LCPI4_8)
 33124  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
 33125  	WORD $0xe283; BYTE $0xf8     // and    edx, -8  -- rdx = r10d rounded down to a multiple of 8 (r10d presumably the element count -- confirm)
 33126  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 33127  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33128  	LONG $0x03e9c149             // shr    r9, 3
 33129  	LONG $0x01c18349             // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 33130  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33131  	JE   LBB4_1228 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33132  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33133  	LONG $0xfee78348             // and    rdi, -2
 33134  	WORD $0xf748; BYTE $0xdf     // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33135  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 33136  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0  -- xmm0 = 0
 33137  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm1 = K
 33138  
 33139  LBB4_498: // loop body: two groups of 8 elements (16 elements) per pass
 33140  	LONG $0x146f0ff3; BYTE $0xb1               // movdqu    xmm2, oword [rcx + 4*rsi]
 33141  	LONG $0x5c6f0ff3; WORD $0x10b1             // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 33142  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0  -- dword lanes = all ones where in == 0
 33143  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1  -- xmm2 = ~mask & K: K where in != 0, else 0
 33144  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 33145  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 33146  	LONG $0x7f0f41f3; WORD $0xb014             // movdqu    oword [r8 + 4*rsi], xmm2
 33147  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm3
 33148  	LONG $0x546f0ff3; WORD $0x20b1             // movdqu    xmm2, oword [rcx + 4*rsi + 32]  -- second group of 8 (elements rsi+8 .. rsi+15)
 33149  	LONG $0x5c6f0ff3; WORD $0x30b1             // movdqu    xmm3, oword [rcx + 4*rsi + 48]
 33150  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0
 33151  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 33152  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 33153  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 33154  	LONG $0x7f0f41f3; WORD $0xb054; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm2
 33155  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm3
 33156  	LONG $0x10c68348                           // add    rsi, 16
 33157  	LONG $0x02c78348                           // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33158  	JNE  LBB4_498
 33159  	JMP  LBB4_1229 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33160  
 33161  LBB4_504: // signed 8-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in > 0) ? K : ((in != 0) ? -1 : 0), K = constant at 80[rbp] (presumably all 1s, giving sign(in) -- confirm)
 33162  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 33163  	WORD $0xe283; BYTE $0xf8 // and    edx, -8  -- rdx = r10d rounded down to a multiple of 8 (r10d presumably the element count -- confirm)
 33164  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 33165  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33166  	LONG $0x03e9c149         // shr    r9, 3
 33167  	LONG $0x01c18349         // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 33168  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 33169  	JE   LBB4_1236 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33170  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33171  	LONG $0xfee78348         // and    rdi, -2
 33172  	WORD $0xf748; BYTE $0xdf // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33173  	WORD $0xf631             // xor    esi, esi  -- rsi = element index
 33174  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2  -- xmm2 = 0
 33175  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3  -- xmm3 = all ones, used to invert compare masks
 33176  	LONG $0x5065280f         // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm4 = K
 33177  
 33178  LBB4_506: // loop body: two groups of 8 elements (16 elements) per pass
 33179  	LONG $0x2c6e0f66; BYTE $0x31   // movd    xmm5, dword [rcx + rsi]  -- four 8-bit inputs
 33180  	LONG $0x746e0f66; WORD $0x0431 // movd    xmm6, dword [rcx + rsi + 4]
 33181  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 33182  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2  -- signed compare: all ones where in > 0
 33183  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0  -- widen the mask to dwords; xmm0 is blendvps's implicit selector
 33184  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 33185  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 33186  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 33187  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 33188  	LONG $0xebef0f66               // pxor    xmm5, xmm3  -- all ones where in != 0
 33189  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5  -- sign-extend: -1 where in != 0, else 0
 33190  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 33191  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33192  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
 33193  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0  -- lanes with in > 0 take K; the rest keep -1 / 0
 33194  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  -- load the second vector's selector into xmm0
 33195  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 33196  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 33197  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
 33198  	LONG $0x6c6e0f66; WORD $0x0831 // movd    xmm5, dword [rcx + rsi + 8]  -- second group of 8 (elements rsi+8 .. rsi+15)
 33199  	LONG $0x746e0f66; WORD $0x0c31 // movd    xmm6, dword [rcx + rsi + 12]
 33200  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 33201  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2
 33202  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0
 33203  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 33204  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 33205  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 33206  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 33207  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 33208  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5
 33209  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 33210  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33211  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
 33212  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 33213  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 33214  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 33215  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 33216  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
 33217  	LONG $0x10c68348               // add    rsi, 16
 33218  	LONG $0x02c78348               // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33219  	JNE  LBB4_506
 33220  	JMP  LBB4_1237 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33221  
 33222  LBB4_524: // 8-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (in != 0) ? (K & 0xFF) : 0, K = constant at 80[rbp] (presumably all 1s -- confirm against .LCPI4_8)
 33223  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
 33224  	WORD $0xe283; BYTE $0xf8     // and    edx, -8  -- rdx = r10d rounded down to a multiple of 8 (r10d presumably the element count -- confirm)
 33225  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 33226  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33227  	LONG $0x03e9c149             // shr    r9, 3
 33228  	LONG $0x01c18349             // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 33229  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33230  	JE   LBB4_1245 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33231  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33232  	LONG $0xfee78348             // and    rdi, -2
 33233  	WORD $0xf748; BYTE $0xdf     // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33234  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 33235  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0  -- xmm0 = 0
 33236  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1  -- xmm1 = all ones, used to invert compare masks
 33237  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm2 = K
 33238  
 33239  LBB4_526: // loop body: two groups of 8 elements (16 elements) per pass
 33240  	LONG $0x1c6e0f66; BYTE $0x31               // movd    xmm3, dword [rcx + rsi]  -- four 8-bit inputs
 33241  	LONG $0x646e0f66; WORD $0x0431             // movd    xmm4, dword [rcx + rsi + 4]
 33242  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0  -- byte lanes = all ones where in == 0
 33243  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1  -- invert: all ones where in != 0
 33244  	LONG $0x31380f66; BYTE $0xdb               // pmovzxbd    xmm3, xmm3  -- zero-extend the four byte masks to dwords
 33245  	LONG $0xdadb0f66                           // pand    xmm3, xmm2  -- keep K's low 8 bits where in != 0
 33246  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 33247  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33248  	LONG $0x31380f66; BYTE $0xe4               // pmovzxbd    xmm4, xmm4
 33249  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33250  	LONG $0x7f0f41f3; WORD $0xb01c             // movdqu    oword [r8 + 4*rsi], xmm3
 33251  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm4
 33252  	LONG $0x5c6e0f66; WORD $0x0831             // movd    xmm3, dword [rcx + rsi + 8]  -- second group of 8 (elements rsi+8 .. rsi+15)
 33253  	LONG $0x646e0f66; WORD $0x0c31             // movd    xmm4, dword [rcx + rsi + 12]
 33254  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 33255  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 33256  	LONG $0x31380f66; BYTE $0xdb               // pmovzxbd    xmm3, xmm3
 33257  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 33258  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 33259  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33260  	LONG $0x31380f66; BYTE $0xe4               // pmovzxbd    xmm4, xmm4
 33261  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33262  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm3
 33263  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm4
 33264  	LONG $0x10c68348                           // add    rsi, 16
 33265  	LONG $0x02c78348                           // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33266  	JNE  LBB4_526
 33267  	JMP  LBB4_1246 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33268  
 33269  LBB4_529: // signed 32-bit inputs at [rcx] -> 32-bit outputs at [r8]: out = (K > in) ? ((in != 0) ? -1 : 0) : K, K = constant at 80[rbp] (presumably all 1s, giving sign(in) -- confirm)
 33270  	WORD $0x8944; BYTE $0xda     // mov    edx, r11d
 33271  	WORD $0xe283; BYTE $0xf8     // and    edx, -8  -- rdx = r11d rounded down to a multiple of 8 (r11d presumably the element count -- confirm)
 33272  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 33273  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33274  	LONG $0x03e9c149             // shr    r9, 3
 33275  	LONG $0x01c18349             // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 33276  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33277  	JE   LBB4_1253 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33278  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33279  	LONG $0xfee78348             // and    rdi, -2
 33280  	WORD $0xf748; BYTE $0xdf     // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33281  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 33282  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2  -- xmm2 = 0
 33283  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3  -- xmm3 = all ones, used to invert compare masks
 33284  	LONG $0x656f0f66; BYTE $0x50 // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */  -- xmm4 = K
 33285  
 33286  LBB4_531: // loop body: two groups of 8 elements (16 elements) per pass
 33287  	LONG $0x2c6f0ff3; BYTE $0xb1   // movdqu    xmm5, oword [rcx + 4*rsi]
 33288  	LONG $0x746f0ff3; WORD $0x10b1 // movdqu    xmm6, oword [rcx + 4*rsi + 16]
 33289  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 33290  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5  -- signed compare: all ones where K > in; xmm0 is blendvps's implicit selector
 33291  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 33292  	LONG $0xebef0f66               // pxor    xmm5, xmm3  -- -1 where in != 0, else 0
 33293  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 33294  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 33295  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 33296  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33297  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4  -- start from K in every lane
 33298  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0  -- lanes with K > in take -1 / 0; the rest keep K
 33299  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 33300  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1  -- load the second vector's selector into xmm0
 33301  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 33302  	LONG $0x3c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm7
 33303  	LONG $0x6c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm5
 33304  	LONG $0x6c6f0ff3; WORD $0x20b1 // movdqu    xmm5, oword [rcx + 4*rsi + 32]  -- second group of 8 (elements rsi+8 .. rsi+15)
 33305  	LONG $0x746f0ff3; WORD $0x30b1 // movdqu    xmm6, oword [rcx + 4*rsi + 48]
 33306  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 33307  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5
 33308  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 33309  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 33310  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 33311  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 33312  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 33313  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 33314  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 33315  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0
 33316  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 33317  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 33318  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 33319  	LONG $0x7c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm7
 33320  	LONG $0x6c110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm5
 33321  	LONG $0x10c68348               // add    rsi, 16
 33322  	LONG $0x02c78348               // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33323  	JNE  LBB4_531
 33324  	JMP  LBB4_1254 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33325  
 33326  LBB4_544: // float64 inputs at [rcx] -> float64 outputs at [r8]: out = (in != 0.0) ? ((in & C0) | C1) : 0, C0/C1 = constants at 0[rbp] / 16[rbp] (presumably sign-bit mask and 1.0, i.e. copysign(1.0, in) -- confirm)
 33327  	WORD $0xc289                 // mov    edx, eax
 33328  	WORD $0xe283; BYTE $0xfc     // and    edx, -4  -- rdx = eax rounded down to a multiple of 4 (eax presumably the element count -- confirm)
 33329  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 33330  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33331  	LONG $0x02e9c149             // shr    r9, 2
 33332  	LONG $0x01c18349             // add    r9, 1  -- r9 = ((rdx - 4) >> 2) + 1: number of 4-element groups (assumes rdx >= 4)
 33333  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33334  	JE   LBB4_1262 // rdx == 4 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33335  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33336  	LONG $0xfee78348             // and    rdi, -2
 33337  	WORD $0xf748; BYTE $0xdf     // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33338  	WORD $0xf631                 // xor    esi, esi  -- rsi = element index
 33339  	LONG $0xc0570f66             // xorpd    xmm0, xmm0  -- xmm0 = 0.0
 33340  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */  -- xmm1 = C0
 33341  	LONG $0x55280f66; BYTE $0x10 // movapd    xmm2, oword 16[rbp] /* [rip + .LCPI4_1] */  -- xmm2 = C1
 33342  
 33343  LBB4_546: // loop body: two groups of 4 elements (8 elements) per pass, two doubles per vector
 33344  	LONG $0x1c100f66; BYTE $0xf1               // movupd    xmm3, oword [rcx + 8*rsi]
 33345  	LONG $0x64100f66; WORD $0x10f1             // movupd    xmm4, oword [rcx + 8*rsi + 16]
 33346  	LONG $0xeb280f66                           // movapd    xmm5, xmm3
 33347  	LONG $0xe9540f66                           // andpd    xmm5, xmm1  -- in & C0
 33348  	LONG $0xea560f66                           // orpd    xmm5, xmm2  -- (in & C0) | C1
 33349  	LONG $0xf4280f66                           // movapd    xmm6, xmm4
 33350  	LONG $0xf1540f66                           // andpd    xmm6, xmm1
 33351  	LONG $0xf2560f66                           // orpd    xmm6, xmm2
 33352  	LONG $0xd8c20f66; BYTE $0x04               // cmpneqpd    xmm3, xmm0  -- all ones where in != 0.0 (predicate 4 is also true for NaN)
 33353  	LONG $0xdd540f66                           // andpd    xmm3, xmm5  -- zero the result where in == 0.0
 33354  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 33355  	LONG $0xe6540f66                           // andpd    xmm4, xmm6
 33356  	LONG $0x110f4166; WORD $0xf01c             // movupd    oword [r8 + 8*rsi], xmm3
 33357  	LONG $0x110f4166; WORD $0xf064; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm4
 33358  	LONG $0x5c100f66; WORD $0x20f1             // movupd    xmm3, oword [rcx + 8*rsi + 32]  -- second group of 4 (elements rsi+4 .. rsi+7)
 33359  	LONG $0x64100f66; WORD $0x30f1             // movupd    xmm4, oword [rcx + 8*rsi + 48]
 33360  	LONG $0xeb280f66                           // movapd    xmm5, xmm3
 33361  	LONG $0xe9540f66                           // andpd    xmm5, xmm1
 33362  	LONG $0xea560f66                           // orpd    xmm5, xmm2
 33363  	LONG $0xf4280f66                           // movapd    xmm6, xmm4
 33364  	LONG $0xf1540f66                           // andpd    xmm6, xmm1
 33365  	LONG $0xf2560f66                           // orpd    xmm6, xmm2
 33366  	LONG $0xd8c20f66; BYTE $0x04               // cmpneqpd    xmm3, xmm0
 33367  	LONG $0xdd540f66                           // andpd    xmm3, xmm5
 33368  	LONG $0xe0c20f66; BYTE $0x04               // cmpneqpd    xmm4, xmm0
 33369  	LONG $0xe6540f66                           // andpd    xmm4, xmm6
 33370  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm3
 33371  	LONG $0x110f4166; WORD $0xf064; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm4
 33372  	LONG $0x08c68348                           // add    rsi, 8
 33373  	LONG $0x02c78348                           // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33374  	JNE  LBB4_546
 33375  	JMP  LBB4_1263 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33376  
 33377  LBB4_625: // 32-bit inputs at [rcx] -> 8-bit outputs at [r8]: out = (in != 0) ? K : 0, K = per-byte constant at 128[rbp] (presumably all 1s -- confirm against .LCPI4_12)
 33378  	WORD $0xc289             // mov    edx, eax
 33379  	WORD $0xe283; BYTE $0xf8 // and    edx, -8  -- rdx = eax rounded down to a multiple of 8 (eax presumably the element count -- confirm)
 33380  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 33381  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33382  	LONG $0x03e9c149         // shr    r9, 3
 33383  	LONG $0x01c18349         // add    r9, 1  -- r9 = ((rdx - 8) >> 3) + 1: number of 8-element groups (assumes rdx >= 8)
 33384  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 33385  	JE   LBB4_1271 // rdx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33386  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33387  	LONG $0xfee78348         // and    rdi, -2
 33388  	WORD $0xf748; BYTE $0xdf // neg    rdi  -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches 0
 33389  	WORD $0xf631             // xor    esi, esi  -- rsi = element index
 33390  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0  -- xmm0 = 0
 33391  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1  -- xmm1 = all ones, used to invert compare masks
 33392  	QUAD $0x00000080956f0f66 // movdqa    xmm2, oword 128[rbp] /* [rip + .LCPI4_12] */  -- xmm2 = K
 33393  
 33394  LBB4_627: // loop body: two groups of 8 elements (16 elements) per pass
 33395  	LONG $0x1c6f0ff3; BYTE $0xb1               // movdqu    xmm3, oword [rcx + 4*rsi]
 33396  	LONG $0x646f0ff3; WORD $0x10b1             // movdqu    xmm4, oword [rcx + 4*rsi + 16]
 33397  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0  -- dword lanes = all ones where in == 0
 33398  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1  -- invert: -1 where in != 0, else 0
 33399  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3  -- narrow dword masks to words (signed saturation keeps -1 / 0)
 33400  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3  -- narrow word masks to bytes; low dword now holds four byte masks
 33401  	LONG $0xdadb0f66                           // pand    xmm3, xmm2  -- K where in != 0, else 0
 33402  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 33403  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33404  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 33405  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 33406  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33407  	LONG $0x7e0f4166; WORD $0x301c             // movd    dword [r8 + rsi], xmm3  -- store four 8-bit results
 33408  	LONG $0x7e0f4166; WORD $0x3064; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm4
 33409  	LONG $0x5c6f0ff3; WORD $0x20b1             // movdqu    xmm3, oword [rcx + 4*rsi + 32]  -- second group of 8 (elements rsi+8 .. rsi+15)
 33410  	LONG $0x646f0ff3; WORD $0x30b1             // movdqu    xmm4, oword [rcx + 4*rsi + 48]
 33411  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 33412  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 33413  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 33414  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 33415  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 33416  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 33417  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33418  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 33419  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 33420  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33421  	LONG $0x7e0f4166; WORD $0x305c; BYTE $0x08 // movd    dword [r8 + rsi + 8], xmm3
 33422  	LONG $0x7e0f4166; WORD $0x3064; BYTE $0x0c // movd    dword [r8 + rsi + 12], xmm4
 33423  	LONG $0x10c68348                           // add    rsi, 16
 33424  	LONG $0x02c78348                           // add    rdi, 2  -- sets ZF when the negated counter reaches 0
 33425  	JNE  LBB4_627
 33426  	JMP  LBB4_1272 // continuation is outside this chunk (presumably handles an odd leftover group -- confirm)
 33427  
 33428  LBB4_630: // SSE path: 64-bit floats read via rcx -> one byte per element stored via r8; inputs equal to 0.0 always produce byte 0
 33429  	WORD $0xc289                 // mov    edx, eax ; eax is presumably the element count (set outside this chunk -- confirm)
 33430  	WORD $0xe283; BYTE $0xfc     // and    edx, -4 ; edx = eax rounded down to a multiple of 4
 33431  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 33432  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33433  	LONG $0x02e9c149             // shr    r9, 2
 33434  	LONG $0x01c18349             // add    r9, 1 ; r9 = (edx - 4)/4 + 1 = number of 4-element groups
 33435  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33436  	JE   LBB4_1279 // edx == 4 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33437  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33438  	LONG $0xfee78348             // and    rdi, -2
 33439  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33440  	WORD $0xf631                 // xor    esi, esi ; rsi = element index = 0
 33441  	LONG $0xd2570f66             // xorpd    xmm2, xmm2 ; xmm2 = 0.0 (comparison operand and blend source)
 33442  	LONG $0x5d280f66; BYTE $0x00 // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */ ; AND mask, value not visible here; presumably the sign-bit mask -- confirm
 33443  	LONG $0x65280f66; BYTE $0x10 // movapd    xmm4, oword 16[rbp] /* [rip + .LCPI4_1] */ ; OR operand, value not visible here; presumably 1.0 -- confirm
 33444  	LONG $0x6d6f0f66; BYTE $0x40 // movdqa    xmm5, oword 64[rbp] /* [rip + .LCPI4_7] */ ; pshufb control, value not visible here
 33445  
 33446  LBB4_632: // loop body: two 4-element groups (8 doubles) per pass
 33447  	LONG $0x34100f66; BYTE $0xf1         // movupd    xmm6, oword [rcx + 8*rsi] ; elements rsi, rsi+1
 33448  	LONG $0x7c100f66; WORD $0x10f1       // movupd    xmm7, oword [rcx + 8*rsi + 16] ; elements rsi+2, rsi+3
 33449  	LONG $0xc6280f66                     // movapd    xmm0, xmm6
 33450  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2 ; all ones in lanes equal to 0.0
 33451  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; three packs narrow the two 64-bit lane masks down to
 33452  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; one byte each, landing in the low two bytes
 33453  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 33454  	LONG $0xcf280f66                     // movapd    xmm1, xmm7
 33455  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 33456  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33457  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33458  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 33459  	LONG $0xf3540f66                     // andpd    xmm6, xmm3
 33460  	LONG $0xf4560f66                     // orpd    xmm6, xmm4 ; xmm6 = (x AND .LCPI4_0) OR .LCPI4_1; presumably +/-1.0 carrying x's sign -- confirm
 33461  	LONG $0xfb540f66                     // andpd    xmm7, xmm3
 33462  	LONG $0xfc560f66                     // orpd    xmm7, xmm4
 33463  	LONG $0xf6e60f66                     // cvttpd2dq    xmm6, xmm6 ; truncate both doubles to int32
 33464  	LONG $0x00380f66; BYTE $0xf5         // pshufb    xmm6, xmm5 ; byte shuffle; presumably gathers each int32's low byte into bytes 0..1 -- confirm
 33465  	LONG $0xffe60f66                     // cvttpd2dq    xmm7, xmm7
 33466  	LONG $0x00380f66; BYTE $0xfd         // pshufb    xmm7, xmm5
 33467  	LONG $0x10380f66; BYTE $0xf2         // pblendvb    xmm6, xmm2, xmm0 ; where the input was 0.0 (mask in xmm0) take byte 0 from xmm2
 33468  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0 implicitly
 33469  	LONG $0x10380f66; BYTE $0xfa         // pblendvb    xmm7, xmm2, xmm0
 33470  	QUAD $0x003034153a0f4166             // pextrw    word [r8 + rsi], xmm6, 0 ; store 2 result bytes
 33471  	QUAD $0x02307c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm7, 0 ; store the next 2 result bytes
 33472  	LONG $0x74100f66; WORD $0x20f1       // movupd    xmm6, oword [rcx + 8*rsi + 32] ; second unrolled group: elements rsi+4, rsi+5
 33473  	LONG $0x7c100f66; WORD $0x30f1       // movupd    xmm7, oword [rcx + 8*rsi + 48] ; elements rsi+6, rsi+7
 33474  	LONG $0xc6280f66                     // movapd    xmm0, xmm6
 33475  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2
 33476  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 33477  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 33478  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 33479  	LONG $0xcf280f66                     // movapd    xmm1, xmm7
 33480  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 33481  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33482  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33483  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 33484  	LONG $0xf3540f66                     // andpd    xmm6, xmm3
 33485  	LONG $0xf4560f66                     // orpd    xmm6, xmm4
 33486  	LONG $0xfb540f66                     // andpd    xmm7, xmm3
 33487  	LONG $0xfc560f66                     // orpd    xmm7, xmm4
 33488  	LONG $0xf6e60f66                     // cvttpd2dq    xmm6, xmm6
 33489  	LONG $0x00380f66; BYTE $0xf5         // pshufb    xmm6, xmm5
 33490  	LONG $0xffe60f66                     // cvttpd2dq    xmm7, xmm7
 33491  	LONG $0x00380f66; BYTE $0xfd         // pshufb    xmm7, xmm5
 33492  	LONG $0x10380f66; BYTE $0xf2         // pblendvb    xmm6, xmm2, xmm0
 33493  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 33494  	LONG $0x10380f66; BYTE $0xfa         // pblendvb    xmm7, xmm2, xmm0
 33495  	QUAD $0x043074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 4], xmm6, 0
 33496  	QUAD $0x06307c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 6], xmm7, 0
 33497  	LONG $0x08c68348                     // add    rsi, 8 ; 8 elements consumed per pass
 33498  	LONG $0x02c78348                     // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33499  	JNE  LBB4_632
 33500  	JMP  LBB4_1280 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33501  
 33502  LBB4_635: // SSE path: signed bytes read via rcx -> one byte per element stored via r8; out = (C > x) ? (x != 0 ? 0xFF : 0) : C, with C = .LCPI4_22
 33503  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d ; r10d is presumably the element count (set outside this chunk -- confirm)
 33504  	WORD $0xe683; BYTE $0xe0 // and    esi, -32 ; esi = r10d rounded down to a multiple of 32
 33505  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 33506  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 33507  	LONG $0x05e9c149         // shr    r9, 5
 33508  	LONG $0x01c18349         // add    r9, 1 ; r9 = (esi - 32)/32 + 1 = number of 32-element groups
 33509  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 33510  	JE   LBB4_1288 // esi == 32 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33511  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33512  	LONG $0xfee78348         // and    rdi, -2
 33513  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33514  	WORD $0xc031             // xor    eax, eax ; rax = element index = 0
 33515  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 ; xmm2 = all zero (comparison operand)
 33516  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones (used to invert masks)
 33517  	QUAD $0x00000100a56f0f66 // movdqa    xmm4, oword 256[rbp] /* [rip + .LCPI4_22] */ ; constant C, not visible here; presumably 0x01 in each byte -- confirm
 33518  
 33519  LBB4_637: // loop body: two 32-element groups (64 bytes) per pass
 33520  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax] ; elements rax .. rax+15
 33521  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16] ; elements rax+16 .. rax+31
 33522  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 33523  	LONG $0xc5640f66                           // pcmpgtb    xmm0, xmm5 ; blend mask: all ones where C > x (signed byte compare)
 33524  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2 ; all ones in lanes equal to 0
 33525  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 ; invert: 0xFF where x != 0, 0x00 where x == 0
 33526  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 33527  	LONG $0xce640f66                           // pcmpgtb    xmm1, xmm6
 33528  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 33529  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33530  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4 ; start from C in every lane
 33531  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0 ; where C > x replace C with the nonzero mask byte
 33532  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 33533  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0 implicitly
 33534  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 33535  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7 ; store 16 result bytes
 33536  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5 ; store the next 16 result bytes
 33537  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32] ; second unrolled group: elements rax+32 .. rax+47
 33538  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48] ; elements rax+48 .. rax+63
 33539  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 33540  	LONG $0xc5640f66                           // pcmpgtb    xmm0, xmm5
 33541  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 33542  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 33543  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 33544  	LONG $0xce640f66                           // pcmpgtb    xmm1, xmm6
 33545  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 33546  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33547  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 33548  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0
 33549  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 33550  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 33551  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 33552  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 33553  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 33554  	LONG $0x40c08348                           // add    rax, 64 ; 64 elements consumed per pass
 33555  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33556  	JNE  LBB4_637
 33557  	JMP  LBB4_1289 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33558  
 33559  LBB4_640: // SSE path: 64-bit integers read via rcx -> one byte per element stored via r8, byte = (elem != 0) AND .LCPI4_18
 33560  	WORD $0xc289             // mov    edx, eax ; eax is presumably the element count (set outside this chunk -- confirm)
 33561  	WORD $0xe283; BYTE $0xfc // and    edx, -4 ; edx = eax rounded down to a multiple of 4
 33562  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 33563  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33564  	LONG $0x02e9c149         // shr    r9, 2
 33565  	LONG $0x01c18349         // add    r9, 1 ; r9 = (edx - 4)/4 + 1 = number of 4-element groups
 33566  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 33567  	JE   LBB4_1297 // edx == 4 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33568  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33569  	LONG $0xfee78348         // and    rdi, -2
 33570  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33571  	WORD $0xf631             // xor    esi, esi ; rsi = element index = 0
 33572  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 ; xmm0 = all zero (comparison operand)
 33573  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 ; xmm1 = all ones (used to invert masks)
 33574  	QUAD $0x000000c0956f0f66 // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI4_18] */ ; constant not visible here, presumably 0x01 in the low bytes -- confirm
 33575  
 33576  LBB4_642: // loop body: two 4-element groups (8 elements) per pass
 33577  	LONG $0x1c6f0ff3; BYTE $0xf1         // movdqu    xmm3, oword [rcx + 8*rsi] ; elements rsi, rsi+1
 33578  	LONG $0x646f0ff3; WORD $0x10f1       // movdqu    xmm4, oword [rcx + 8*rsi + 16] ; elements rsi+2, rsi+3
 33579  	LONG $0x29380f66; BYTE $0xd8         // pcmpeqq    xmm3, xmm0 ; all ones in 64-bit lanes equal to 0
 33580  	LONG $0xd9ef0f66                     // pxor    xmm3, xmm1 ; invert: all ones in lanes != 0
 33581  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3 ; three packs narrow the two 64-bit lane masks down to
 33582  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3 ; one byte each, landing in the low two bytes
 33583  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 33584  	LONG $0xdadb0f66                     // pand    xmm3, xmm2 ; keep only the bits set in the constant
 33585  	LONG $0x29380f66; BYTE $0xe0         // pcmpeqq    xmm4, xmm0
 33586  	LONG $0xe1ef0f66                     // pxor    xmm4, xmm1
 33587  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 33588  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 33589  	LONG $0xe4630f66                     // packsswb    xmm4, xmm4
 33590  	QUAD $0x00301c153a0f4166             // pextrw    word [r8 + rsi], xmm3, 0 ; store 2 result bytes
 33591  	LONG $0xe2db0f66                     // pand    xmm4, xmm2
 33592  	QUAD $0x023064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm4, 0 ; store the next 2 result bytes
 33593  	LONG $0x5c6f0ff3; WORD $0x20f1       // movdqu    xmm3, oword [rcx + 8*rsi + 32] ; second unrolled group: elements rsi+4, rsi+5
 33594  	LONG $0x646f0ff3; WORD $0x30f1       // movdqu    xmm4, oword [rcx + 8*rsi + 48] ; elements rsi+6, rsi+7
 33595  	LONG $0x29380f66; BYTE $0xd8         // pcmpeqq    xmm3, xmm0
 33596  	LONG $0xd9ef0f66                     // pxor    xmm3, xmm1
 33597  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 33598  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 33599  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 33600  	LONG $0xdadb0f66                     // pand    xmm3, xmm2
 33601  	LONG $0x29380f66; BYTE $0xe0         // pcmpeqq    xmm4, xmm0
 33602  	LONG $0xe1ef0f66                     // pxor    xmm4, xmm1
 33603  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 33604  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 33605  	LONG $0xe4630f66                     // packsswb    xmm4, xmm4
 33606  	QUAD $0x04305c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 4], xmm3, 0
 33607  	LONG $0xe2db0f66                     // pand    xmm4, xmm2
 33608  	QUAD $0x063064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 6], xmm4, 0
 33609  	LONG $0x08c68348                     // add    rsi, 8 ; 8 elements consumed per pass
 33610  	LONG $0x02c78348                     // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33611  	JNE  LBB4_642
 33612  	JMP  LBB4_1298 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33613  
 33614  LBB4_645: // SSE path: 16-bit elements read via rcx -> one byte per element stored via r8, byte = (elem != 0) AND .LCPI4_21
 33615  	WORD $0xc289             // mov    edx, eax ; eax is presumably the element count (set outside this chunk -- confirm)
 33616  	WORD $0xe283; BYTE $0xf0 // and    edx, -16 ; edx = eax rounded down to a multiple of 16
 33617  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 33618  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33619  	LONG $0x04e9c149         // shr    r9, 4
 33620  	LONG $0x01c18349         // add    r9, 1 ; r9 = (edx - 16)/16 + 1 = number of 16-element groups
 33621  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 33622  	JE   LBB4_1305 // edx == 16 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33623  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33624  	LONG $0xfee78348         // and    rdi, -2
 33625  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33626  	WORD $0xf631             // xor    esi, esi ; rsi = element index = 0
 33627  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 ; xmm0 = all zero (comparison operand)
 33628  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 ; xmm1 = all ones (used to invert masks)
 33629  	QUAD $0x000000f0956f0f66 // movdqa    xmm2, oword 240[rbp] /* [rip + .LCPI4_21] */ ; constant not visible here, presumably 0x01 in the low bytes -- confirm
 33630  
 33631  LBB4_647: // loop body: two 16-element groups (32 elements) per pass
 33632  	LONG $0x1c6f0ff3; BYTE $0x71               // movdqu    xmm3, oword [rcx + 2*rsi] ; elements rsi .. rsi+7
 33633  	LONG $0x646f0ff3; WORD $0x1071             // movdqu    xmm4, oword [rcx + 2*rsi + 16] ; elements rsi+8 .. rsi+15
 33634  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0 ; all ones in 16-bit lanes equal to 0
 33635  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1 ; invert: all ones in lanes != 0
 33636  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3 ; narrow lane masks 16 -> 8 bits (8 bytes in the low half)
 33637  	LONG $0xdadb0f66                           // pand    xmm3, xmm2 ; keep only the bits set in the constant
 33638  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 33639  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33640  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 33641  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33642  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4 ; join the two 8-byte halves into one 16-byte result
 33643  	LONG $0x7f0f41f3; WORD $0x301c             // movdqu    oword [r8 + rsi], xmm3 ; store 16 result bytes
 33644  	LONG $0x5c6f0ff3; WORD $0x2071             // movdqu    xmm3, oword [rcx + 2*rsi + 32] ; second unrolled group: elements rsi+16 .. rsi+23
 33645  	LONG $0x646f0ff3; WORD $0x3071             // movdqu    xmm4, oword [rcx + 2*rsi + 48] ; elements rsi+24 .. rsi+31
 33646  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 33647  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 33648  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 33649  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 33650  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 33651  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 33652  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 33653  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 33654  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 33655  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm3
 33656  	LONG $0x20c68348                           // add    rsi, 32 ; 32 elements consumed per pass
 33657  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33658  	JNE  LBB4_647
 33659  	JMP  LBB4_1306 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33660  
 33661  LBB4_650: // SSE path: signed 16-bit elements read via rcx -> one byte per element stored via r8; out = (x > 0) ? C : (x != 0 ? 0xFF : 0), with C = .LCPI4_21
 33662  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d ; r10d is presumably the element count (set outside this chunk -- confirm)
 33663  	WORD $0xe683; BYTE $0xf0 // and    esi, -16 ; esi = r10d rounded down to a multiple of 16
 33664  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 33665  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 33666  	LONG $0x04e9c149         // shr    r9, 4
 33667  	LONG $0x01c18349         // add    r9, 1 ; r9 = (esi - 16)/16 + 1 = number of 16-element groups
 33668  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 33669  	JE   LBB4_1313 // esi == 16 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33670  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33671  	LONG $0xfee78348         // and    rdi, -2
 33672  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33673  	WORD $0xc031             // xor    eax, eax ; rax = element index = 0
 33674  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 ; xmm2 = all zero (comparison operand)
 33675  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones (used to invert masks)
 33676  	QUAD $0x000000f0a56f0f66 // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */ ; constant C, not visible here; presumably 0x01 in the low bytes -- confirm
 33677  
 33678  LBB4_652: // loop body: two 16-element groups (32 elements) per pass
 33679  	LONG $0x2c6f0ff3; BYTE $0x41               // movdqu    xmm5, oword [rcx + 2*rax] ; elements rax .. rax+7
 33680  	LONG $0x746f0ff3; WORD $0x1041             // movdqu    xmm6, oword [rcx + 2*rax + 16] ; elements rax+8 .. rax+15
 33681  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 33682  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2 ; all ones where x > 0 (signed 16-bit compare)
 33683  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0 ; narrow the "positive" mask to one byte per element
 33684  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 33685  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 33686  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 33687  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2 ; all ones in lanes equal to 0
 33688  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 ; invert: all ones in lanes != 0
 33689  	LONG $0xed630f66                           // packsswb    xmm5, xmm5 ; per-element byte: 0xFF where x != 0, 0x00 where x == 0
 33690  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 33691  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33692  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 33693  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0 ; where x > 0 (mask in xmm0) take C instead of the nonzero byte
 33694  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0 implicitly
 33695  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 33696  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6 ; join the two 8-byte halves into one 16-byte result
 33697  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5 ; store 16 result bytes
 33698  	LONG $0x6c6f0ff3; WORD $0x2041             // movdqu    xmm5, oword [rcx + 2*rax + 32] ; second unrolled group: elements rax+16 .. rax+23
 33699  	LONG $0x746f0ff3; WORD $0x3041             // movdqu    xmm6, oword [rcx + 2*rax + 48] ; elements rax+24 .. rax+31
 33700  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 33701  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 33702  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 33703  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 33704  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 33705  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 33706  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 33707  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 33708  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 33709  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 33710  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33711  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 33712  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 33713  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 33714  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 33715  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 33716  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 33717  	LONG $0x20c08348                           // add    rax, 32 ; 32 elements consumed per pass
 33718  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33719  	JNE  LBB4_652
 33720  	JMP  LBB4_1314 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33721  
 33722  LBB4_655: // SSE path: signed 64-bit elements read via rcx -> one byte per element stored via r8; out = (x > 0) ? C : (x != 0 ? 0xFF : 0), with C = .LCPI4_18
 33723  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d ; r10d is presumably the element count (set outside this chunk -- confirm)
 33724  	WORD $0xe683; BYTE $0xfc // and    esi, -4 ; esi = r10d rounded down to a multiple of 4
 33725  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 33726  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 33727  	LONG $0x02e9c149         // shr    r9, 2
 33728  	LONG $0x01c18349         // add    r9, 1 ; r9 = (esi - 4)/4 + 1 = number of 4-element groups
 33729  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 33730  	JE   LBB4_1322 // esi == 4 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33731  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33732  	LONG $0xfee78348         // and    rdi, -2
 33733  	WORD $0xf748; BYTE $0xdf // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33734  	WORD $0xc031             // xor    eax, eax ; rax = element index = 0
 33735  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 ; xmm2 = all zero (comparison operand)
 33736  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones (used to invert masks)
 33737  	QUAD $0x000000c0a56f0f66 // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */ ; constant C, not visible here; presumably 0x01 in the low bytes -- confirm
 33738  
 33739  LBB4_657: // loop body: two 4-element groups (8 elements) per pass
 33740  	LONG $0x2c6f0ff3; BYTE $0xc1         // movdqu    xmm5, oword [rcx + 8*rax] ; elements rax, rax+1
 33741  	LONG $0x746f0ff3; WORD $0x10c1       // movdqu    xmm6, oword [rcx + 8*rax + 16] ; elements rax+2, rax+3
 33742  	LONG $0xc56f0f66                     // movdqa    xmm0, xmm5
 33743  	LONG $0x37380f66; BYTE $0xc2         // pcmpgtq    xmm0, xmm2 ; all ones where x > 0 (signed 64-bit compare)
 33744  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; three packs narrow the two 64-bit lane masks down to
 33745  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; one byte each, landing in the low two bytes
 33746  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 33747  	LONG $0xce6f0f66                     // movdqa    xmm1, xmm6
 33748  	LONG $0x37380f66; BYTE $0xca         // pcmpgtq    xmm1, xmm2
 33749  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33750  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33751  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 33752  	LONG $0x29380f66; BYTE $0xea         // pcmpeqq    xmm5, xmm2 ; all ones in lanes equal to 0
 33753  	LONG $0xebef0f66                     // pxor    xmm5, xmm3 ; invert: all ones in lanes != 0
 33754  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 33755  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 33756  	LONG $0xed630f66                     // packsswb    xmm5, xmm5 ; per-element byte: 0xFF where x != 0, 0x00 where x == 0
 33757  	LONG $0x29380f66; BYTE $0xf2         // pcmpeqq    xmm6, xmm2
 33758  	LONG $0xf3ef0f66                     // pxor    xmm6, xmm3
 33759  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 33760  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 33761  	LONG $0xf6630f66                     // packsswb    xmm6, xmm6
 33762  	LONG $0x10380f66; BYTE $0xec         // pblendvb    xmm5, xmm4, xmm0 ; where x > 0 (mask in xmm0) take C instead of the nonzero byte
 33763  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0 implicitly
 33764  	LONG $0x10380f66; BYTE $0xf4         // pblendvb    xmm6, xmm4, xmm0
 33765  	QUAD $0x00002c153a0f4166             // pextrw    word [r8 + rax], xmm5, 0 ; store 2 result bytes
 33766  	QUAD $0x020074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 2], xmm6, 0 ; store the next 2 result bytes
 33767  	LONG $0x6c6f0ff3; WORD $0x20c1       // movdqu    xmm5, oword [rcx + 8*rax + 32] ; second unrolled group: elements rax+4, rax+5
 33768  	LONG $0x746f0ff3; WORD $0x30c1       // movdqu    xmm6, oword [rcx + 8*rax + 48] ; elements rax+6, rax+7
 33769  	LONG $0xc56f0f66                     // movdqa    xmm0, xmm5
 33770  	LONG $0x37380f66; BYTE $0xc2         // pcmpgtq    xmm0, xmm2
 33771  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 33772  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 33773  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 33774  	LONG $0xce6f0f66                     // movdqa    xmm1, xmm6
 33775  	LONG $0x37380f66; BYTE $0xca         // pcmpgtq    xmm1, xmm2
 33776  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33777  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 33778  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 33779  	LONG $0x29380f66; BYTE $0xea         // pcmpeqq    xmm5, xmm2
 33780  	LONG $0xebef0f66                     // pxor    xmm5, xmm3
 33781  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 33782  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 33783  	LONG $0xed630f66                     // packsswb    xmm5, xmm5
 33784  	LONG $0x29380f66; BYTE $0xf2         // pcmpeqq    xmm6, xmm2
 33785  	LONG $0xf3ef0f66                     // pxor    xmm6, xmm3
 33786  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 33787  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 33788  	LONG $0xf6630f66                     // packsswb    xmm6, xmm6
 33789  	LONG $0x10380f66; BYTE $0xec         // pblendvb    xmm5, xmm4, xmm0
 33790  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 33791  	LONG $0x10380f66; BYTE $0xf4         // pblendvb    xmm6, xmm4, xmm0
 33792  	QUAD $0x04006c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 4], xmm5, 0
 33793  	QUAD $0x060074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 6], xmm6, 0
 33794  	LONG $0x08c08348                     // add    rax, 8 ; 8 elements consumed per pass
 33795  	LONG $0x02c78348                     // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33796  	JNE  LBB4_657
 33797  	JMP  LBB4_1323 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33798  
 33799  LBB4_660: // SSE path: 32-bit floats read via rcx -> one byte per element stored via r8; out = 0 if x == 0.0, else C (= .LCPI4_12) if the sign bit is clear, else 0xFF
 33800  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d ; r10d is presumably the element count (set outside this chunk -- confirm)
 33801  	WORD $0xe283; BYTE $0xf8     // and    edx, -8 ; edx = r10d rounded down to a multiple of 8
 33802  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 33803  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 33804  	LONG $0x03e9c149             // shr    r9, 3
 33805  	LONG $0x01c18349             // add    r9, 1 ; r9 = (edx - 8)/8 + 1 = number of 8-element groups
 33806  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 33807  	JE   LBB4_1331 // edx == 8 (a single group): bypass the 2x-unrolled loop; target is outside this chunk
 33808  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 33809  	LONG $0xfee78348             // and    rdi, -2
 33810  	WORD $0xf748; BYTE $0xdf     // neg    rdi ; rdi = -(r9 rounded down to even); the loop counts it up to 0 in steps of 2
 33811  	WORD $0xf631                 // xor    esi, esi ; rsi = element index = 0
 33812  	WORD $0x570f; BYTE $0xe4     // xorps    xmm4, xmm4 ; xmm4 = 0.0 (comparison operand and blend source)
 33813  	LONG $0x760f4566; BYTE $0xc0 // pcmpeqd    xmm8, xmm8 ; xmm8 = all ones, i.e. -1 in every int32 lane
 33814  	QUAD $0x00000080b56f0f66     // movdqa    xmm6, oword 128[rbp] /* [rip + .LCPI4_12] */ ; constant C, not visible here; presumably 0x01 in each byte -- confirm
 33815  
 33816  LBB4_662: // loop body: two 8-element groups (16 floats) per pass
 33817  	LONG $0xb104100f                           // movups    xmm0, oword [rcx + 4*rsi] ; elements rsi .. rsi+3
 33818  	LONG $0xb14c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rsi + 16] ; elements rsi+4 .. rsi+7
 33819  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 33820  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4 ; all ones in lanes equal to 0.0
 33821  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2 ; narrow the "is zero" mask 32 -> 16 bits
 33822  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2 ; and 16 -> 8 bits
 33823  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 33824  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 33825  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 33826  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 33827  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8 ; integer compare of the raw float bits against -1: all ones where the sign bit is clear
 33828  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 33829  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 33830  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 33831  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 33832  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 33833  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7 ; start from 0xFF in every byte
 33834  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0 ; where the sign bit is clear take C
 33835  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 33836  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0 implicitly
 33837  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 33838  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 33839  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0 ; where x == 0.0 force the result byte to 0
 33840  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 33841  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 33842  	LONG $0x7e0f4166; WORD $0x303c             // movd    dword [r8 + rsi], xmm7 ; store 4 result bytes
 33843  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm5 ; store the next 4 result bytes
 33844  	LONG $0xb144100f; BYTE $0x20               // movups    xmm0, oword [rcx + 4*rsi + 32] ; second unrolled group: elements rsi+8 .. rsi+11
 33845  	LONG $0xb14c100f; BYTE $0x30               // movups    xmm1, oword [rcx + 4*rsi + 48] ; elements rsi+12 .. rsi+15
 33846  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 33847  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 33848  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 33849  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 33850  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 33851  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 33852  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 33853  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 33854  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 33855  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 33856  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 33857  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 33858  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 33859  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; in this half xmm5 holds the first result and xmm7 the second
 33860  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 33861  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 33862  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 33863  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 33864  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 33865  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 33866  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 33867  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 33868  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 33869  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x08 // movd    dword [r8 + rsi + 8], xmm5
 33870  	LONG $0x7e0f4166; WORD $0x307c; BYTE $0x0c // movd    dword [r8 + rsi + 12], xmm7
 33871  	LONG $0x10c68348                           // add    rsi, 16 ; 16 elements consumed per pass
 33872  	LONG $0x02c78348                           // add    rdi, 2 ; two groups done; sets ZF when all paired groups are finished
 33873  	JNE  LBB4_662
 33874  	JMP  LBB4_1332 // continuation is outside this chunk; presumably handles an odd leftover group and the scalar tail -- confirm
 33875  
 33876  LBB4_665:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_667
        	// (one chunk = 32 elements). eax is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 33877  	WORD $0xc289             // mov    edx, eax
 33878  	WORD $0xe283; BYTE $0xe0 // and    edx, -32
        	// rdx = eax rounded down to a multiple of 32 (32-bit write zero-extends).
 33879  	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
 33880  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33881  	LONG $0x05e9c149         // shr    r9, 5
 33882  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 32) / 32 + 1, i.e. the number of 32-element chunks.
 33883  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 33884  	JE   LBB4_1340
 33885  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33886  	LONG $0xfee78348         // and    rdi, -2
 33887  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 33888  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running offset used by the loop's addressing.
 33889  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
        	// xmm0 = 0, the comparand for pcmpeqb in the loop.
        	// xmm1 = 16-byte constant at rbp+256 (.LCPI4_22; its value is not visible
        	// here - presumably a vector of byte 1s, confirm against the data table).
 33890  	QUAD $0x000001008d6f0f66 // movdqa    xmm1, oword 256[rbp] /* [rip + .LCPI4_22] */
 33891  
 33892  LBB4_667:
        	// Loop body: 64 bytes per iteration, as two 32-byte halves.
        	// For each byte b read from [rcx + rsi] the byte written to [r8 + rsi] is
        	//   (b == 0) ? 0 : matching byte of xmm1
        	// pcmpeqb yields 0xFF where b == 0; pandn computes (~mask) & xmm1.
        	// xmm0 (zero) and xmm1 (constant) are loaded just before the loop.
 33893  	LONG $0x146f0ff3; BYTE $0x31               // movdqu    xmm2, oword [rcx + rsi]
 33894  	LONG $0x5c6f0ff3; WORD $0x1031             // movdqu    xmm3, oword [rcx + rsi + 16]
 33895  	LONG $0xd0740f66                           // pcmpeqb    xmm2, xmm0
 33896  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 33897  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 33898  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 33899  	LONG $0x7f0f41f3; WORD $0x3014             // movdqu    oword [r8 + rsi], xmm2
 33900  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm3
        	// Second half: same operation on bytes 32..63 of this iteration.
 33901  	LONG $0x546f0ff3; WORD $0x2031             // movdqu    xmm2, oword [rcx + rsi + 32]
 33902  	LONG $0x5c6f0ff3; WORD $0x3031             // movdqu    xmm3, oword [rcx + rsi + 48]
 33903  	LONG $0xd0740f66                           // pcmpeqb    xmm2, xmm0
 33904  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 33905  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 33906  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 33907  	LONG $0x7f0f41f3; WORD $0x3054; BYTE $0x20 // movdqu    oword [r8 + rsi + 32], xmm2
 33908  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x30 // movdqu    oword [r8 + rsi + 48], xmm3
        	// Advance the byte offset by 64; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 33909  	LONG $0x40c68348                           // add    rsi, 64
 33910  	LONG $0x02c78348                           // add    rdi, 2
 33911  	JNE  LBB4_667
        	// Loop finished; continuation label is outside this view.
 33912  	JMP  LBB4_1341
 33913  
 33914  LBB4_670:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_672
        	// (one chunk = 8 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 33915  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
 33916  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
        	// rsi = r10d rounded down to a multiple of 8 (32-bit write zero-extends).
 33917  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 33918  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 33919  	LONG $0x03e9c149         // shr    r9, 3
 33920  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rsi - 8) / 8 + 1, i.e. the number of 8-element chunks.
 33921  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// rax == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 33922  	JE   LBB4_1348
 33923  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33924  	LONG $0xfee78348         // and    rdi, -2
 33925  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 33926  	WORD $0xc031             // xor    eax, eax
        	// rax = 0: running element index used by the loop's addressing.
 33927  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 33928  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
        	// xmm2 = 0 (comparand), xmm3 = all-ones (used to invert masks via pxor).
        	// xmm4 = 16-byte constant at rbp+128 (.LCPI4_12; its value is not visible
        	// here - presumably a vector of byte 1s, confirm against the data table).
 33929  	QUAD $0x00000080a56f0f66 // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 33930  
 33931  LBB4_672:
        	// Loop body: 16 dwords in ([rcx + 4*rax]) -> 16 bytes out ([r8 + rax]) per
        	// iteration, as two halves of 8. For each signed 32-bit x the output byte is
        	//   x > 0  -> matching byte of xmm4
        	//   x == 0 -> 0x00
        	//   else   -> 0xFF
        	// xmm2 = 0, xmm3 = all-ones and xmm4 are loop-invariant (loaded before the loop).
 33932  	LONG $0x2c6f0ff3; BYTE $0x81               // movdqu    xmm5, oword [rcx + 4*rax]
 33933  	LONG $0x746f0ff3; WORD $0x1081             // movdqu    xmm6, oword [rcx + 4*rax + 16]
        	// xmm0 / xmm1 = (x > 0) masks, narrowed dword -> word -> byte with signed saturation.
 33934  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 33935  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 33936  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 33937  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 33938  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 33939  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 33940  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 33941  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
        	// xmm5 / xmm6 = (x != 0) masks: compare-equal with zero, invert, narrow to bytes.
 33942  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 33943  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 33944  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 33945  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 33946  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 33947  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33948  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 33949  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
        	// pblendvb uses xmm0 as its implicit mask, hence the copy of xmm1 into xmm0
        	// before the second blend. Where the mask is set the xmm4 byte replaces the
        	// (x != 0) byte.
 33950  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 33951  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 33952  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
        	// Only the low 4 bytes of each result are meaningful; store them as dwords.
 33953  	LONG $0x7e0f4166; WORD $0x002c             // movd    dword [r8 + rax], xmm5
 33954  	LONG $0x7e0f4166; WORD $0x0074; BYTE $0x04 // movd    dword [r8 + rax + 4], xmm6
        	// Second half: same operation on elements 8..15 of this iteration.
 33955  	LONG $0x6c6f0ff3; WORD $0x2081             // movdqu    xmm5, oword [rcx + 4*rax + 32]
 33956  	LONG $0x746f0ff3; WORD $0x3081             // movdqu    xmm6, oword [rcx + 4*rax + 48]
 33957  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 33958  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 33959  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 33960  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 33961  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 33962  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 33963  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 33964  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 33965  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 33966  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 33967  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 33968  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 33969  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 33970  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 33971  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 33972  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 33973  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 33974  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 33975  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 33976  	LONG $0x7e0f4166; WORD $0x006c; BYTE $0x08 // movd    dword [r8 + rax + 8], xmm5
 33977  	LONG $0x7e0f4166; WORD $0x0074; BYTE $0x0c // movd    dword [r8 + rax + 12], xmm6
        	// Advance the element index by 16; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 33978  	LONG $0x10c08348                           // add    rax, 16
 33979  	LONG $0x02c78348                           // add    rdi, 2
 33980  	JNE  LBB4_672
        	// Loop finished; continuation label is outside this view.
 33981  	JMP  LBB4_1349
 33982  
 33983  LBB4_681:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_683
        	// (one chunk = 4 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 33984  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 33985  	WORD $0xe283; BYTE $0xfc // and    edx, -4
        	// rdx = r10d rounded down to a multiple of 4 (32-bit write zero-extends).
 33986  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 33987  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 33988  	LONG $0x02e9c149         // shr    r9, 2
 33989  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 4) / 4 + 1, i.e. the number of 4-element chunks.
 33990  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 33991  	JE   LBB4_1357
 33992  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 33993  	LONG $0xfee78348         // and    rdi, -2
 33994  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 33995  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 33996  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 33997  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
        	// xmm2 = 0 (comparand), xmm3 = all-ones (used to invert masks via pxor).
        	// xmm4 = 16-byte constant at rbp+144 (.LCPI4_15; its value is not visible
        	// here - presumably two 64-bit 1s, confirm against the data table).
 33998  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 33999  
 34000  LBB4_683:
        	// Loop body: 8 bytes in ([rcx + rsi]) -> 8 qwords out ([r8 + 8*rsi]) per
        	// iteration, two bytes per xmm register. For each signed byte x the output
        	// qword is
        	//   x > 0  -> matching qword of xmm4
        	//   x == 0 -> 0
        	//   else   -> all-ones (-1)
        	// xmm2 = 0, xmm3 = all-ones and xmm4 are loop-invariant (loaded before the loop).
        	// Two source bytes are fetched at a time through eax (movzx word + movd).
 34001  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 34002  	LONG $0xe86e0f66                           // movd    xmm5, eax
 34003  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 34004  	LONG $0xf06e0f66                           // movd    xmm6, eax
        	// xmm0 / xmm1 = (x > 0) byte masks, sign-extended to two qwords.
 34005  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34006  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34007  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0
 34008  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34009  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34010  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1
        	// xmm5 / xmm6 = (x != 0) masks: compare-equal with zero, invert, widen to qwords.
 34011  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34012  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34013  	LONG $0x22380f66; BYTE $0xed               // pmovsxbq    xmm5, xmm5
 34014  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34015  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34016  	LONG $0x22380f66; BYTE $0xf6               // pmovsxbq    xmm6, xmm6
        	// blendvpd uses xmm0 as its implicit mask (sign bit of each qword), hence the
        	// copy of xmm1 into xmm0 before the second blend.
 34017  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 34018  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34019  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 34020  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5
 34021  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
        	// Second half: same operation on elements 4..7 of this iteration.
 34022  	LONG $0x3144b70f; BYTE $0x04               // movzx    eax, word [rcx + rsi + 4]
 34023  	LONG $0xe86e0f66                           // movd    xmm5, eax
 34024  	LONG $0x3144b70f; BYTE $0x06               // movzx    eax, word [rcx + rsi + 6]
 34025  	LONG $0xf06e0f66                           // movd    xmm6, eax
 34026  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34027  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34028  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0
 34029  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34030  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34031  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1
 34032  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34033  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34034  	LONG $0x22380f66; BYTE $0xed               // pmovsxbq    xmm5, xmm5
 34035  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34036  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34037  	LONG $0x22380f66; BYTE $0xf6               // pmovsxbq    xmm6, xmm6
 34038  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 34039  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34040  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 34041  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 34042  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
        	// Advance the element index by 8; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34043  	LONG $0x08c68348                           // add    rsi, 8
 34044  	LONG $0x02c78348                           // add    rdi, 2
 34045  	JNE  LBB4_683
        	// Loop finished; continuation label is outside this view.
 34046  	JMP  LBB4_1358
 34047  
 34048  LBB4_686:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_688
        	// (one chunk = 4 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34049  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34050  	WORD $0xe283; BYTE $0xfc // and    edx, -4
        	// rdx = r10d rounded down to a multiple of 4 (32-bit write zero-extends).
 34051  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34052  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34053  	LONG $0x02e9c149         // shr    r9, 2
 34054  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 4) / 4 + 1, i.e. the number of 4-element chunks.
 34055  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34056  	JE   LBB4_1366
 34057  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34058  	LONG $0xfee78348         // and    rdi, -2
 34059  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34060  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34061  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
        	// xmm0 = 0, the comparand for pcmpeqq in the loop.
        	// xmm1 = 16-byte constant at rbp+144 (.LCPI4_15; its value is not visible
        	// here - presumably two 64-bit 1s, confirm against the data table).
 34062  	QUAD $0x000000908d6f0f66 // movdqa    xmm1, oword 144[rbp] /* [rip + .LCPI4_15] */
 34063  
 34064  LBB4_688:
        	// Loop body: 8 qwords per iteration, as two halves of 4.
        	// For each qword x read from [rcx + 8*rsi] the qword written to [r8 + 8*rsi] is
        	//   (x == 0) ? 0 : matching qword of xmm1
        	// pcmpeqq yields all-ones where x == 0; pandn computes (~mask) & xmm1.
        	// xmm0 (zero) and xmm1 (constant) are loaded just before the loop.
 34065  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 34066  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 34067  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 34068  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34069  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 34070  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34071  	LONG $0x7f0f41f3; WORD $0xf014             // movdqu    oword [r8 + 8*rsi], xmm2
 34072  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm3
        	// Second half: same operation on elements 4..7 of this iteration.
 34073  	LONG $0x546f0ff3; WORD $0x20f1             // movdqu    xmm2, oword [rcx + 8*rsi + 32]
 34074  	LONG $0x5c6f0ff3; WORD $0x30f1             // movdqu    xmm3, oword [rcx + 8*rsi + 48]
 34075  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 34076  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34077  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 34078  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34079  	LONG $0x7f0f41f3; WORD $0xf054; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm2
 34080  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm3
        	// Advance the element index by 8; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34081  	LONG $0x08c68348                           // add    rsi, 8
 34082  	LONG $0x02c78348                           // add    rdi, 2
 34083  	JNE  LBB4_688
        	// Loop finished; continuation label is outside this view.
 34084  	JMP  LBB4_1367
 34085  
 34086  LBB4_697:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_699
        	// (one chunk = 4 elements). r11d is live-in (note: r11d, not r10d as in
        	// neighbouring blocks); it is presumably the element count - TODO confirm.
 34087  	WORD $0x8944; BYTE $0xda // mov    edx, r11d
 34088  	WORD $0xe283; BYTE $0xfc // and    edx, -4
        	// rdx = r11d rounded down to a multiple of 4 (32-bit write zero-extends).
 34089  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34090  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34091  	LONG $0x02e9c149         // shr    r9, 2
 34092  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 4) / 4 + 1, i.e. the number of 4-element chunks.
 34093  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34094  	JE   LBB4_1374
 34095  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34096  	LONG $0xfee78348         // and    rdi, -2
 34097  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34098  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34099  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 34100  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
        	// xmm2 = 0 (comparand), xmm3 = all-ones (used to invert masks via pxor).
        	// xmm4 = 16-byte constant at rbp+144 (.LCPI4_15; its value is not visible
        	// here - presumably two 64-bit 1s, confirm against the data table).
 34101  	QUAD $0x00000090a56f0f66 // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 34102  
 34103  LBB4_699:
        	// Loop body: 8 qwords per iteration ([rcx + 8*rsi] -> [r8 + 8*rsi]), as two
        	// halves of 4. With c = matching qword of xmm4, each signed 64-bit x maps to
        	//   c > x (signed)  -> (x != 0) ? all-ones (-1) : 0
        	//   otherwise       -> c
        	// If c is 1 (not verifiable from here) this is the three-way sign of x.
        	// xmm2 = 0, xmm3 = all-ones and xmm4 are loop-invariant (loaded before the loop).
 34104  	LONG $0x2c6f0ff3; BYTE $0xf1               // movdqu    xmm5, oword [rcx + 8*rsi]
 34105  	LONG $0x746f0ff3; WORD $0x10f1             // movdqu    xmm6, oword [rcx + 8*rsi + 16]
        	// xmm0 = (c > x) mask; xmm5 = (x != 0) mask (compare-equal with zero, inverted).
 34106  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34107  	LONG $0x37380f66; BYTE $0xc5               // pcmpgtq    xmm0, xmm5
 34108  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 34109  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34110  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34111  	LONG $0x37380f66; BYTE $0xce               // pcmpgtq    xmm1, xmm6
 34112  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 34113  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
        	// Start from c and overwrite with the (x != 0) mask where c > x.
        	// blendvpd uses xmm0 as its implicit mask, hence the copy of xmm1 into xmm0.
 34114  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34115  	LONG $0x15380f66; BYTE $0xfd               // blendvpd    xmm7, xmm5, xmm0
 34116  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34117  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34118  	LONG $0x15380f66; BYTE $0xee               // blendvpd    xmm5, xmm6, xmm0
 34119  	LONG $0x110f4166; WORD $0xf03c             // movupd    oword [r8 + 8*rsi], xmm7
 34120  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm5
        	// Second half: same operation on elements 4..7 of this iteration.
 34121  	LONG $0x6c6f0ff3; WORD $0x20f1             // movdqu    xmm5, oword [rcx + 8*rsi + 32]
 34122  	LONG $0x746f0ff3; WORD $0x30f1             // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 34123  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34124  	LONG $0x37380f66; BYTE $0xc5               // pcmpgtq    xmm0, xmm5
 34125  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 34126  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34127  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34128  	LONG $0x37380f66; BYTE $0xce               // pcmpgtq    xmm1, xmm6
 34129  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 34130  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34131  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34132  	LONG $0x15380f66; BYTE $0xfd               // blendvpd    xmm7, xmm5, xmm0
 34133  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34134  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34135  	LONG $0x15380f66; BYTE $0xee               // blendvpd    xmm5, xmm6, xmm0
 34136  	LONG $0x110f4166; WORD $0xf07c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm7
 34137  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm5
        	// Advance the element index by 8; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34138  	LONG $0x08c68348                           // add    rsi, 8
 34139  	LONG $0x02c78348                           // add    rdi, 2
 34140  	JNE  LBB4_699
        	// Loop finished; continuation label is outside this view.
 34141  	JMP  LBB4_1375
 34142  
 34143  LBB4_710:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_712
        	// (one chunk = 4 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34144  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34145  	WORD $0xe283; BYTE $0xfc // and    edx, -4
        	// rdx = r10d rounded down to a multiple of 4 (32-bit write zero-extends).
 34146  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34147  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34148  	LONG $0x02e9c149         // shr    r9, 2
 34149  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 4) / 4 + 1, i.e. the number of 4-element chunks.
 34150  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34151  	JE   LBB4_1383
 34152  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34153  	LONG $0xfee78348         // and    rdi, -2
 34154  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34155  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34156  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 34157  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
        	// xmm0 = 0 (comparand), xmm1 = all-ones (used to invert masks via pxor).
        	// xmm2 = 16-byte constant at rbp+144 (.LCPI4_15; its value is not visible
        	// here - presumably two 64-bit 1s, confirm against the data table).
 34158  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */
 34159  
 34160  LBB4_712:
        	// Loop body: 8 bytes in ([rcx + rsi]) -> 8 qwords out ([r8 + 8*rsi]) per
        	// iteration, two bytes per xmm register. For each byte b the output qword is
        	//   (b != 0) ? (0xFF & matching qword of xmm2) : 0
        	// xmm0 = 0, xmm1 = all-ones and xmm2 are loop-invariant (loaded before the loop).
        	// Two source bytes are fetched at a time through eax (movzx word + movd).
 34161  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 34162  	LONG $0xd86e0f66                           // movd    xmm3, eax
 34163  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 34164  	LONG $0xe06e0f66                           // movd    xmm4, eax
        	// (b != 0) byte mask: compare-equal with zero, invert; zero-extend the low
        	// two bytes to qwords (0xFF or 0) and AND with the constant.
 34165  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34166  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34167  	LONG $0x32380f66; BYTE $0xdb               // pmovzxbq    xmm3, xmm3
 34168  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34169  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34170  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34171  	LONG $0x32380f66; BYTE $0xe4               // pmovzxbq    xmm4, xmm4
 34172  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34173  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3
 34174  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
        	// Second half: same operation on elements 4..7 of this iteration.
 34175  	LONG $0x3144b70f; BYTE $0x04               // movzx    eax, word [rcx + rsi + 4]
 34176  	LONG $0xd86e0f66                           // movd    xmm3, eax
 34177  	LONG $0x3144b70f; BYTE $0x06               // movzx    eax, word [rcx + rsi + 6]
 34178  	LONG $0xe06e0f66                           // movd    xmm4, eax
 34179  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34180  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34181  	LONG $0x32380f66; BYTE $0xdb               // pmovzxbq    xmm3, xmm3
 34182  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34183  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34184  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34185  	LONG $0x32380f66; BYTE $0xe4               // pmovzxbq    xmm4, xmm4
 34186  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34187  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 34188  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
        	// Advance the element index by 8; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34189  	LONG $0x08c68348                           // add    rsi, 8
 34190  	LONG $0x02c78348                           // add    rdi, 2
 34191  	JNE  LBB4_712
        	// Loop finished; continuation label is outside this view.
 34192  	JMP  LBB4_1384
 34193  
 34194  LBB4_730:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_732
        	// (one chunk = 16 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34195  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34196  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
        	// rdx = r10d rounded down to a multiple of 16 (32-bit write zero-extends).
 34197  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34198  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34199  	LONG $0x04e9c149         // shr    r9, 4
 34200  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 16) / 16 + 1, i.e. the number of 16-element chunks.
 34201  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34202  	JE   LBB4_1391
 34203  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34204  	LONG $0xfee78348         // and    rdi, -2
 34205  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34206  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34207  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 34208  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
        	// xmm2 = 0 (comparand), xmm3 = all-ones (used to invert masks via pxor).
        	// xmm4 = 16-byte constant at rbp+224 (.LCPI4_20; its value is not visible
        	// here - presumably eight 16-bit 1s, confirm against the data table).
 34209  	QUAD $0x000000e0a56f0f66 // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 34210  
 34211  LBB4_732:
        	// Loop body: 32 bytes in ([rcx + rsi]) -> 32 words out ([r8 + 2*rsi]) per
        	// iteration, 8 bytes per xmm register. For each signed byte x the output
        	// word is
        	//   x > 0  -> matching word of xmm4
        	//   x == 0 -> 0
        	//   else   -> 0xFFFF (-1)
        	// xmm2 = 0, xmm3 = all-ones and xmm4 are loop-invariant (loaded before the loop).
 34212  	LONG $0x2c7e0ff3; BYTE $0x31               // movq    xmm5, qword [rcx + rsi]
 34213  	LONG $0x747e0ff3; WORD $0x0831             // movq    xmm6, qword [rcx + rsi + 8]
        	// xmm0 / xmm1 = (x > 0) byte masks, sign-extended to eight words.
 34214  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34215  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34216  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 34217  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34218  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34219  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
        	// xmm5 / xmm6 = (x != 0) masks: compare-equal with zero, invert, widen to words.
 34220  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34221  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34222  	LONG $0x20380f66; BYTE $0xed               // pmovsxbw    xmm5, xmm5
 34223  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34224  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34225  	LONG $0x20380f66; BYTE $0xf6               // pmovsxbw    xmm6, xmm6
        	// pblendvb uses xmm0 as its implicit mask, hence the copy of xmm1 into xmm0
        	// before the second blend.
 34226  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 34227  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34228  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 34229  	LONG $0x7f0f41f3; WORD $0x702c             // movdqu    oword [r8 + 2*rsi], xmm5
 34230  	LONG $0x7f0f41f3; WORD $0x7074; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm6
        	// Second half: same operation on elements 16..31 of this iteration.
 34231  	LONG $0x6c7e0ff3; WORD $0x1031             // movq    xmm5, qword [rcx + rsi + 16]
 34232  	LONG $0x747e0ff3; WORD $0x1831             // movq    xmm6, qword [rcx + rsi + 24]
 34233  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34234  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34235  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 34236  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34237  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34238  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
 34239  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34240  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34241  	LONG $0x20380f66; BYTE $0xed               // pmovsxbw    xmm5, xmm5
 34242  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34243  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34244  	LONG $0x20380f66; BYTE $0xf6               // pmovsxbw    xmm6, xmm6
 34245  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 34246  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34247  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 34248  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm5
 34249  	LONG $0x7f0f41f3; WORD $0x7074; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm6
        	// Advance the element index by 32; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34250  	LONG $0x20c68348                           // add    rsi, 32
 34251  	LONG $0x02c78348                           // add    rdi, 2
 34252  	JNE  LBB4_732
        	// Loop finished; continuation label is outside this view.
 34253  	JMP  LBB4_1392
 34254  
 34255  LBB4_735:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_737
        	// (one chunk = 16 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34256  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34257  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
        	// rdx = r10d rounded down to a multiple of 16 (32-bit write zero-extends).
 34258  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34259  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34260  	LONG $0x04e9c149         // shr    r9, 4
 34261  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 16) / 16 + 1, i.e. the number of 16-element chunks.
 34262  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34263  	JE   LBB4_1400
 34264  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34265  	LONG $0xfee78348         // and    rdi, -2
 34266  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34267  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34268  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 34269  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
        	// xmm2 = 0 (comparand), xmm3 = all-ones (used to invert masks via pxor).
        	// xmm4 = 16-byte constant at rbp+224 (.LCPI4_20; its value is not visible
        	// here - presumably eight 16-bit 1s, confirm against the data table).
 34270  	QUAD $0x000000e0a56f0f66 // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 34271  
 34272  LBB4_737:
        	// Loop body: 32 bytes in ([rcx + rsi]) -> 32 words out ([r8 + 2*rsi]) per
        	// iteration, 8 bytes per xmm register. For each signed byte x the output
        	// word is
        	//   x > 0  -> matching word of xmm4
        	//   x == 0 -> 0
        	//   else   -> 0xFFFF (-1)
        	// xmm2 = 0, xmm3 = all-ones and xmm4 are loop-invariant (loaded before the loop).
 34273  	LONG $0x2c7e0ff3; BYTE $0x31               // movq    xmm5, qword [rcx + rsi]
 34274  	LONG $0x747e0ff3; WORD $0x0831             // movq    xmm6, qword [rcx + rsi + 8]
        	// xmm0 / xmm1 = (x > 0) byte masks, sign-extended to eight words.
 34275  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34276  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34277  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 34278  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34279  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34280  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
        	// xmm5 / xmm6 = (x != 0) masks: compare-equal with zero, invert, widen to words.
 34281  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34282  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34283  	LONG $0x20380f66; BYTE $0xed               // pmovsxbw    xmm5, xmm5
 34284  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34285  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34286  	LONG $0x20380f66; BYTE $0xf6               // pmovsxbw    xmm6, xmm6
        	// pblendvb uses xmm0 as its implicit mask, hence the copy of xmm1 into xmm0
        	// before the second blend.
 34287  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 34288  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34289  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 34290  	LONG $0x7f0f41f3; WORD $0x702c             // movdqu    oword [r8 + 2*rsi], xmm5
 34291  	LONG $0x7f0f41f3; WORD $0x7074; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm6
        	// Second half: same operation on elements 16..31 of this iteration.
 34292  	LONG $0x6c7e0ff3; WORD $0x1031             // movq    xmm5, qword [rcx + rsi + 16]
 34293  	LONG $0x747e0ff3; WORD $0x1831             // movq    xmm6, qword [rcx + rsi + 24]
 34294  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34295  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34296  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 34297  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34298  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34299  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
 34300  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34301  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34302  	LONG $0x20380f66; BYTE $0xed               // pmovsxbw    xmm5, xmm5
 34303  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34304  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34305  	LONG $0x20380f66; BYTE $0xf6               // pmovsxbw    xmm6, xmm6
 34306  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 34307  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34308  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 34309  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm5
 34310  	LONG $0x7f0f41f3; WORD $0x7074; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm6
        	// Advance the element index by 32; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34311  	LONG $0x20c68348                           // add    rsi, 32
 34312  	LONG $0x02c78348                           // add    rdi, 2
 34313  	JNE  LBB4_737
        	// Loop finished; continuation label is outside this view.
 34314  	JMP  LBB4_1401
 34315  
 34316  LBB4_746:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_748
        	// (one chunk = 16 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34317  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34318  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
        	// rdx = r10d rounded down to a multiple of 16 (32-bit write zero-extends).
 34319  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34320  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34321  	LONG $0x04e9c149         // shr    r9, 4
 34322  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 16) / 16 + 1, i.e. the number of 16-element chunks.
 34323  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34324  	JE   LBB4_1409
 34325  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34326  	LONG $0xfee78348         // and    rdi, -2
 34327  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34328  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34329  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
        	// xmm0 = 0, the comparand for pcmpeqw in the loop.
        	// xmm1 = 16-byte constant at rbp+224 (.LCPI4_20; its value is not visible
        	// here - presumably eight 16-bit 1s, confirm against the data table).
 34330  	QUAD $0x000000e08d6f0f66 // movdqa    xmm1, oword 224[rbp] /* [rip + .LCPI4_20] */
 34331  
 34332  LBB4_748:
        	// Loop body: 32 words per iteration, as two halves of 16.
        	// For each word w read from [rcx + 2*rsi] the word written to [r8 + 2*rsi] is
        	//   (w == 0) ? 0 : matching word of xmm1
        	// pcmpeqw yields 0xFFFF where w == 0; pandn computes (~mask) & xmm1.
        	// xmm0 (zero) and xmm1 (constant) are loaded just before the loop.
 34333  	LONG $0x146f0ff3; BYTE $0x71               // movdqu    xmm2, oword [rcx + 2*rsi]
 34334  	LONG $0x5c6f0ff3; WORD $0x1071             // movdqu    xmm3, oword [rcx + 2*rsi + 16]
 34335  	LONG $0xd0750f66                           // pcmpeqw    xmm2, xmm0
 34336  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34337  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 34338  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34339  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 34340  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
        	// Second half: same operation on elements 16..31 of this iteration.
 34341  	LONG $0x546f0ff3; WORD $0x2071             // movdqu    xmm2, oword [rcx + 2*rsi + 32]
 34342  	LONG $0x5c6f0ff3; WORD $0x3071             // movdqu    xmm3, oword [rcx + 2*rsi + 48]
 34343  	LONG $0xd0750f66                           // pcmpeqw    xmm2, xmm0
 34344  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34345  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 34346  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34347  	LONG $0x7f0f41f3; WORD $0x7054; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm2
 34348  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm3
        	// Advance the element index by 32; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34349  	LONG $0x20c68348                           // add    rsi, 32
 34350  	LONG $0x02c78348                           // add    rdi, 2
 34351  	JNE  LBB4_748
        	// Loop finished; continuation label is outside this view.
 34352  	JMP  LBB4_1410
 34353  
 34354  LBB4_751:
        	// Set-up for the two-chunks-per-iteration vector loop at LBB4_753
        	// (one chunk = 16 elements). r10d is live-in; it is presumably the
        	// element count computed before this point - TODO confirm.
 34355  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34356  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
        	// rdx = r10d rounded down to a multiple of 16 (32-bit write zero-extends).
 34357  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34358  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34359  	LONG $0x04e9c149         // shr    r9, 4
 34360  	LONG $0x01c18349         // add    r9, 1
        	// r9 = (rdx - 16) / 16 + 1, i.e. the number of 16-element chunks.
 34361  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// rsi == 0 means exactly one chunk: skip the paired loop entirely.
        	// The target label lies outside this view.
 34362  	JE   LBB4_1417
 34363  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34364  	LONG $0xfee78348         // and    rdi, -2
 34365  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(chunk count rounded down to even); the loop adds 2 until it hits 0.
 34366  	WORD $0xf631             // xor    esi, esi
        	// rsi = 0: running element index used by the loop's addressing.
 34367  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
        	// xmm0 = 0, the comparand for pcmpeqw in the loop.
        	// xmm1 = 16-byte constant at rbp+224 (.LCPI4_20; its value is not visible
        	// here - presumably eight 16-bit 1s, confirm against the data table).
 34368  	QUAD $0x000000e08d6f0f66 // movdqa    xmm1, oword 224[rbp] /* [rip + .LCPI4_20] */
 34369  
 34370  LBB4_753:
        	// Loop body: 32 words per iteration, as two halves of 16.
        	// For each word w read from [rcx + 2*rsi] the word written to [r8 + 2*rsi] is
        	//   (w == 0) ? 0 : matching word of xmm1
        	// pcmpeqw yields 0xFFFF where w == 0; pandn computes (~mask) & xmm1.
        	// xmm0 (zero) and xmm1 (constant) are loaded just before the loop.
 34371  	LONG $0x146f0ff3; BYTE $0x71               // movdqu    xmm2, oword [rcx + 2*rsi]
 34372  	LONG $0x5c6f0ff3; WORD $0x1071             // movdqu    xmm3, oword [rcx + 2*rsi + 16]
 34373  	LONG $0xd0750f66                           // pcmpeqw    xmm2, xmm0
 34374  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34375  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 34376  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34377  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 34378  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
        	// Second half: same operation on elements 16..31 of this iteration.
 34379  	LONG $0x546f0ff3; WORD $0x2071             // movdqu    xmm2, oword [rcx + 2*rsi + 32]
 34380  	LONG $0x5c6f0ff3; WORD $0x3071             // movdqu    xmm3, oword [rcx + 2*rsi + 48]
 34381  	LONG $0xd0750f66                           // pcmpeqw    xmm2, xmm0
 34382  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34383  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 34384  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34385  	LONG $0x7f0f41f3; WORD $0x7054; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm2
 34386  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm3
        	// Advance the element index by 32; rdi is a negative counter stepping by 2
        	// towards zero, and the add sets ZF for the JNE below.
 34387  	LONG $0x20c68348                           // add    rsi, 32
 34388  	LONG $0x02c78348                           // add    rdi, 2
 34389  	JNE  LBB4_753
        	// Loop finished; continuation label is outside this view.
 34390  	JMP  LBB4_1418
 34391  
 34392  LBB4_756: // Setup for the 2x-unrolled loop that uses signed 16-bit compares; the element count is taken from r11d.
 34393  	WORD $0x8944; BYTE $0xda // mov    edx, r11d
 34394  	WORD $0xe283; BYTE $0xf0 // and    edx, -16 -- rdx = count rounded down to a multiple of 16
 34395  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34396  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34397  	LONG $0x04e9c149         // shr    r9, 4
 34398  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 16)/16 + 1, the number of 16-element groups
 34399  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34400  	JE   LBB4_1425 // rdx == 16 (a single group): bypass the unrolled loop below
 34401  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34402  	LONG $0xfee78348         // and    rdi, -2
 34403  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34404  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34405  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 -- xmm2 = all zero
 34406  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 -- xmm3 = all ones (used to invert compare masks)
 34407  	QUAD $0x000000e0a56f0f66 // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */ -- xmm4 = constant vector; value not visible here (presumably 1 per 16-bit lane, TODO confirm)
 34408  
 34409  LBB4_758: // Per signed 16-bit lane: out = (xmm4 > in) ? ((in != 0) ? -1 : 0) : xmm4. Reads rcx, writes r8, 32 elements per pass.
 34410  	LONG $0x2c6f0ff3; BYTE $0x71               // movdqu    xmm5, oword [rcx + 2*rsi]
 34411  	LONG $0x746f0ff3; WORD $0x1071             // movdqu    xmm6, oword [rcx + 2*rsi + 16]
 34412  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34413  	LONG $0xc5650f66                           // pcmpgtw    xmm0, xmm5 -- xmm0 = mask of lanes where constant > input (signed)
 34414  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 34415  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 -- xmm5 = ~(in == 0): all-ones (-1) in non-zero lanes, 0 otherwise
 34416  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34417  	LONG $0xce650f66                           // pcmpgtw    xmm1, xmm6 -- same mask for the second vector, parked in xmm1
 34418  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 34419  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34420  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34421  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0 -- xmm0 is the implicit mask: take xmm5 where set, keep the constant elsewhere
 34422  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34423  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 -- move the second mask into xmm0 for the next blend
 34424  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 34425  	LONG $0x7f0f41f3; WORD $0x703c             // movdqu    oword [r8 + 2*rsi], xmm7
 34426  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm5
 34427  	LONG $0x6c6f0ff3; WORD $0x2071             // movdqu    xmm5, oword [rcx + 2*rsi + 32] -- second unrolled half, next 16 elements
 34428  	LONG $0x746f0ff3; WORD $0x3071             // movdqu    xmm6, oword [rcx + 2*rsi + 48]
 34429  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34430  	LONG $0xc5650f66                           // pcmpgtw    xmm0, xmm5
 34431  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 34432  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34433  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34434  	LONG $0xce650f66                           // pcmpgtw    xmm1, xmm6
 34435  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 34436  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34437  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34438  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0
 34439  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34440  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34441  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 34442  	LONG $0x7f0f41f3; WORD $0x707c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm7
 34443  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm5
 34444  	LONG $0x20c68348                           // add    rsi, 32 -- advance the element index
 34445  	LONG $0x02c78348                           // add    rdi, 2 -- two 16-element groups consumed; sets ZF when the negative counter hits zero
 34446  	JNE  LBB4_758 // repeat while the counter is non-zero
 34447  	JMP  LBB4_1426 // loop finished; continuation is outside this view
 34448  
 34449  LBB4_761: // Setup for a 2x-unrolled loop that uses signed 16-bit compares; the element count is taken from r11d.
 34450  	WORD $0x8944; BYTE $0xda // mov    edx, r11d
 34451  	WORD $0xe283; BYTE $0xf0 // and    edx, -16 -- rdx = count rounded down to a multiple of 16
 34452  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34453  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34454  	LONG $0x04e9c149         // shr    r9, 4
 34455  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 16)/16 + 1, the number of 16-element groups
 34456  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34457  	JE   LBB4_1434 // rdx == 16 (a single group): bypass the unrolled loop below
 34458  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34459  	LONG $0xfee78348         // and    rdi, -2
 34460  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34461  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34462  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 -- xmm2 = all zero
 34463  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 -- xmm3 = all ones (used to invert compare masks)
 34464  	QUAD $0x000000e0a56f0f66 // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */ -- xmm4 = constant vector; value not visible here (presumably 1 per 16-bit lane, TODO confirm)
 34465  
 34466  LBB4_763: // Per signed 16-bit lane: out = (xmm4 > in) ? ((in != 0) ? -1 : 0) : xmm4. Reads rcx, writes r8, 32 elements per pass.
 34467  	LONG $0x2c6f0ff3; BYTE $0x71               // movdqu    xmm5, oword [rcx + 2*rsi]
 34468  	LONG $0x746f0ff3; WORD $0x1071             // movdqu    xmm6, oword [rcx + 2*rsi + 16]
 34469  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34470  	LONG $0xc5650f66                           // pcmpgtw    xmm0, xmm5 -- xmm0 = mask of lanes where constant > input (signed)
 34471  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 34472  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 -- xmm5 = ~(in == 0): all-ones (-1) in non-zero lanes, 0 otherwise
 34473  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34474  	LONG $0xce650f66                           // pcmpgtw    xmm1, xmm6 -- same mask for the second vector, parked in xmm1
 34475  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 34476  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34477  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34478  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0 -- xmm0 is the implicit mask: take xmm5 where set, keep the constant elsewhere
 34479  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34480  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 -- move the second mask into xmm0 for the next blend
 34481  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 34482  	LONG $0x7f0f41f3; WORD $0x703c             // movdqu    oword [r8 + 2*rsi], xmm7
 34483  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm5
 34484  	LONG $0x6c6f0ff3; WORD $0x2071             // movdqu    xmm5, oword [rcx + 2*rsi + 32] -- second unrolled half, next 16 elements
 34485  	LONG $0x746f0ff3; WORD $0x3071             // movdqu    xmm6, oword [rcx + 2*rsi + 48]
 34486  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34487  	LONG $0xc5650f66                           // pcmpgtw    xmm0, xmm5
 34488  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 34489  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34490  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34491  	LONG $0xce650f66                           // pcmpgtw    xmm1, xmm6
 34492  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 34493  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34494  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34495  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0
 34496  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34497  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34498  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 34499  	LONG $0x7f0f41f3; WORD $0x707c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm7
 34500  	LONG $0x7f0f41f3; WORD $0x706c; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm5
 34501  	LONG $0x20c68348                           // add    rsi, 32 -- advance the element index
 34502  	LONG $0x02c78348                           // add    rdi, 2 -- two 16-element groups consumed; sets ZF when the negative counter hits zero
 34503  	JNE  LBB4_763 // repeat while the counter is non-zero
 34504  	JMP  LBB4_1435 // loop finished; continuation is outside this view
 34505  
 34506  LBB4_778: // Setup for the 2x-unrolled loop that widens byte inputs to 16-bit outputs; the element count is taken from r10d.
 34507  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34508  	WORD $0xe283; BYTE $0xf0 // and    edx, -16 -- rdx = count rounded down to a multiple of 16
 34509  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34510  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34511  	LONG $0x04e9c149         // shr    r9, 4
 34512  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 16)/16 + 1, the number of 16-element groups
 34513  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34514  	JE   LBB4_1443 // rdx == 16 (a single group): bypass the unrolled loop below
 34515  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34516  	LONG $0xfee78348         // and    rdi, -2
 34517  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34518  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34519  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 -- xmm0 = all zero
 34520  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 -- xmm1 = all ones (used to invert compare masks)
 34521  	QUAD $0x000000e0956f0f66 // movdqa    xmm2, oword 224[rbp] /* [rip + .LCPI4_20] */ -- xmm2 = constant vector; value not visible here (presumably 1 per 16-bit lane, TODO confirm)
 34522  
 34523  LBB4_780: // Byte input -> 16-bit output: out = (in != 0) ? (xmm2 & 0x00ff) : 0. Reads rcx, writes r8, 32 elements per pass.
 34524  	LONG $0x1c7e0ff3; BYTE $0x31               // movq    xmm3, qword [rcx + rsi] -- 8 input bytes
 34525  	LONG $0x647e0ff3; WORD $0x0831             // movq    xmm4, qword [rcx + rsi + 8]
 34526  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34527  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1 -- xmm3 = ~(in == 0): 0xff in non-zero byte lanes
 34528  	LONG $0x30380f66; BYTE $0xdb               // pmovzxbw    xmm3, xmm3 -- zero-extend the 8 mask bytes to 8 words (0x00ff or 0)
 34529  	LONG $0xdadb0f66                           // pand    xmm3, xmm2 -- keep the constant's low byte only in non-zero lanes
 34530  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34531  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34532  	LONG $0x30380f66; BYTE $0xe4               // pmovzxbw    xmm4, xmm4
 34533  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34534  	LONG $0x7f0f41f3; WORD $0x701c             // movdqu    oword [r8 + 2*rsi], xmm3 -- output index is scaled by 2 bytes per element
 34535  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm4
 34536  	LONG $0x5c7e0ff3; WORD $0x1031             // movq    xmm3, qword [rcx + rsi + 16] -- second unrolled half, next 16 elements
 34537  	LONG $0x647e0ff3; WORD $0x1831             // movq    xmm4, qword [rcx + rsi + 24]
 34538  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34539  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34540  	LONG $0x30380f66; BYTE $0xdb               // pmovzxbw    xmm3, xmm3
 34541  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34542  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34543  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34544  	LONG $0x30380f66; BYTE $0xe4               // pmovzxbw    xmm4, xmm4
 34545  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34546  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm3
 34547  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm4
 34548  	LONG $0x20c68348                           // add    rsi, 32 -- advance the element index
 34549  	LONG $0x02c78348                           // add    rdi, 2 -- two 16-element groups consumed; sets ZF when the negative counter hits zero
 34550  	JNE  LBB4_780 // repeat while the counter is non-zero
 34551  	JMP  LBB4_1444 // loop finished; continuation is outside this view
 34552  
 34553  LBB4_783: // Setup for a 2x-unrolled loop that widens byte inputs to 16-bit outputs; the element count is taken from r10d.
 34554  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34555  	WORD $0xe283; BYTE $0xf0 // and    edx, -16 -- rdx = count rounded down to a multiple of 16
 34556  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 34557  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34558  	LONG $0x04e9c149         // shr    r9, 4
 34559  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 16)/16 + 1, the number of 16-element groups
 34560  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34561  	JE   LBB4_1451 // rdx == 16 (a single group): bypass the unrolled loop below
 34562  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34563  	LONG $0xfee78348         // and    rdi, -2
 34564  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34565  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34566  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 -- xmm0 = all zero
 34567  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 -- xmm1 = all ones (used to invert compare masks)
 34568  	QUAD $0x000000e0956f0f66 // movdqa    xmm2, oword 224[rbp] /* [rip + .LCPI4_20] */ -- xmm2 = constant vector; value not visible here (presumably 1 per 16-bit lane, TODO confirm)
 34569  
 34570  LBB4_785: // Byte input -> 16-bit output: out = (in != 0) ? (xmm2 & 0x00ff) : 0. Reads rcx, writes r8, 32 elements per pass.
 34571  	LONG $0x1c7e0ff3; BYTE $0x31               // movq    xmm3, qword [rcx + rsi] -- 8 input bytes
 34572  	LONG $0x647e0ff3; WORD $0x0831             // movq    xmm4, qword [rcx + rsi + 8]
 34573  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34574  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1 -- xmm3 = ~(in == 0): 0xff in non-zero byte lanes
 34575  	LONG $0x30380f66; BYTE $0xdb               // pmovzxbw    xmm3, xmm3 -- zero-extend the 8 mask bytes to 8 words (0x00ff or 0)
 34576  	LONG $0xdadb0f66                           // pand    xmm3, xmm2 -- keep the constant's low byte only in non-zero lanes
 34577  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34578  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34579  	LONG $0x30380f66; BYTE $0xe4               // pmovzxbw    xmm4, xmm4
 34580  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34581  	LONG $0x7f0f41f3; WORD $0x701c             // movdqu    oword [r8 + 2*rsi], xmm3 -- output index is scaled by 2 bytes per element
 34582  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm4
 34583  	LONG $0x5c7e0ff3; WORD $0x1031             // movq    xmm3, qword [rcx + rsi + 16] -- second unrolled half, next 16 elements
 34584  	LONG $0x647e0ff3; WORD $0x1831             // movq    xmm4, qword [rcx + rsi + 24]
 34585  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34586  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34587  	LONG $0x30380f66; BYTE $0xdb               // pmovzxbw    xmm3, xmm3
 34588  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34589  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34590  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34591  	LONG $0x30380f66; BYTE $0xe4               // pmovzxbw    xmm4, xmm4
 34592  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34593  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x20 // movdqu    oword [r8 + 2*rsi + 32], xmm3
 34594  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x30 // movdqu    oword [r8 + 2*rsi + 48], xmm4
 34595  	LONG $0x20c68348                           // add    rsi, 32 -- advance the element index
 34596  	LONG $0x02c78348                           // add    rdi, 2 -- two 16-element groups consumed; sets ZF when the negative counter hits zero
 34597  	JNE  LBB4_785 // repeat while the counter is non-zero
 34598  	JMP  LBB4_1452 // loop finished; continuation is outside this view
 34599  
 34600  LBB4_806: // Setup for the 2x-unrolled loop that widens signed byte inputs to 64-bit outputs; the element count is taken from r10d.
 34601  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34602  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = count rounded down to a multiple of 4
 34603  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34604  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34605  	LONG $0x02e9c149         // shr    r9, 2
 34606  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4)/4 + 1, the number of 4-element groups
 34607  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34608  	JE   LBB4_1459 // rdx == 4 (a single group): bypass the unrolled loop below
 34609  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34610  	LONG $0xfee78348         // and    rdi, -2
 34611  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34612  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34613  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 -- xmm2 = all zero
 34614  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 -- xmm3 = all ones (used to invert compare masks)
 34615  	QUAD $0x00000090a5280f66 // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */ -- xmm4 = constant vector; value not visible here (presumably 1 per 64-bit lane, TODO confirm)
 34616  
 34617  LBB4_808: // Signed byte input -> 64-bit output: out = (in > 0) ? xmm4 : ((in != 0) ? all-ones : 0). Reads rcx, writes r8, 8 elements per pass.
 34618  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi] -- two input bytes at once
 34619  	LONG $0xe86e0f66                           // movd    xmm5, eax
 34620  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 34621  	LONG $0xf06e0f66                           // movd    xmm6, eax
 34622  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34623  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2 -- byte mask of lanes where in > 0 (signed)
 34624  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0 -- sign-extend the two low mask bytes to two 64-bit lanes
 34625  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34626  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34627  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1 -- mask for the second pair, parked in xmm1
 34628  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34629  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 -- xmm5 = ~(in == 0) per byte
 34630  	LONG $0x22380f66; BYTE $0xed               // pmovsxbq    xmm5, xmm5 -- all-ones (-1 as a 64-bit integer) in non-zero lanes, 0 otherwise
 34631  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34632  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34633  	LONG $0x22380f66; BYTE $0xf6               // pmovsxbq    xmm6, xmm6
 34634  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0 -- xmm0 is the implicit mask: take the constant where in > 0
 34635  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 -- move the second mask into xmm0 for the next blend
 34636  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 34637  	LONG $0x110f4166; WORD $0xf02c             // movupd    oword [r8 + 8*rsi], xmm5 -- output index is scaled by 8 bytes per element
 34638  	LONG $0x110f4166; WORD $0xf074; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm6
 34639  	LONG $0x3144b70f; BYTE $0x04               // movzx    eax, word [rcx + rsi + 4] -- second unrolled half, next 4 elements
 34640  	LONG $0xe86e0f66                           // movd    xmm5, eax
 34641  	LONG $0x3144b70f; BYTE $0x06               // movzx    eax, word [rcx + rsi + 6]
 34642  	LONG $0xf06e0f66                           // movd    xmm6, eax
 34643  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 34644  	LONG $0xc2640f66                           // pcmpgtb    xmm0, xmm2
 34645  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0
 34646  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 34647  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 34648  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1
 34649  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 34650  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34651  	LONG $0x22380f66; BYTE $0xed               // pmovsxbq    xmm5, xmm5
 34652  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 34653  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34654  	LONG $0x22380f66; BYTE $0xf6               // pmovsxbq    xmm6, xmm6
 34655  	LONG $0x15380f66; BYTE $0xec               // blendvpd    xmm5, xmm4, xmm0
 34656  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34657  	LONG $0x15380f66; BYTE $0xf4               // blendvpd    xmm6, xmm4, xmm0
 34658  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm5
 34659  	LONG $0x110f4166; WORD $0xf074; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm6
 34660  	LONG $0x08c68348                           // add    rsi, 8 -- advance the element index
 34661  	LONG $0x02c78348                           // add    rdi, 2 -- two 4-element groups consumed; sets ZF when the negative counter hits zero
 34662  	JNE  LBB4_808 // repeat while the counter is non-zero
 34663  	JMP  LBB4_1460 // loop finished; continuation is outside this view
 34664  
 34665  LBB4_811: // Setup for the 2x-unrolled loop that converts signed byte inputs to float32 outputs; the element count is taken from eax.
 34666  	WORD $0xc289                               // mov    edx, eax
 34667  	WORD $0xe283; BYTE $0xf8                   // and    edx, -8 -- rdx = count rounded down to a multiple of 8
 34668  	LONG $0xf8728d48                           // lea    rsi, [rdx - 8]
 34669  	WORD $0x8949; BYTE $0xf1                   // mov    r9, rsi
 34670  	LONG $0x03e9c149                           // shr    r9, 3
 34671  	LONG $0x01c18349                           // add    r9, 1 -- r9 = (rdx - 8)/8 + 1, the number of 8-element groups
 34672  	WORD $0x8548; BYTE $0xf6                   // test    rsi, rsi
 34673  	JE   LBB4_1468 // rdx == 8 (a single group): bypass the unrolled loop below
 34674  	WORD $0x894c; BYTE $0xcf                   // mov    rdi, r9
 34675  	LONG $0xfee78348                           // and    rdi, -2
 34676  	WORD $0xf748; BYTE $0xdf                   // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34677  	WORD $0xf631                               // xor    esi, esi -- rsi = element index, starts at 0
 34678  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2 -- xmm2 = all zero
 34679  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 -- xmm3 = all ones (used to invert compare masks)
 34680  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */ -- xmm4 = constant vector; value not visible here (presumably 1.0f per lane, TODO confirm)
 34681  
 34682  LBB4_813: // Signed byte input -> float32 output: out = (in > 0) ? xmm4 : ((in != 0) ? -1.0f : 0.0f). Reads rcx, writes r8, 16 elements per pass.
 34683  	LONG $0x2c6e0f66; BYTE $0x31   // movd    xmm5, dword [rcx + rsi] -- four input bytes at once
 34684  	LONG $0x746e0f66; WORD $0x0431 // movd    xmm6, dword [rcx + rsi + 4]
 34685  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 34686  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2 -- byte mask of lanes where in > 0 (signed)
 34687  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0 -- sign-extend the four mask bytes to four 32-bit lanes
 34688  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 34689  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 34690  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1 -- mask for the second vector, parked in xmm1
 34691  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 34692  	LONG $0xebef0f66               // pxor    xmm5, xmm3 -- xmm5 = ~(in == 0) per byte
 34693  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5 -- integer -1 in non-zero lanes, 0 otherwise
 34694  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5 -- convert to float: -1.0f or 0.0f
 34695  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 34696  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 34697  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
 34698  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 34699  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0 -- xmm0 is the implicit mask: take the constant where in > 0
 34700  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1 -- move the second mask into xmm0 for the next blend
 34701  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 34702  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5 -- output index is scaled by 4 bytes per element
 34703  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
 34704  	LONG $0x6c6e0f66; WORD $0x0831 // movd    xmm5, dword [rcx + rsi + 8] -- second unrolled half, next 8 elements
 34705  	LONG $0x746e0f66; WORD $0x0c31 // movd    xmm6, dword [rcx + rsi + 12]
 34706  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 34707  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2
 34708  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0
 34709  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 34710  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 34711  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 34712  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 34713  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 34714  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5
 34715  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 34716  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 34717  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 34718  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
 34719  	WORD $0x5b0f; BYTE $0xf6       // cvtdq2ps    xmm6, xmm6
 34720  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 34721  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 34722  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 34723  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 34724  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
 34725  	LONG $0x10c68348               // add    rsi, 16 -- advance the element index
 34726  	LONG $0x02c78348               // add    rdi, 2 -- two 8-element groups consumed; sets ZF when the negative counter hits zero
 34727  	JNE  LBB4_813 // repeat while the counter is non-zero
 34728  	JMP  LBB4_1469 // loop finished; continuation is outside this view
 34729  
 34730  LBB4_816: // Setup for the 2x-unrolled 64-bit-lane loop; the element count is taken from r10d.
 34731  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34732  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = count rounded down to a multiple of 4
 34733  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34734  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34735  	LONG $0x02e9c149         // shr    r9, 2
 34736  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4)/4 + 1, the number of 4-element groups
 34737  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34738  	JE   LBB4_1490 // rdx == 4 (a single group): bypass the unrolled loop below
 34739  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34740  	LONG $0xfee78348         // and    rdi, -2
 34741  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34742  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34743  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 -- xmm0 = all-zero comparand
 34744  	QUAD $0x000000908d6f0f66 // movdqa    xmm1, oword 144[rbp] /* [rip + .LCPI4_15] */ -- xmm1 = constant vector; value not visible here (presumably 1 per 64-bit lane, TODO confirm)
 34745  
 34746  LBB4_818: // Per 64-bit lane: out = (in != 0) ? xmm1 : 0. Reads from rcx, writes to r8, 8 elements per pass.
 34747  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 34748  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 34749  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0 -- all-ones in lanes where the input is zero
 34750  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1 -- xmm2 = ~xmm2 & xmm1: keep the constant only in non-zero lanes
 34751  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 34752  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34753  	LONG $0x7f0f41f3; WORD $0xf014             // movdqu    oword [r8 + 8*rsi], xmm2
 34754  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm3
 34755  	LONG $0x546f0ff3; WORD $0x20f1             // movdqu    xmm2, oword [rcx + 8*rsi + 32] -- second unrolled half, next 4 elements
 34756  	LONG $0x5c6f0ff3; WORD $0x30f1             // movdqu    xmm3, oword [rcx + 8*rsi + 48]
 34757  	LONG $0x29380f66; BYTE $0xd0               // pcmpeqq    xmm2, xmm0
 34758  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 34759  	LONG $0x29380f66; BYTE $0xd8               // pcmpeqq    xmm3, xmm0
 34760  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 34761  	LONG $0x7f0f41f3; WORD $0xf054; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm2
 34762  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm3
 34763  	LONG $0x08c68348                           // add    rsi, 8 -- advance the element index
 34764  	LONG $0x02c78348                           // add    rdi, 2 -- two 4-element groups consumed; sets ZF when the negative counter hits zero
 34765  	JNE  LBB4_818 // repeat while the counter is non-zero
 34766  	JMP  LBB4_1491 // loop finished; continuation is outside this view
 34767  
 34768  LBB4_843: // Setup for the 2x-unrolled loop that uses signed 64-bit compares; the element count is taken from r11d.
 34769  	WORD $0x8944; BYTE $0xda // mov    edx, r11d
 34770  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = count rounded down to a multiple of 4
 34771  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34772  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34773  	LONG $0x02e9c149         // shr    r9, 2
 34774  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4)/4 + 1, the number of 4-element groups
 34775  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34776  	JE   LBB4_1498 // rdx == 4 (a single group): bypass the unrolled loop below
 34777  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34778  	LONG $0xfee78348         // and    rdi, -2
 34779  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34780  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34781  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2 -- xmm2 = all zero
 34782  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3 -- xmm3 = all ones (used to invert compare masks)
 34783  	QUAD $0x00000090a56f0f66 // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */ -- xmm4 = constant vector; value not visible here (presumably 1 per 64-bit lane, TODO confirm)
 34784  
 34785  LBB4_845: // Per signed 64-bit lane: out = (xmm4 > in) ? ((in != 0) ? -1 : 0) : xmm4. Reads rcx, writes r8, 8 elements per pass.
 34786  	LONG $0x2c6f0ff3; BYTE $0xf1               // movdqu    xmm5, oword [rcx + 8*rsi]
 34787  	LONG $0x746f0ff3; WORD $0x10f1             // movdqu    xmm6, oword [rcx + 8*rsi + 16]
 34788  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34789  	LONG $0x37380f66; BYTE $0xc5               // pcmpgtq    xmm0, xmm5 -- xmm0 = mask of lanes where constant > input (signed)
 34790  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 34791  	LONG $0xebef0f66                           // pxor    xmm5, xmm3 -- xmm5 = ~(in == 0): all-ones (-1) in non-zero lanes, 0 otherwise
 34792  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34793  	LONG $0x37380f66; BYTE $0xce               // pcmpgtq    xmm1, xmm6 -- same mask for the second vector, parked in xmm1
 34794  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 34795  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34796  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34797  	LONG $0x15380f66; BYTE $0xfd               // blendvpd    xmm7, xmm5, xmm0 -- xmm0 is the implicit mask: take xmm5 where set, keep the constant elsewhere
 34798  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34799  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 -- move the second mask into xmm0 for the next blend
 34800  	LONG $0x15380f66; BYTE $0xee               // blendvpd    xmm5, xmm6, xmm0
 34801  	LONG $0x110f4166; WORD $0xf03c             // movupd    oword [r8 + 8*rsi], xmm7
 34802  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm5
 34803  	LONG $0x6c6f0ff3; WORD $0x20f1             // movdqu    xmm5, oword [rcx + 8*rsi + 32] -- second unrolled half, next 4 elements
 34804  	LONG $0x746f0ff3; WORD $0x30f1             // movdqu    xmm6, oword [rcx + 8*rsi + 48]
 34805  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 34806  	LONG $0x37380f66; BYTE $0xc5               // pcmpgtq    xmm0, xmm5
 34807  	LONG $0x29380f66; BYTE $0xea               // pcmpeqq    xmm5, xmm2
 34808  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 34809  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 34810  	LONG $0x37380f66; BYTE $0xce               // pcmpgtq    xmm1, xmm6
 34811  	LONG $0x29380f66; BYTE $0xf2               // pcmpeqq    xmm6, xmm2
 34812  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 34813  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 34814  	LONG $0x15380f66; BYTE $0xfd               // blendvpd    xmm7, xmm5, xmm0
 34815  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 34816  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 34817  	LONG $0x15380f66; BYTE $0xee               // blendvpd    xmm5, xmm6, xmm0
 34818  	LONG $0x110f4166; WORD $0xf07c; BYTE $0x20 // movupd    oword [r8 + 8*rsi + 32], xmm7
 34819  	LONG $0x110f4166; WORD $0xf06c; BYTE $0x30 // movupd    oword [r8 + 8*rsi + 48], xmm5
 34820  	LONG $0x08c68348                           // add    rsi, 8 -- advance the element index
 34821  	LONG $0x02c78348                           // add    rdi, 2 -- two 4-element groups consumed; sets ZF when the negative counter hits zero
 34822  	JNE  LBB4_845 // repeat while the counter is non-zero
 34823  	JMP  LBB4_1499 // loop finished; continuation is outside this view
 34824  
 34825  LBB4_989: // Scalar path: preload xmm0 with one float32 constant, then fall through into LBB4_990.
 34826  	QUAD $0x0000013085100ff3 // movss    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */ -- constant value not visible here; movss leaves the flags untouched
 34827  
 34828  LBB4_990: // Chooses between the value already in xmm0 and a second constant, using flags set before this label (not visible here).
 34829  	JLE  LBB4_992 // "less or equal": keep the value already in xmm0
 34830  	QUAD $0x0000012885100ff3 // movss    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */ -- otherwise replace it with the constant at 296[rbp] (value not visible here)
 34831  
 34832  LBB4_992: // Store the selected float32 result into the output slot indexed by rax.
 34833  	LONG $0x110f41f3; WORD $0x8004 // movss    dword [r8 + 4*rax], xmm0 -- r8 = output base, 4 bytes per element
 34834  	JMP  LBB4_1655 // continuation is outside this view
 34835  
 34836  LBB4_866: // Setup for the float32 vector loop (8 elements per pass); the element count is taken from eax.
 34837  	WORD $0xc289                 // mov    edx, eax
 34838  	WORD $0xe283; BYTE $0xf8     // and    edx, -8 -- rdx = count rounded down to a multiple of 8 (loop end index)
 34839  	WORD $0xf631                 // xor    esi, esi -- rsi = element index, starts at 0
 34840  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0 -- xmm0 = 0.0f in every lane
 34841  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI4_8] */ -- xmm1 = integer constant vector; value not visible here (presumably 1 per 32-bit lane, TODO confirm)
 34842  
 34843  LBB4_867: // Per float32 lane: out = (in != 0.0) ? float((in_bits >> 31) | xmm1) : 0. Reads rcx, writes r8. NOTE(review): NaN lanes compare not-equal to 0 and so also receive the signed constant -- confirm intended.
 34844  	LONG $0x146f0ff3; BYTE $0xb1   // movdqu    xmm2, oword [rcx + 4*rsi]
 34845  	LONG $0x5c6f0ff3; WORD $0x10b1 // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 34846  	LONG $0xe26f0f66               // movdqa    xmm4, xmm2
 34847  	LONG $0xe4720f66; BYTE $0x1f   // psrad    xmm4, 31 -- arithmetic shift of the raw bits: -1 where the sign bit is set, 0 otherwise
 34848  	LONG $0xe1eb0f66               // por    xmm4, xmm1 -- OR in the integer constant
 34849  	LONG $0xeb6f0f66               // movdqa    xmm5, xmm3
 34850  	LONG $0xe5720f66; BYTE $0x1f   // psrad    xmm5, 31
 34851  	LONG $0xe9eb0f66               // por    xmm5, xmm1
 34852  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4 -- convert the signed integers to float32
 34853  	WORD $0x5b0f; BYTE $0xed       // cvtdq2ps    xmm5, xmm5
 34854  	LONG $0x04d0c20f               // cmpneqps    xmm2, xmm0 -- mask of lanes where in != 0.0 (true for NaN as well)
 34855  	WORD $0x540f; BYTE $0xd4       // andps    xmm2, xmm4 -- zero out the lanes whose input was +/-0.0
 34856  	LONG $0x04d8c20f               // cmpneqps    xmm3, xmm0
 34857  	WORD $0x540f; BYTE $0xdd       // andps    xmm3, xmm5
 34858  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 34859  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 34860  	LONG $0x08c68348               // add    rsi, 8 -- advance the element index
 34861  	WORD $0x3948; BYTE $0xf2       // cmp    rdx, rsi
 34862  	JNE  LBB4_867 // repeat until the index reaches the rounded-down count in rdx
 34863  	WORD $0x3948; BYTE $0xc2       // cmp    rdx, rax -- was the count already a multiple of 8?
 34864  	JE   LBB4_1655 // yes: every element has been processed
 34865  	JMP  LBB4_869 // no: continue at LBB4_869 (outside this view; presumably the scalar tail)
 34866  
 34867  LBB4_876: // Setup for the 2x-unrolled loop that widens byte inputs to 64-bit outputs; the element count is taken from r10d.
 34868  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 34869  	WORD $0xe283; BYTE $0xfc // and    edx, -4 -- rdx = count rounded down to a multiple of 4
 34870  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 34871  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34872  	LONG $0x02e9c149         // shr    r9, 2
 34873  	LONG $0x01c18349         // add    r9, 1 -- r9 = (rdx - 4)/4 + 1, the number of 4-element groups
 34874  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34875  	JE   LBB4_1507 // rdx == 4 (a single group): bypass the unrolled loop below
 34876  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34877  	LONG $0xfee78348         // and    rdi, -2
 34878  	WORD $0xf748; BYTE $0xdf // neg    rdi -- rdi = -(r9 & ~1); the loop adds 2 per pass until it reaches zero
 34879  	WORD $0xf631             // xor    esi, esi -- rsi = element index, starts at 0
 34880  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0 -- xmm0 = all zero
 34881  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1 -- xmm1 = all ones (used to invert compare masks)
 34882  	QUAD $0x00000090956f0f66 // movdqa    xmm2, oword 144[rbp] /* [rip + .LCPI4_15] */ -- xmm2 = constant vector; value not visible here (presumably 1 per 64-bit lane, TODO confirm)
 34883  
 // LBB4_878: loop body, unrolled x2; reads 8 source bytes and writes 64 bytes per pass.
 // Each of the four steps:
 //   - loads 2 bytes from [rcx + rsi + k]            (k = 0, 2, 4, 6)
 //   - builds a per-byte "non-zero" mask: pcmpeqb against zero, then XOR with all-ones
 //   - zero-extends the two mask bytes to two qwords (pmovzxbq)
 //   - ANDs with xmm2 and stores 16 bytes to [r8 + 8*rsi + 8*k]
 // i.e. 8-bit inputs produce 64-bit outputs of the form (x != 0) ? (0xFF & xmm2) : 0.
 // rsi advances by 8 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1508 (outside this chunk).
 34884  LBB4_878:
 34885  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 34886  	LONG $0xd86e0f66                           // movd    xmm3, eax
 34887  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 34888  	LONG $0xe06e0f66                           // movd    xmm4, eax
 34889  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34890  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34891  	LONG $0x32380f66; BYTE $0xdb               // pmovzxbq    xmm3, xmm3
 34892  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34893  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34894  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34895  	LONG $0x32380f66; BYTE $0xe4               // pmovzxbq    xmm4, xmm4
 34896  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34897  	LONG $0x7f0f41f3; WORD $0xf01c             // movdqu    oword [r8 + 8*rsi], xmm3
 34898  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm4
 34899  	LONG $0x3144b70f; BYTE $0x04               // movzx    eax, word [rcx + rsi + 4]
 34900  	LONG $0xd86e0f66                           // movd    xmm3, eax
 34901  	LONG $0x3144b70f; BYTE $0x06               // movzx    eax, word [rcx + rsi + 6]
 34902  	LONG $0xe06e0f66                           // movd    xmm4, eax
 34903  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 34904  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34905  	LONG $0x32380f66; BYTE $0xdb               // pmovzxbq    xmm3, xmm3
 34906  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34907  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 34908  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34909  	LONG $0x32380f66; BYTE $0xe4               // pmovzxbq    xmm4, xmm4
 34910  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34911  	LONG $0x7f0f41f3; WORD $0xf05c; BYTE $0x20 // movdqu    oword [r8 + 8*rsi + 32], xmm3
 34912  	LONG $0x7f0f41f3; WORD $0xf064; BYTE $0x30 // movdqu    oword [r8 + 8*rsi + 48], xmm4
 34913  	LONG $0x08c68348                           // add    rsi, 8
 34914  	LONG $0x02c78348                           // add    rdi, 2
 34915  	JNE  LBB4_878
 34916  	JMP  LBB4_1508
 34917  
 // LBB4_881: set-up for the x2-unrolled loop at LBB4_883 (groups of 8 elements).
 //   rdx = eax & -8               (eax rounded down to a multiple of 8)
 //   r9  = ((rdx - 8) >> 3) + 1   (number of 8-element groups)
 //   If rdx - 8 == 0 there is exactly one group: jump to LBB4_1515
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rsi = 0, xmm0 = all-zero, xmm1 = all-ones
 //   xmm2 = 16-byte constant at 80[rbp] (.LCPI4_8), defined outside this chunk.
 34918  LBB4_881:
 34919  	WORD $0xc289                 // mov    edx, eax
 34920  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
 34921  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 34922  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 34923  	LONG $0x03e9c149             // shr    r9, 3
 34924  	LONG $0x01c18349             // add    r9, 1
 34925  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 34926  	JE   LBB4_1515
 34927  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 34928  	LONG $0xfee78348             // and    rdi, -2
 34929  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 34930  	WORD $0xf631                 // xor    esi, esi
 34931  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 34932  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 34933  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */
 34934  
 // LBB4_883: loop body, unrolled x2; reads 16 source bytes and writes 64 bytes per pass.
 // Each of the four steps:
 //   - loads 4 bytes from [rcx + rsi + k]            (k = 0, 4, 8, 12)
 //   - builds a per-byte "non-zero" mask: pcmpeqb against zero, then XOR with all-ones
 //   - zero-extends the four mask bytes to four dwords (pmovzxbd) and ANDs with xmm2
 //   - converts the dwords to single-precision floats (cvtdq2ps)
 //   - stores 16 bytes to [r8 + 4*rsi + 4*k]
 // i.e. 8-bit inputs produce 32-bit float outputs.
 // rsi advances by 16 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1516 (outside this chunk).
 34935  LBB4_883:
 34936  	LONG $0x1c6e0f66; BYTE $0x31   // movd    xmm3, dword [rcx + rsi]
 34937  	LONG $0x646e0f66; WORD $0x0431 // movd    xmm4, dword [rcx + rsi + 4]
 34938  	LONG $0xd8740f66               // pcmpeqb    xmm3, xmm0
 34939  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 34940  	LONG $0x31380f66; BYTE $0xdb   // pmovzxbd    xmm3, xmm3
 34941  	LONG $0xdadb0f66               // pand    xmm3, xmm2
 34942  	WORD $0x5b0f; BYTE $0xdb       // cvtdq2ps    xmm3, xmm3
 34943  	LONG $0xe0740f66               // pcmpeqb    xmm4, xmm0
 34944  	LONG $0xe1ef0f66               // pxor    xmm4, xmm1
 34945  	LONG $0x31380f66; BYTE $0xe4   // pmovzxbd    xmm4, xmm4
 34946  	LONG $0xe2db0f66               // pand    xmm4, xmm2
 34947  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4
 34948  	LONG $0x1c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm3
 34949  	LONG $0x64110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm4
 34950  	LONG $0x5c6e0f66; WORD $0x0831 // movd    xmm3, dword [rcx + rsi + 8]
 34951  	LONG $0x646e0f66; WORD $0x0c31 // movd    xmm4, dword [rcx + rsi + 12]
 34952  	LONG $0xd8740f66               // pcmpeqb    xmm3, xmm0
 34953  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 34954  	LONG $0x31380f66; BYTE $0xdb   // pmovzxbd    xmm3, xmm3
 34955  	LONG $0xdadb0f66               // pand    xmm3, xmm2
 34956  	WORD $0x5b0f; BYTE $0xdb       // cvtdq2ps    xmm3, xmm3
 34957  	LONG $0xe0740f66               // pcmpeqb    xmm4, xmm0
 34958  	LONG $0xe1ef0f66               // pxor    xmm4, xmm1
 34959  	LONG $0x31380f66; BYTE $0xe4   // pmovzxbd    xmm4, xmm4
 34960  	LONG $0xe2db0f66               // pand    xmm4, xmm2
 34961  	WORD $0x5b0f; BYTE $0xe4       // cvtdq2ps    xmm4, xmm4
 34962  	LONG $0x5c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm3
 34963  	LONG $0x64110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm4
 34964  	LONG $0x10c68348               // add    rsi, 16
 34965  	LONG $0x02c78348               // add    rdi, 2
 34966  	JNE  LBB4_883
 34967  	JMP  LBB4_1516
 34968  
 // LBB4_892: set-up for the x2-unrolled loop at LBB4_894 (groups of 8 elements).
 //   rdx = eax & -8               (eax rounded down to a multiple of 8)
 //   r9  = ((rdx - 8) >> 3) + 1   (number of 8-element groups)
 //   If rdx - 8 == 0 there is exactly one group: jump to LBB4_1535
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rsi = 0, xmm0 = all-zero, xmm1 = all-ones
 //   xmm2 = 16-byte constant at 128[rbp] (.LCPI4_12), defined outside this chunk.
 34969  LBB4_892:
 34970  	WORD $0xc289             // mov    edx, eax
 34971  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
 34972  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 34973  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 34974  	LONG $0x03e9c149         // shr    r9, 3
 34975  	LONG $0x01c18349         // add    r9, 1
 34976  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 34977  	JE   LBB4_1535
 34978  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 34979  	LONG $0xfee78348         // and    rdi, -2
 34980  	WORD $0xf748; BYTE $0xdf // neg    rdi
 34981  	WORD $0xf631             // xor    esi, esi
 34982  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 34983  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 34984  	QUAD $0x00000080956f0f66 // movdqa    xmm2, oword 128[rbp] /* [rip + .LCPI4_12] */
 34985  
 // LBB4_894: loop body, unrolled x2; reads 16 dwords (64 bytes) and writes 16 bytes per pass.
 // Each of the four steps:
 //   - loads 4 dwords from [rcx + 4*rsi + 4*k]       (k = 0, 4, 8, 12)
 //   - builds a per-dword "non-zero" mask: pcmpeqd against zero, then XOR with all-ones
 //   - narrows the mask dword -> word -> byte with signed saturation (packssdw, packsswb)
 //   - ANDs with xmm2 and stores the low 4 bytes to [r8 + rsi + k]
 // i.e. 32-bit inputs produce 8-bit outputs.
 // rsi advances by 16 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1536 (outside this chunk).
 34986  LBB4_894:
 34987  	LONG $0x1c6f0ff3; BYTE $0xb1               // movdqu    xmm3, oword [rcx + 4*rsi]
 34988  	LONG $0x646f0ff3; WORD $0x10b1             // movdqu    xmm4, oword [rcx + 4*rsi + 16]
 34989  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 34990  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 34991  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 34992  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 34993  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 34994  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 34995  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 34996  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 34997  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 34998  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 34999  	LONG $0x7e0f4166; WORD $0x301c             // movd    dword [r8 + rsi], xmm3
 35000  	LONG $0x7e0f4166; WORD $0x3064; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm4
 35001  	LONG $0x5c6f0ff3; WORD $0x20b1             // movdqu    xmm3, oword [rcx + 4*rsi + 32]
 35002  	LONG $0x646f0ff3; WORD $0x30b1             // movdqu    xmm4, oword [rcx + 4*rsi + 48]
 35003  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 35004  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 35005  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 35006  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 35007  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 35008  	LONG $0xe0760f66                           // pcmpeqd    xmm4, xmm0
 35009  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 35010  	LONG $0xe46b0f66                           // packssdw    xmm4, xmm4
 35011  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 35012  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 35013  	LONG $0x7e0f4166; WORD $0x305c; BYTE $0x08 // movd    dword [r8 + rsi + 8], xmm3
 35014  	LONG $0x7e0f4166; WORD $0x3064; BYTE $0x0c // movd    dword [r8 + rsi + 12], xmm4
 35015  	LONG $0x10c68348                           // add    rsi, 16
 35016  	LONG $0x02c78348                           // add    rdi, 2
 35017  	JNE  LBB4_894
 35018  	JMP  LBB4_1536
 35019  
 // LBB4_897: set-up for the x2-unrolled loop at LBB4_899 (groups of 4 elements).
 //   rdx = eax & -4               (eax rounded down to a multiple of 4)
 //   r9  = ((rdx - 4) >> 2) + 1   (number of 4-element groups)
 //   If rdx - 4 == 0 there is exactly one group: jump to LBB4_1543
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rsi = 0, xmm2 = all-zero
 //   xmm3 = constant at 0[rbp]  (.LCPI4_0), used as an AND mask in the loop
 //   xmm4 = constant at 16[rbp] (.LCPI4_1), ORed in after the mask
 //   xmm5 = constant at 64[rbp] (.LCPI4_7), used as the pshufb control
 //   All three constants are defined outside this chunk; xmm3/xmm4 are
 //   presumably the sign-bit mask and 1.0 -- TODO confirm.
 35020  LBB4_897:
 35021  	WORD $0xc289                 // mov    edx, eax
 35022  	WORD $0xe283; BYTE $0xfc     // and    edx, -4
 35023  	LONG $0xfc728d48             // lea    rsi, [rdx - 4]
 35024  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 35025  	LONG $0x02e9c149             // shr    r9, 2
 35026  	LONG $0x01c18349             // add    r9, 1
 35027  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
 35028  	JE   LBB4_1543
 35029  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 35030  	LONG $0xfee78348             // and    rdi, -2
 35031  	WORD $0xf748; BYTE $0xdf     // neg    rdi
 35032  	WORD $0xf631                 // xor    esi, esi
 35033  	LONG $0xd2570f66             // xorpd    xmm2, xmm2
 35034  	LONG $0x5d280f66; BYTE $0x00 // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */
 35035  	LONG $0x65280f66; BYTE $0x10 // movapd    xmm4, oword 16[rbp] /* [rip + .LCPI4_1] */
 35036  	LONG $0x6d6f0f66; BYTE $0x40 // movdqa    xmm5, oword 64[rbp] /* [rip + .LCPI4_7] */
 35037  
 // LBB4_899: loop body, unrolled x2; reads 8 doubles (64 bytes) and writes 8 bytes per pass.
 // For each 2-double vector x loaded from [rcx + 8*rsi + 8*k]:
 //   - zmask = (x == 0.0), narrowed qword -> byte (packssdw x2, packsswb); kept in xmm0/xmm1
 //   - v = (x & xmm3) | xmm4
 //   - v is converted to int32 with truncation (cvttpd2dq) and its bytes are
 //     rearranged by pshufb using xmm5
 //   - pblendvb (implicit mask register xmm0) replaces bytes with zero where zmask is set;
 //     the second vector's mask is first copied from xmm1 into xmm0
 //   - pextrw stores the low 2 bytes to [r8 + rsi + k]   (k = 0, 2, 4, 6)
 // i.e. 64-bit float inputs produce 8-bit outputs.
 // rsi advances by 8 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1544 (outside this chunk).
 35038  LBB4_899:
 35039  	LONG $0x34100f66; BYTE $0xf1         // movupd    xmm6, oword [rcx + 8*rsi]
 35040  	LONG $0x7c100f66; WORD $0x10f1       // movupd    xmm7, oword [rcx + 8*rsi + 16]
 35041  	LONG $0xc6280f66                     // movapd    xmm0, xmm6
 35042  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2
 35043  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35044  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35045  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 35046  	LONG $0xcf280f66                     // movapd    xmm1, xmm7
 35047  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 35048  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35049  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35050  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 35051  	LONG $0xf3540f66                     // andpd    xmm6, xmm3
 35052  	LONG $0xf4560f66                     // orpd    xmm6, xmm4
 35053  	LONG $0xfb540f66                     // andpd    xmm7, xmm3
 35054  	LONG $0xfc560f66                     // orpd    xmm7, xmm4
 35055  	LONG $0xf6e60f66                     // cvttpd2dq    xmm6, xmm6
 35056  	LONG $0x00380f66; BYTE $0xf5         // pshufb    xmm6, xmm5
 35057  	LONG $0xffe60f66                     // cvttpd2dq    xmm7, xmm7
 35058  	LONG $0x00380f66; BYTE $0xfd         // pshufb    xmm7, xmm5
 35059  	LONG $0x10380f66; BYTE $0xf2         // pblendvb    xmm6, xmm2, xmm0
 35060  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 35061  	LONG $0x10380f66; BYTE $0xfa         // pblendvb    xmm7, xmm2, xmm0
 35062  	QUAD $0x003034153a0f4166             // pextrw    word [r8 + rsi], xmm6, 0
 35063  	QUAD $0x02307c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm7, 0
 35064  	LONG $0x74100f66; WORD $0x20f1       // movupd    xmm6, oword [rcx + 8*rsi + 32]
 35065  	LONG $0x7c100f66; WORD $0x30f1       // movupd    xmm7, oword [rcx + 8*rsi + 48]
 35066  	LONG $0xc6280f66                     // movapd    xmm0, xmm6
 35067  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2
 35068  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35069  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35070  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 35071  	LONG $0xcf280f66                     // movapd    xmm1, xmm7
 35072  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 35073  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35074  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35075  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 35076  	LONG $0xf3540f66                     // andpd    xmm6, xmm3
 35077  	LONG $0xf4560f66                     // orpd    xmm6, xmm4
 35078  	LONG $0xfb540f66                     // andpd    xmm7, xmm3
 35079  	LONG $0xfc560f66                     // orpd    xmm7, xmm4
 35080  	LONG $0xf6e60f66                     // cvttpd2dq    xmm6, xmm6
 35081  	LONG $0x00380f66; BYTE $0xf5         // pshufb    xmm6, xmm5
 35082  	LONG $0xffe60f66                     // cvttpd2dq    xmm7, xmm7
 35083  	LONG $0x00380f66; BYTE $0xfd         // pshufb    xmm7, xmm5
 35084  	LONG $0x10380f66; BYTE $0xf2         // pblendvb    xmm6, xmm2, xmm0
 35085  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 35086  	LONG $0x10380f66; BYTE $0xfa         // pblendvb    xmm7, xmm2, xmm0
 35087  	QUAD $0x043074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 4], xmm6, 0
 35088  	QUAD $0x06307c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 6], xmm7, 0
 35089  	LONG $0x08c68348                     // add    rsi, 8
 35090  	LONG $0x02c78348                     // add    rdi, 2
 35091  	JNE  LBB4_899
 35092  	JMP  LBB4_1544
 35093  
 // LBB4_902: set-up for the x2-unrolled loop at LBB4_904 (groups of 32 elements).
 //   rsi = r10d & -32             (r10d rounded down to a multiple of 32)
 //   r9  = ((rsi - 32) >> 5) + 1  (number of 32-element groups)
 //   If rsi - 32 == 0 there is exactly one group: jump to LBB4_1552
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rax = 0 (running index), xmm2 = all-zero, xmm3 = all-ones
 //   xmm4 = 16-byte constant at 256[rbp] (.LCPI4_22), defined outside this
 //          chunk -- presumably sixteen bytes of 1, TODO confirm.
 35094  LBB4_902:
 35095  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
 35096  	WORD $0xe683; BYTE $0xe0 // and    esi, -32
 35097  	LONG $0xe0468d48         // lea    rax, [rsi - 32]
 35098  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 35099  	LONG $0x05e9c149         // shr    r9, 5
 35100  	LONG $0x01c18349         // add    r9, 1
 35101  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 35102  	JE   LBB4_1552
 35103  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35104  	LONG $0xfee78348         // and    rdi, -2
 35105  	WORD $0xf748; BYTE $0xdf // neg    rdi
 35106  	WORD $0xc031             // xor    eax, eax
 35107  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 35108  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 35109  	QUAD $0x00000100a56f0f66 // movdqa    xmm4, oword 256[rbp] /* [rip + .LCPI4_22] */
 35110  
 // LBB4_904: loop body, unrolled x2; reads and writes 64 bytes per pass.
 // For each 16-byte vector x loaded from [rcx + rax + k]   (k = 0, 16, 32, 48):
 //   - lt = (xmm4 > x), signed byte compare (pcmpgtb), placed in xmm0
 //   - nz = ~(x == 0), i.e. 0xFF in every non-zero byte, 0x00 otherwise
 //   - result = copy of xmm4 with bytes replaced by nz wherever lt is set
 //     (pblendvb, implicit mask register xmm0)
 //   - result is stored to [r8 + rax + k]
 // Per byte: out = (x < xmm4) ? (x != 0 ? 0xFF : 0) : xmm4.
 // rax advances by 64 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1553 (outside this chunk).
 35111  LBB4_904:
 35112  	LONG $0x2c6f0ff3; BYTE $0x01               // movdqu    xmm5, oword [rcx + rax]
 35113  	LONG $0x746f0ff3; WORD $0x1001             // movdqu    xmm6, oword [rcx + rax + 16]
 35114  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 35115  	LONG $0xc5640f66                           // pcmpgtb    xmm0, xmm5
 35116  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 35117  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35118  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 35119  	LONG $0xce640f66                           // pcmpgtb    xmm1, xmm6
 35120  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 35121  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35122  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 35123  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0
 35124  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 35125  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35126  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 35127  	LONG $0x7f0f41f3; WORD $0x003c             // movdqu    oword [r8 + rax], xmm7
 35128  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 35129  	LONG $0x6c6f0ff3; WORD $0x2001             // movdqu    xmm5, oword [rcx + rax + 32]
 35130  	LONG $0x746f0ff3; WORD $0x3001             // movdqu    xmm6, oword [rcx + rax + 48]
 35131  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 35132  	LONG $0xc5640f66                           // pcmpgtb    xmm0, xmm5
 35133  	LONG $0xea740f66                           // pcmpeqb    xmm5, xmm2
 35134  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35135  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 35136  	LONG $0xce640f66                           // pcmpgtb    xmm1, xmm6
 35137  	LONG $0xf2740f66                           // pcmpeqb    xmm6, xmm2
 35138  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35139  	LONG $0xfc6f0f66                           // movdqa    xmm7, xmm4
 35140  	LONG $0x10380f66; BYTE $0xfd               // pblendvb    xmm7, xmm5, xmm0
 35141  	LONG $0xec6f0f66                           // movdqa    xmm5, xmm4
 35142  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35143  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 35144  	LONG $0x7f0f41f3; WORD $0x007c; BYTE $0x20 // movdqu    oword [r8 + rax + 32], xmm7
 35145  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x30 // movdqu    oword [r8 + rax + 48], xmm5
 35146  	LONG $0x40c08348                           // add    rax, 64
 35147  	LONG $0x02c78348                           // add    rdi, 2
 35148  	JNE  LBB4_904
 35149  	JMP  LBB4_1553
 35150  
 // LBB4_907: set-up for the x2-unrolled loop at LBB4_909 (groups of 4 elements).
 //   rdx = eax & -4               (eax rounded down to a multiple of 4)
 //   r9  = ((rdx - 4) >> 2) + 1   (number of 4-element groups)
 //   If rdx - 4 == 0 there is exactly one group: jump to LBB4_1561
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rsi = 0, xmm0 = all-zero, xmm1 = all-ones
 //   xmm2 = 16-byte constant at 192[rbp] (.LCPI4_18), defined outside this chunk.
 35151  LBB4_907:
 35152  	WORD $0xc289             // mov    edx, eax
 35153  	WORD $0xe283; BYTE $0xfc // and    edx, -4
 35154  	LONG $0xfc728d48         // lea    rsi, [rdx - 4]
 35155  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 35156  	LONG $0x02e9c149         // shr    r9, 2
 35157  	LONG $0x01c18349         // add    r9, 1
 35158  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 35159  	JE   LBB4_1561
 35160  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35161  	LONG $0xfee78348         // and    rdi, -2
 35162  	WORD $0xf748; BYTE $0xdf // neg    rdi
 35163  	WORD $0xf631             // xor    esi, esi
 35164  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 35165  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 35166  	QUAD $0x000000c0956f0f66 // movdqa    xmm2, oword 192[rbp] /* [rip + .LCPI4_18] */
 35167  
 // LBB4_909: loop body, unrolled x2; reads 8 qwords (64 bytes) and writes 8 bytes per pass.
 // For each 2-qword vector x loaded from [rcx + 8*rsi + 8*k]:
 //   - builds a per-qword "non-zero" mask: pcmpeqq against zero, then XOR with all-ones
 //   - narrows the mask qword -> byte with signed saturation (packssdw x2, packsswb)
 //   - ANDs with xmm2
 //   - pextrw stores the low 2 bytes to [r8 + rsi + k]   (k = 0, 2, 4, 6)
 // i.e. 64-bit integer inputs produce 8-bit outputs of the form (x != 0) ? (0xFF & xmm2) : 0.
 // rsi advances by 8 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1562 (outside this chunk).
 35168  LBB4_909:
 35169  	LONG $0x1c6f0ff3; BYTE $0xf1         // movdqu    xmm3, oword [rcx + 8*rsi]
 35170  	LONG $0x646f0ff3; WORD $0x10f1       // movdqu    xmm4, oword [rcx + 8*rsi + 16]
 35171  	LONG $0x29380f66; BYTE $0xd8         // pcmpeqq    xmm3, xmm0
 35172  	LONG $0xd9ef0f66                     // pxor    xmm3, xmm1
 35173  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 35174  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 35175  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 35176  	LONG $0xdadb0f66                     // pand    xmm3, xmm2
 35177  	LONG $0x29380f66; BYTE $0xe0         // pcmpeqq    xmm4, xmm0
 35178  	LONG $0xe1ef0f66                     // pxor    xmm4, xmm1
 35179  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 35180  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 35181  	LONG $0xe4630f66                     // packsswb    xmm4, xmm4
 35182  	QUAD $0x00301c153a0f4166             // pextrw    word [r8 + rsi], xmm3, 0
 35183  	LONG $0xe2db0f66                     // pand    xmm4, xmm2
 35184  	QUAD $0x023064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm4, 0
 35185  	LONG $0x5c6f0ff3; WORD $0x20f1       // movdqu    xmm3, oword [rcx + 8*rsi + 32]
 35186  	LONG $0x646f0ff3; WORD $0x30f1       // movdqu    xmm4, oword [rcx + 8*rsi + 48]
 35187  	LONG $0x29380f66; BYTE $0xd8         // pcmpeqq    xmm3, xmm0
 35188  	LONG $0xd9ef0f66                     // pxor    xmm3, xmm1
 35189  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 35190  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 35191  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 35192  	LONG $0xdadb0f66                     // pand    xmm3, xmm2
 35193  	LONG $0x29380f66; BYTE $0xe0         // pcmpeqq    xmm4, xmm0
 35194  	LONG $0xe1ef0f66                     // pxor    xmm4, xmm1
 35195  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 35196  	LONG $0xe46b0f66                     // packssdw    xmm4, xmm4
 35197  	LONG $0xe4630f66                     // packsswb    xmm4, xmm4
 35198  	QUAD $0x04305c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 4], xmm3, 0
 35199  	LONG $0xe2db0f66                     // pand    xmm4, xmm2
 35200  	QUAD $0x063064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 6], xmm4, 0
 35201  	LONG $0x08c68348                     // add    rsi, 8
 35202  	LONG $0x02c78348                     // add    rdi, 2
 35203  	JNE  LBB4_909
 35204  	JMP  LBB4_1562
 35205  
 // LBB4_912: set-up for the x2-unrolled loop at LBB4_914 (groups of 16 elements).
 //   rdx = eax & -16              (eax rounded down to a multiple of 16)
 //   r9  = ((rdx - 16) >> 4) + 1  (number of 16-element groups)
 //   If rdx - 16 == 0 there is exactly one group: jump to LBB4_1569
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rsi = 0, xmm0 = all-zero, xmm1 = all-ones
 //   xmm2 = 16-byte constant at 240[rbp] (.LCPI4_21), defined outside this chunk.
 35206  LBB4_912:
 35207  	WORD $0xc289             // mov    edx, eax
 35208  	WORD $0xe283; BYTE $0xf0 // and    edx, -16
 35209  	LONG $0xf0728d48         // lea    rsi, [rdx - 16]
 35210  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 35211  	LONG $0x04e9c149         // shr    r9, 4
 35212  	LONG $0x01c18349         // add    r9, 1
 35213  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
 35214  	JE   LBB4_1569
 35215  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35216  	LONG $0xfee78348         // and    rdi, -2
 35217  	WORD $0xf748; BYTE $0xdf // neg    rdi
 35218  	WORD $0xf631             // xor    esi, esi
 35219  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 35220  	LONG $0xc9760f66         // pcmpeqd    xmm1, xmm1
 35221  	QUAD $0x000000f0956f0f66 // movdqa    xmm2, oword 240[rbp] /* [rip + .LCPI4_21] */
 35222  
 // LBB4_914: loop body, unrolled x2; reads 32 words (64 bytes) and writes 32 bytes per pass.
 // Each half handles two 8-word vectors loaded from [rcx + 2*rsi + ...]:
 //   - builds a per-word "non-zero" mask: pcmpeqw against zero, then XOR with all-ones
 //   - narrows word -> byte with signed saturation (packsswb) and ANDs with xmm2
 //   - merges the two 8-byte results into one vector (punpcklqdq)
 //   - stores 16 bytes to [r8 + rsi] / [r8 + rsi + 16]
 // i.e. 16-bit inputs produce 8-bit outputs of the form (x != 0) ? (0xFF & xmm2) : 0.
 // rsi advances by 32 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1570 (outside this chunk).
 35223  LBB4_914:
 35224  	LONG $0x1c6f0ff3; BYTE $0x71               // movdqu    xmm3, oword [rcx + 2*rsi]
 35225  	LONG $0x646f0ff3; WORD $0x1071             // movdqu    xmm4, oword [rcx + 2*rsi + 16]
 35226  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 35227  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 35228  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 35229  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 35230  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 35231  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 35232  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 35233  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 35234  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 35235  	LONG $0x7f0f41f3; WORD $0x301c             // movdqu    oword [r8 + rsi], xmm3
 35236  	LONG $0x5c6f0ff3; WORD $0x2071             // movdqu    xmm3, oword [rcx + 2*rsi + 32]
 35237  	LONG $0x646f0ff3; WORD $0x3071             // movdqu    xmm4, oword [rcx + 2*rsi + 48]
 35238  	LONG $0xd8750f66                           // pcmpeqw    xmm3, xmm0
 35239  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 35240  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 35241  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 35242  	LONG $0xe0750f66                           // pcmpeqw    xmm4, xmm0
 35243  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 35244  	LONG $0xe4630f66                           // packsswb    xmm4, xmm4
 35245  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 35246  	LONG $0xdc6c0f66                           // punpcklqdq    xmm3, xmm4
 35247  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm3
 35248  	LONG $0x20c68348                           // add    rsi, 32
 35249  	LONG $0x02c78348                           // add    rdi, 2
 35250  	JNE  LBB4_914
 35251  	JMP  LBB4_1570
 35252  
 // LBB4_917: set-up for the x2-unrolled loop at LBB4_919 (groups of 16 elements).
 //   rsi = r10d & -16             (r10d rounded down to a multiple of 16)
 //   r9  = ((rsi - 16) >> 4) + 1  (number of 16-element groups)
 //   If rsi - 16 == 0 there is exactly one group: jump to LBB4_1577
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rax = 0 (running index), xmm2 = all-zero, xmm3 = all-ones
 //   xmm4 = 16-byte constant at 240[rbp] (.LCPI4_21), defined outside this
 //          chunk -- presumably sixteen bytes of 1, TODO confirm.
 35253  LBB4_917:
 35254  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
 35255  	WORD $0xe683; BYTE $0xf0 // and    esi, -16
 35256  	LONG $0xf0468d48         // lea    rax, [rsi - 16]
 35257  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 35258  	LONG $0x04e9c149         // shr    r9, 4
 35259  	LONG $0x01c18349         // add    r9, 1
 35260  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 35261  	JE   LBB4_1577
 35262  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35263  	LONG $0xfee78348         // and    rdi, -2
 35264  	WORD $0xf748; BYTE $0xdf // neg    rdi
 35265  	WORD $0xc031             // xor    eax, eax
 35266  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 35267  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 35268  	QUAD $0x000000f0a56f0f66 // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */
 35269  
 // LBB4_919: loop body, unrolled x2; reads 32 words (64 bytes) and writes 32 bytes per pass.
 // Each half handles two 8-word vectors x loaded from [rcx + 2*rax + ...]:
 //   - pos = (x > 0), signed word compare (pcmpgtw), narrowed to bytes (packsswb)
 //   - nz  = ~(x == 0), narrowed to bytes: 0xFF for non-zero words, 0x00 otherwise
 //   - result = pos ? xmm4 : nz   (pblendvb, implicit mask register xmm0; the
 //     second vector's mask is first copied from xmm1 into xmm0)
 //   - the two 8-byte results are merged (punpcklqdq) and 16 bytes are stored
 //     to [r8 + rax] / [r8 + rax + 16]
 // i.e. signed 16-bit inputs produce 8-bit outputs.
 // rax advances by 32 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1578 (outside this chunk).
 35270  LBB4_919:
 35271  	LONG $0x2c6f0ff3; BYTE $0x41               // movdqu    xmm5, oword [rcx + 2*rax]
 35272  	LONG $0x746f0ff3; WORD $0x1041             // movdqu    xmm6, oword [rcx + 2*rax + 16]
 35273  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 35274  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 35275  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35276  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 35277  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 35278  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 35279  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 35280  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35281  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 35282  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 35283  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35284  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 35285  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 35286  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35287  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 35288  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 35289  	LONG $0x7f0f41f3; WORD $0x002c             // movdqu    oword [r8 + rax], xmm5
 35290  	LONG $0x6c6f0ff3; WORD $0x2041             // movdqu    xmm5, oword [rcx + 2*rax + 32]
 35291  	LONG $0x746f0ff3; WORD $0x3041             // movdqu    xmm6, oword [rcx + 2*rax + 48]
 35292  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 35293  	LONG $0xc2650f66                           // pcmpgtw    xmm0, xmm2
 35294  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35295  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 35296  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 35297  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 35298  	LONG $0xea750f66                           // pcmpeqw    xmm5, xmm2
 35299  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35300  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 35301  	LONG $0xf2750f66                           // pcmpeqw    xmm6, xmm2
 35302  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35303  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 35304  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 35305  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35306  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 35307  	LONG $0xee6c0f66                           // punpcklqdq    xmm5, xmm6
 35308  	LONG $0x7f0f41f3; WORD $0x006c; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm5
 35309  	LONG $0x20c08348                           // add    rax, 32
 35310  	LONG $0x02c78348                           // add    rdi, 2
 35311  	JNE  LBB4_919
 35312  	JMP  LBB4_1578
 35313  
 // LBB4_922: set-up for the x2-unrolled loop at LBB4_924 (groups of 4 elements).
 //   rsi = r10d & -4              (r10d rounded down to a multiple of 4)
 //   r9  = ((rsi - 4) >> 2) + 1   (number of 4-element groups)
 //   If rsi - 4 == 0 there is exactly one group: jump to LBB4_1586
 //   (outside this chunk) and skip the unrolled loop.
 //   rdi = -(r9 & -2)             (negated even group count, counted up to zero by 2)
 //   rax = 0 (running index), xmm2 = all-zero, xmm3 = all-ones
 //   xmm4 = 16-byte constant at 192[rbp] (.LCPI4_18), defined outside this chunk.
 35314  LBB4_922:
 35315  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
 35316  	WORD $0xe683; BYTE $0xfc // and    esi, -4
 35317  	LONG $0xfc468d48         // lea    rax, [rsi - 4]
 35318  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 35319  	LONG $0x02e9c149         // shr    r9, 2
 35320  	LONG $0x01c18349         // add    r9, 1
 35321  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
 35322  	JE   LBB4_1586
 35323  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35324  	LONG $0xfee78348         // and    rdi, -2
 35325  	WORD $0xf748; BYTE $0xdf // neg    rdi
 35326  	WORD $0xc031             // xor    eax, eax
 35327  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 35328  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 35329  	QUAD $0x000000c0a56f0f66 // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */
 35330  
 // LBB4_924: loop body, unrolled x2; reads 8 qwords (64 bytes) and writes 8 bytes per pass.
 // For each 2-qword vector x loaded from [rcx + 8*rax + 8*k]:
 //   - pos = (x > 0), signed qword compare (pcmpgtq), narrowed to bytes
 //     (packssdw x2, packsswb)
 //   - nz  = ~(x == 0), narrowed the same way: 0xFF for non-zero qwords, 0x00 otherwise
 //   - result = pos ? xmm4 : nz   (pblendvb, implicit mask register xmm0; the
 //     second vector's mask is first copied from xmm1 into xmm0)
 //   - pextrw stores the low 2 bytes to [r8 + rax + k]   (k = 0, 2, 4, 6)
 // i.e. signed 64-bit inputs produce 8-bit outputs.
 // rax advances by 8 and rdi by 2; when rdi reaches zero control continues at
 // LBB4_1587 (outside this chunk).
 35331  LBB4_924:
 35332  	LONG $0x2c6f0ff3; BYTE $0xc1         // movdqu    xmm5, oword [rcx + 8*rax]
 35333  	LONG $0x746f0ff3; WORD $0x10c1       // movdqu    xmm6, oword [rcx + 8*rax + 16]
 35334  	LONG $0xc56f0f66                     // movdqa    xmm0, xmm5
 35335  	LONG $0x37380f66; BYTE $0xc2         // pcmpgtq    xmm0, xmm2
 35336  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35337  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35338  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 35339  	LONG $0xce6f0f66                     // movdqa    xmm1, xmm6
 35340  	LONG $0x37380f66; BYTE $0xca         // pcmpgtq    xmm1, xmm2
 35341  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35342  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35343  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 35344  	LONG $0x29380f66; BYTE $0xea         // pcmpeqq    xmm5, xmm2
 35345  	LONG $0xebef0f66                     // pxor    xmm5, xmm3
 35346  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 35347  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 35348  	LONG $0xed630f66                     // packsswb    xmm5, xmm5
 35349  	LONG $0x29380f66; BYTE $0xf2         // pcmpeqq    xmm6, xmm2
 35350  	LONG $0xf3ef0f66                     // pxor    xmm6, xmm3
 35351  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 35352  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 35353  	LONG $0xf6630f66                     // packsswb    xmm6, xmm6
 35354  	LONG $0x10380f66; BYTE $0xec         // pblendvb    xmm5, xmm4, xmm0
 35355  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 35356  	LONG $0x10380f66; BYTE $0xf4         // pblendvb    xmm6, xmm4, xmm0
 35357  	QUAD $0x00002c153a0f4166             // pextrw    word [r8 + rax], xmm5, 0
 35358  	QUAD $0x020074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 2], xmm6, 0
 35359  	LONG $0x6c6f0ff3; WORD $0x20c1       // movdqu    xmm5, oword [rcx + 8*rax + 32]
 35360  	LONG $0x746f0ff3; WORD $0x30c1       // movdqu    xmm6, oword [rcx + 8*rax + 48]
 35361  	LONG $0xc56f0f66                     // movdqa    xmm0, xmm5
 35362  	LONG $0x37380f66; BYTE $0xc2         // pcmpgtq    xmm0, xmm2
 35363  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35364  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 35365  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 35366  	LONG $0xce6f0f66                     // movdqa    xmm1, xmm6
 35367  	LONG $0x37380f66; BYTE $0xca         // pcmpgtq    xmm1, xmm2
 35368  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35369  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 35370  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 35371  	LONG $0x29380f66; BYTE $0xea         // pcmpeqq    xmm5, xmm2
 35372  	LONG $0xebef0f66                     // pxor    xmm5, xmm3
 35373  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 35374  	LONG $0xed6b0f66                     // packssdw    xmm5, xmm5
 35375  	LONG $0xed630f66                     // packsswb    xmm5, xmm5
 35376  	LONG $0x29380f66; BYTE $0xf2         // pcmpeqq    xmm6, xmm2
 35377  	LONG $0xf3ef0f66                     // pxor    xmm6, xmm3
 35378  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 35379  	LONG $0xf66b0f66                     // packssdw    xmm6, xmm6
 35380  	LONG $0xf6630f66                     // packsswb    xmm6, xmm6
 35381  	LONG $0x10380f66; BYTE $0xec         // pblendvb    xmm5, xmm4, xmm0
 35382  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 35383  	LONG $0x10380f66; BYTE $0xf4         // pblendvb    xmm6, xmm4, xmm0
 35384  	QUAD $0x04006c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 4], xmm5, 0
 35385  	QUAD $0x060074153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 6], xmm6, 0
 35386  	LONG $0x08c08348                     // add    rax, 8
 35387  	LONG $0x02c78348                     // add    rdi, 2
 35388  	JNE  LBB4_924
 35389  	JMP  LBB4_1587
 35390  
 35391  LBB4_927:
        	// Vector path: reads 32-bit elements from [rcx] and writes one byte per
        	// element to [r8]; the element count is taken from r10d.
        	// Per element the loop below produces:
        	//   0              if the input compares equal to 0.0 (cmpeqps),
        	//   byte of xmm6   if the raw bits are > -1 as a signed dword (sign bit clear),
        	//   0xFF           otherwise.
        	// NOTE(review): xmm6 is loaded from 128[rbp] (.LCPI4_12), which is not
        	// visible here; presumably bytes of 1, making this sign(float32) -> int8.
        	// Confirm against the constant table.
 35392  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
 35393  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
        	// edx = count rounded down to a multiple of 8.
 35394  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 35395  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 35396  	LONG $0x03e9c149             // shr    r9, 3
 35397  	LONG $0x01c18349             // add    r9, 1
        	// r9 = number of 8-element groups (rdx / 8).
 35398  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
        	// rsi == 0 means exactly one group: bypass the 2x-unrolled loop.
        	// The target label is outside this view.
 35399  	JE   LBB4_1595
 35400  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 35401  	LONG $0xfee78348             // and    rdi, -2
 35402  	WORD $0xf748; BYTE $0xdf     // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35403  	WORD $0xf631                 // xor    esi, esi
        	// rsi = element index; xmm4 = 0; xmm8 = all ones (-1 per dword).
 35404  	WORD $0x570f; BYTE $0xe4     // xorps    xmm4, xmm4
 35405  	LONG $0x760f4566; BYTE $0xc0 // pcmpeqd    xmm8, xmm8
 35406  	QUAD $0x00000080b56f0f66     // movdqa    xmm6, oword 128[rbp] /* [rip + .LCPI4_12] */
 35407  
 35408  LBB4_929:
        	// Loop body: 16 elements per iteration (two groups of 8).
        	// xmm2/xmm3 = "== 0.0" masks, xmm0/xmm1 = "> -1" masks, all narrowed
        	// from dwords to bytes with packssdw + packsswb.
        	// pblendvb uses xmm0 as its implicit byte-select mask, hence the
        	// movdqa into xmm0 before each blend.
 35409  	LONG $0xb104100f                           // movups    xmm0, oword [rcx + 4*rsi]
 35410  	LONG $0xb14c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rsi + 16]
 35411  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 35412  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 35413  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 35414  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 35415  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 35416  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 35417  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 35418  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 35419  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 35420  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 35421  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35422  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 35423  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 35424  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
        	// Start each result as all-ones bytes, overwrite with xmm6 where the
        	// "> -1" mask is set, then overwrite with zero where the input was 0.0.
 35425  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 35426  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 35427  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 35428  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35429  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 35430  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 35431  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 35432  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 35433  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
        	// Store 4 result bytes for each of the two input vectors.
 35434  	LONG $0x7e0f4166; WORD $0x303c             // movd    dword [r8 + rsi], xmm7
 35435  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm5
        	// Second unrolled half: same computation for elements rsi+8 .. rsi+15
        	// (the roles of xmm5 and xmm7 are swapped relative to the first half).
 35436  	LONG $0xb144100f; BYTE $0x20               // movups    xmm0, oword [rcx + 4*rsi + 32]
 35437  	LONG $0xb14c100f; BYTE $0x30               // movups    xmm1, oword [rcx + 4*rsi + 48]
 35438  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 35439  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 35440  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 35441  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 35442  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 35443  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 35444  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 35445  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 35446  	LONG $0x660f4166; BYTE $0xc0               // pcmpgtd    xmm0, xmm8
 35447  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 35448  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35449  	LONG $0x660f4166; BYTE $0xc8               // pcmpgtd    xmm1, xmm8
 35450  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 35451  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 35452  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 35453  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 35454  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 35455  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35456  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 35457  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 35458  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 35459  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 35460  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 35461  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x08 // movd    dword [r8 + rsi + 8], xmm5
 35462  	LONG $0x7e0f4166; WORD $0x307c; BYTE $0x0c // movd    dword [r8 + rsi + 12], xmm7
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35463  	LONG $0x10c68348                           // add    rsi, 16
 35464  	LONG $0x02c78348                           // add    rdi, 2
 35465  	JNE  LBB4_929
        	// Continuation label is outside this view.
 35466  	JMP  LBB4_1596
 35467  
 35468  LBB4_932:
        	// Vector path: byte input at [rcx] -> byte output at [r8]; the element
        	// count is taken from eax. Each output byte is the corresponding byte
        	// of xmm1 when the input byte is non-zero, and 0 otherwise.
        	// NOTE(review): xmm1 comes from 256[rbp] (.LCPI4_22), not visible here;
        	// presumably bytes of 1 - confirm against the constant table.
 35469  	WORD $0xc289             // mov    edx, eax
 35470  	WORD $0xe283; BYTE $0xe0 // and    edx, -32
        	// edx = count rounded down to a multiple of 32.
 35471  	LONG $0xe0728d48         // lea    rsi, [rdx - 32]
 35472  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 35473  	LONG $0x05e9c149         // shr    r9, 5
 35474  	LONG $0x01c18349         // add    r9, 1
        	// r9 = number of 32-byte groups (rdx / 32).
 35475  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35476  	JE   LBB4_1604
 35477  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35478  	LONG $0xfee78348         // and    rdi, -2
 35479  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35480  	WORD $0xf631             // xor    esi, esi
        	// rsi = byte index; xmm0 = 0 for the equality compares.
 35481  	LONG $0xc0ef0f66         // pxor    xmm0, xmm0
 35482  	QUAD $0x000001008d6f0f66 // movdqa    xmm1, oword 256[rbp] /* [rip + .LCPI4_22] */
 35483  
 35484  LBB4_934:
        	// Loop body: 64 bytes per iteration (four 16-byte vectors).
        	// pcmpeqb builds an "== 0" mask; pandn then yields (~mask) & xmm1,
        	// i.e. xmm1 where the input byte was non-zero.
 35485  	LONG $0x146f0ff3; BYTE $0x31               // movdqu    xmm2, oword [rcx + rsi]
 35486  	LONG $0x5c6f0ff3; WORD $0x1031             // movdqu    xmm3, oword [rcx + rsi + 16]
 35487  	LONG $0xd0740f66                           // pcmpeqb    xmm2, xmm0
 35488  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 35489  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 35490  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 35491  	LONG $0x7f0f41f3; WORD $0x3014             // movdqu    oword [r8 + rsi], xmm2
 35492  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm3
 35493  	LONG $0x546f0ff3; WORD $0x2031             // movdqu    xmm2, oword [rcx + rsi + 32]
 35494  	LONG $0x5c6f0ff3; WORD $0x3031             // movdqu    xmm3, oword [rcx + rsi + 48]
 35495  	LONG $0xd0740f66                           // pcmpeqb    xmm2, xmm0
 35496  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 35497  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 35498  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 35499  	LONG $0x7f0f41f3; WORD $0x3054; BYTE $0x20 // movdqu    oword [r8 + rsi + 32], xmm2
 35500  	LONG $0x7f0f41f3; WORD $0x305c; BYTE $0x30 // movdqu    oword [r8 + rsi + 48], xmm3
        	// Advance the byte index by 64 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35501  	LONG $0x40c68348                           // add    rsi, 64
 35502  	LONG $0x02c78348                           // add    rdi, 2
 35503  	JNE  LBB4_934
        	// Continuation label is outside this view.
 35504  	JMP  LBB4_1605
 35505  
 35506  LBB4_937:
        	// Vector path: 32-bit integer input at [rcx] -> one byte per element
        	// at [r8]; the element count is taken from r10d.
        	// Per element the loop below produces:
        	//   0              if the input dword is 0,
        	//   byte of xmm4   if the input is > 0 (signed compare),
        	//   0xFF           otherwise.
        	// NOTE(review): xmm4 is loaded from 128[rbp] (.LCPI4_12), not visible
        	// here; presumably bytes of 1, making this sign(int32) -> int8. Confirm.
 35507  	WORD $0x8944; BYTE $0xd6 // mov    esi, r10d
 35508  	WORD $0xe683; BYTE $0xf8 // and    esi, -8
        	// esi = count rounded down to a multiple of 8.
 35509  	LONG $0xf8468d48         // lea    rax, [rsi - 8]
 35510  	WORD $0x8949; BYTE $0xc1 // mov    r9, rax
 35511  	LONG $0x03e9c149         // shr    r9, 3
 35512  	LONG $0x01c18349         // add    r9, 1
        	// r9 = number of 8-element groups (rsi / 8).
 35513  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35514  	JE   LBB4_1612
 35515  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35516  	LONG $0xfee78348         // and    rdi, -2
 35517  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35518  	WORD $0xc031             // xor    eax, eax
        	// rax = element index; xmm2 = 0; xmm3 = all ones.
 35519  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 35520  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 35521  	QUAD $0x00000080a56f0f66 // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 35522  
 35523  LBB4_939:
        	// Loop body: 16 elements per iteration (two groups of 8).
        	// xmm0/xmm1 = "> 0" masks; xmm5/xmm6 become "!= 0" masks (pcmpeqd then
        	// xor with all ones). Masks are narrowed from dwords to bytes with
        	// packssdw + packsswb. pblendvb uses xmm0 as its implicit select mask.
 35524  	LONG $0x2c6f0ff3; BYTE $0x81               // movdqu    xmm5, oword [rcx + 4*rax]
 35525  	LONG $0x746f0ff3; WORD $0x1081             // movdqu    xmm6, oword [rcx + 4*rax + 16]
 35526  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 35527  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 35528  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 35529  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35530  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 35531  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 35532  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 35533  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 35534  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 35535  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35536  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 35537  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 35538  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 35539  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35540  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 35541  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
        	// Where the input was > 0, replace the 0xFF "non-zero" byte with xmm4.
 35542  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 35543  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35544  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
        	// Store 4 result bytes for each of the two input vectors.
 35545  	LONG $0x7e0f4166; WORD $0x002c             // movd    dword [r8 + rax], xmm5
 35546  	LONG $0x7e0f4166; WORD $0x0074; BYTE $0x04 // movd    dword [r8 + rax + 4], xmm6
        	// Second unrolled half: elements rax+8 .. rax+15.
 35547  	LONG $0x6c6f0ff3; WORD $0x2081             // movdqu    xmm5, oword [rcx + 4*rax + 32]
 35548  	LONG $0x746f0ff3; WORD $0x3081             // movdqu    xmm6, oword [rcx + 4*rax + 48]
 35549  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
 35550  	LONG $0xc2660f66                           // pcmpgtd    xmm0, xmm2
 35551  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 35552  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 35553  	LONG $0xce6f0f66                           // movdqa    xmm1, xmm6
 35554  	LONG $0xca660f66                           // pcmpgtd    xmm1, xmm2
 35555  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 35556  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 35557  	LONG $0xea760f66                           // pcmpeqd    xmm5, xmm2
 35558  	LONG $0xebef0f66                           // pxor    xmm5, xmm3
 35559  	LONG $0xed6b0f66                           // packssdw    xmm5, xmm5
 35560  	LONG $0xed630f66                           // packsswb    xmm5, xmm5
 35561  	LONG $0xf2760f66                           // pcmpeqd    xmm6, xmm2
 35562  	LONG $0xf3ef0f66                           // pxor    xmm6, xmm3
 35563  	LONG $0xf66b0f66                           // packssdw    xmm6, xmm6
 35564  	LONG $0xf6630f66                           // packsswb    xmm6, xmm6
 35565  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 35566  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 35567  	LONG $0x10380f66; BYTE $0xf4               // pblendvb    xmm6, xmm4, xmm0
 35568  	LONG $0x7e0f4166; WORD $0x006c; BYTE $0x08 // movd    dword [r8 + rax + 8], xmm5
 35569  	LONG $0x7e0f4166; WORD $0x0074; BYTE $0x0c // movd    dword [r8 + rax + 12], xmm6
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35570  	LONG $0x10c08348                           // add    rax, 16
 35571  	LONG $0x02c78348                           // add    rdi, 2
 35572  	JNE  LBB4_939
        	// Continuation label is outside this view.
 35573  	JMP  LBB4_1613
 35574  
 35575  LBB4_942:
        	// Vector path: 32-bit input at [rcx] -> 32-bit output at [r8]; the
        	// element count is taken from r10d. Each output dword is the matching
        	// dword of xmm1 when the input dword is non-zero, and 0 otherwise.
        	// NOTE(review): xmm1 comes from 80[rbp] (.LCPI4_8), not visible here;
        	// presumably dwords of 1 - confirm against the constant table.
 35576  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
 35577  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
        	// edx = count rounded down to a multiple of 8.
 35578  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 35579  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 35580  	LONG $0x03e9c149             // shr    r9, 3
 35581  	LONG $0x01c18349             // add    r9, 1
        	// r9 = number of 8-element groups (rdx / 8).
 35582  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35583  	JE   LBB4_1621
 35584  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 35585  	LONG $0xfee78348             // and    rdi, -2
 35586  	WORD $0xf748; BYTE $0xdf     // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35587  	WORD $0xf631                 // xor    esi, esi
        	// rsi = element index; xmm0 = 0 for the equality compares.
 35588  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 35589  	LONG $0x4d6f0f66; BYTE $0x50 // movdqa    xmm1, oword 80[rbp] /* [rip + .LCPI4_8] */
 35590  
 35591  LBB4_944:
        	// Loop body: 16 dwords per iteration (four 16-byte vectors).
        	// pcmpeqd builds an "== 0" mask; pandn then yields (~mask) & xmm1.
 35592  	LONG $0x146f0ff3; BYTE $0xb1               // movdqu    xmm2, oword [rcx + 4*rsi]
 35593  	LONG $0x5c6f0ff3; WORD $0x10b1             // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 35594  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0
 35595  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 35596  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 35597  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 35598  	LONG $0x7f0f41f3; WORD $0xb014             // movdqu    oword [r8 + 4*rsi], xmm2
 35599  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm3
 35600  	LONG $0x546f0ff3; WORD $0x20b1             // movdqu    xmm2, oword [rcx + 4*rsi + 32]
 35601  	LONG $0x5c6f0ff3; WORD $0x30b1             // movdqu    xmm3, oword [rcx + 4*rsi + 48]
 35602  	LONG $0xd0760f66                           // pcmpeqd    xmm2, xmm0
 35603  	LONG $0xd1df0f66                           // pandn    xmm2, xmm1
 35604  	LONG $0xd8760f66                           // pcmpeqd    xmm3, xmm0
 35605  	LONG $0xd9df0f66                           // pandn    xmm3, xmm1
 35606  	LONG $0x7f0f41f3; WORD $0xb054; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm2
 35607  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm3
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35608  	LONG $0x10c68348                           // add    rsi, 16
 35609  	LONG $0x02c78348                           // add    rdi, 2
 35610  	JNE  LBB4_944
        	// Continuation label is outside this view.
 35611  	JMP  LBB4_1622
 35612  
 35613  LBB4_950:
        	// Vector path: signed byte input at [rcx] -> 32-bit output at [r8];
        	// the element count is taken from r10d.
        	// Per element the loop below produces:
        	//   0               if the input byte is 0,
        	//   dword of xmm4   if the input byte is > 0 (signed compare),
        	//   0xFFFFFFFF      otherwise.
        	// NOTE(review): xmm4 is loaded from 80[rbp] (.LCPI4_8), not visible
        	// here; presumably dwords of 1, making this sign(int8) -> int32. Confirm.
 35614  	WORD $0x8944; BYTE $0xd2 // mov    edx, r10d
 35615  	WORD $0xe283; BYTE $0xf8 // and    edx, -8
        	// edx = count rounded down to a multiple of 8.
 35616  	LONG $0xf8728d48         // lea    rsi, [rdx - 8]
 35617  	WORD $0x8949; BYTE $0xf1 // mov    r9, rsi
 35618  	LONG $0x03e9c149         // shr    r9, 3
 35619  	LONG $0x01c18349         // add    r9, 1
        	// r9 = number of 8-element groups (rdx / 8).
 35620  	WORD $0x8548; BYTE $0xf6 // test    rsi, rsi
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35621  	JE   LBB4_1629
 35622  	WORD $0x894c; BYTE $0xcf // mov    rdi, r9
 35623  	LONG $0xfee78348         // and    rdi, -2
 35624  	WORD $0xf748; BYTE $0xdf // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35625  	WORD $0xf631             // xor    esi, esi
        	// rsi = element index; xmm2 = 0; xmm3 = all ones.
 35626  	LONG $0xd2ef0f66         // pxor    xmm2, xmm2
 35627  	LONG $0xdb760f66         // pcmpeqd    xmm3, xmm3
 35628  	LONG $0x5065280f         // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 35629  
 35630  LBB4_952:
        	// Loop body: 16 elements per iteration, loaded 4 bytes at a time.
        	// Byte masks are sign-extended to dword lanes with pmovsxbd, which
        	// reads only the low 4 bytes of its source. blendvps selects per
        	// dword lane using xmm0 as the implicit mask.
 35631  	LONG $0x2c6e0f66; BYTE $0x31   // movd    xmm5, dword [rcx + rsi]
 35632  	LONG $0x746e0f66; WORD $0x0431 // movd    xmm6, dword [rcx + rsi + 4]
 35633  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 35634  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2
 35635  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0
 35636  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 35637  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 35638  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
        	// xmm5/xmm6 = "!= 0" masks (pcmpeqb then xor with all ones), widened
        	// to dwords so non-zero inputs become 0xFFFFFFFF.
 35639  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 35640  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 35641  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5
 35642  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 35643  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 35644  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
        	// Where the input was > 0, replace the lane with xmm4.
 35645  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 35646  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 35647  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 35648  	LONG $0x2c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm5
 35649  	LONG $0x74110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm6
        	// Second unrolled half: elements rsi+8 .. rsi+15.
 35650  	LONG $0x6c6e0f66; WORD $0x0831 // movd    xmm5, dword [rcx + rsi + 8]
 35651  	LONG $0x746e0f66; WORD $0x0c31 // movd    xmm6, dword [rcx + rsi + 12]
 35652  	LONG $0xc56f0f66               // movdqa    xmm0, xmm5
 35653  	LONG $0xc2640f66               // pcmpgtb    xmm0, xmm2
 35654  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0
 35655  	LONG $0xce6f0f66               // movdqa    xmm1, xmm6
 35656  	LONG $0xca640f66               // pcmpgtb    xmm1, xmm2
 35657  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 35658  	LONG $0xea740f66               // pcmpeqb    xmm5, xmm2
 35659  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 35660  	LONG $0x21380f66; BYTE $0xed   // pmovsxbd    xmm5, xmm5
 35661  	LONG $0xf2740f66               // pcmpeqb    xmm6, xmm2
 35662  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 35663  	LONG $0x21380f66; BYTE $0xf6   // pmovsxbd    xmm6, xmm6
 35664  	LONG $0x14380f66; BYTE $0xec   // blendvps    xmm5, xmm4, xmm0
 35665  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 35666  	LONG $0x14380f66; BYTE $0xf4   // blendvps    xmm6, xmm4, xmm0
 35667  	LONG $0x6c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm5
 35668  	LONG $0x74110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm6
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35669  	LONG $0x10c68348               // add    rsi, 16
 35670  	LONG $0x02c78348               // add    rdi, 2
 35671  	JNE  LBB4_952
        	// Continuation label is outside this view.
 35672  	JMP  LBB4_1630
 35673  
 35674  LBB4_974:
        	// Vector path: byte input at [rcx] -> 32-bit output at [r8]; the element
        	// count is taken from r10d. Each output dword is (0xFF & dword of xmm2)
        	// when the input byte is non-zero, and 0 otherwise.
        	// NOTE(review): xmm2 comes from 80[rbp] (.LCPI4_8), not visible here;
        	// presumably dwords of 1 - confirm against the constant table.
 35675  	WORD $0x8944; BYTE $0xd2     // mov    edx, r10d
 35676  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
        	// edx = count rounded down to a multiple of 8.
 35677  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 35678  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 35679  	LONG $0x03e9c149             // shr    r9, 3
 35680  	LONG $0x01c18349             // add    r9, 1
        	// r9 = number of 8-element groups (rdx / 8).
 35681  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35682  	JE   LBB4_1638
 35683  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 35684  	LONG $0xfee78348             // and    rdi, -2
 35685  	WORD $0xf748; BYTE $0xdf     // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35686  	WORD $0xf631                 // xor    esi, esi
        	// rsi = element index; xmm0 = 0; xmm1 = all ones.
 35687  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 35688  	LONG $0xc9760f66             // pcmpeqd    xmm1, xmm1
 35689  	LONG $0x556f0f66; BYTE $0x50 // movdqa    xmm2, oword 80[rbp] /* [rip + .LCPI4_8] */
 35690  
 35691  LBB4_976:
        	// Loop body: 16 elements per iteration, loaded 4 bytes at a time.
        	// pcmpeqb + pxor builds a "!= 0" byte mask; pmovzxbd zero-extends the
        	// low 4 mask bytes to dwords (0x000000FF for non-zero inputs); pand
        	// with xmm2 produces the stored value.
 35692  	LONG $0x1c6e0f66; BYTE $0x31               // movd    xmm3, dword [rcx + rsi]
 35693  	LONG $0x646e0f66; WORD $0x0431             // movd    xmm4, dword [rcx + rsi + 4]
 35694  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 35695  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 35696  	LONG $0x31380f66; BYTE $0xdb               // pmovzxbd    xmm3, xmm3
 35697  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 35698  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 35699  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 35700  	LONG $0x31380f66; BYTE $0xe4               // pmovzxbd    xmm4, xmm4
 35701  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 35702  	LONG $0x7f0f41f3; WORD $0xb01c             // movdqu    oword [r8 + 4*rsi], xmm3
 35703  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm4
        	// Second unrolled half: elements rsi+8 .. rsi+15.
 35704  	LONG $0x5c6e0f66; WORD $0x0831             // movd    xmm3, dword [rcx + rsi + 8]
 35705  	LONG $0x646e0f66; WORD $0x0c31             // movd    xmm4, dword [rcx + rsi + 12]
 35706  	LONG $0xd8740f66                           // pcmpeqb    xmm3, xmm0
 35707  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 35708  	LONG $0x31380f66; BYTE $0xdb               // pmovzxbd    xmm3, xmm3
 35709  	LONG $0xdadb0f66                           // pand    xmm3, xmm2
 35710  	LONG $0xe0740f66                           // pcmpeqb    xmm4, xmm0
 35711  	LONG $0xe1ef0f66                           // pxor    xmm4, xmm1
 35712  	LONG $0x31380f66; BYTE $0xe4               // pmovzxbd    xmm4, xmm4
 35713  	LONG $0xe2db0f66                           // pand    xmm4, xmm2
 35714  	LONG $0x7f0f41f3; WORD $0xb05c; BYTE $0x20 // movdqu    oword [r8 + 4*rsi + 32], xmm3
 35715  	LONG $0x7f0f41f3; WORD $0xb064; BYTE $0x30 // movdqu    oword [r8 + 4*rsi + 48], xmm4
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35716  	LONG $0x10c68348                           // add    rsi, 16
 35717  	LONG $0x02c78348                           // add    rdi, 2
 35718  	JNE  LBB4_976
        	// Continuation label is outside this view.
 35719  	JMP  LBB4_1639
 35720  
 35721  LBB4_979:
        	// Vector path: 32-bit integer input at [rcx] -> 32-bit output at [r8];
        	// the element count is taken from r11d (not r10d as in sibling paths).
        	// With C = the dword of xmm4, each output lane is:
        	//   C            if C > input is false (signed compare),
        	//   0xFFFFFFFF   if C > input and input != 0,
        	//   0            if C > input and input == 0.
        	// NOTE(review): xmm4 is loaded from 80[rbp] (.LCPI4_8), not visible
        	// here; if it holds dwords of 1 this is sign(int32) -> int32. Confirm.
 35722  	WORD $0x8944; BYTE $0xda     // mov    edx, r11d
 35723  	WORD $0xe283; BYTE $0xf8     // and    edx, -8
        	// edx = count rounded down to a multiple of 8.
 35724  	LONG $0xf8728d48             // lea    rsi, [rdx - 8]
 35725  	WORD $0x8949; BYTE $0xf1     // mov    r9, rsi
 35726  	LONG $0x03e9c149             // shr    r9, 3
 35727  	LONG $0x01c18349             // add    r9, 1
        	// r9 = number of 8-element groups (rdx / 8).
 35728  	WORD $0x8548; BYTE $0xf6     // test    rsi, rsi
        	// Exactly one group: bypass the 2x-unrolled loop (target outside this view).
 35729  	JE   LBB4_1646
 35730  	WORD $0x894c; BYTE $0xcf     // mov    rdi, r9
 35731  	LONG $0xfee78348             // and    rdi, -2
 35732  	WORD $0xf748; BYTE $0xdf     // neg    rdi
        	// rdi = -(r9 & ~1): negative loop counter, stepped by +2 until zero.
 35733  	WORD $0xf631                 // xor    esi, esi
        	// rsi = element index; xmm2 = 0; xmm3 = all ones.
 35734  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 35735  	LONG $0xdb760f66             // pcmpeqd    xmm3, xmm3
 35736  	LONG $0x656f0f66; BYTE $0x50 // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 35737  
 35738  LBB4_981:
        	// Loop body: 16 dwords per iteration (four 16-byte vectors).
        	// xmm0/xmm1 = "C > input" masks; xmm5/xmm6 become "!= 0" masks.
        	// blendvps (implicit mask xmm0) keeps C where the mask is clear and
        	// takes the "!= 0" mask value where it is set.
 35739  	LONG $0x2c6f0ff3; BYTE $0xb1   // movdqu    xmm5, oword [rcx + 4*rsi]
 35740  	LONG $0x746f0ff3; WORD $0x10b1 // movdqu    xmm6, oword [rcx + 4*rsi + 16]
 35741  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 35742  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5
 35743  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 35744  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 35745  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 35746  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 35747  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 35748  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 35749  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 35750  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0
 35751  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 35752  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 35753  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 35754  	LONG $0x3c110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm7
 35755  	LONG $0x6c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm5
        	// Second unrolled half: elements rsi+8 .. rsi+15.
 35756  	LONG $0x6c6f0ff3; WORD $0x20b1 // movdqu    xmm5, oword [rcx + 4*rsi + 32]
 35757  	LONG $0x746f0ff3; WORD $0x30b1 // movdqu    xmm6, oword [rcx + 4*rsi + 48]
 35758  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 35759  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5
 35760  	LONG $0xea760f66               // pcmpeqd    xmm5, xmm2
 35761  	LONG $0xebef0f66               // pxor    xmm5, xmm3
 35762  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 35763  	LONG $0xce660f66               // pcmpgtd    xmm1, xmm6
 35764  	LONG $0xf2760f66               // pcmpeqd    xmm6, xmm2
 35765  	LONG $0xf3ef0f66               // pxor    xmm6, xmm3
 35766  	LONG $0xfc6f0f66               // movdqa    xmm7, xmm4
 35767  	LONG $0x14380f66; BYTE $0xfd   // blendvps    xmm7, xmm5, xmm0
 35768  	LONG $0xec6f0f66               // movdqa    xmm5, xmm4
 35769  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 35770  	LONG $0x14380f66; BYTE $0xee   // blendvps    xmm5, xmm6, xmm0
 35771  	LONG $0x7c110f41; WORD $0x20b0 // movups    oword [r8 + 4*rsi + 32], xmm7
 35772  	LONG $0x6c110f41; WORD $0x30b0 // movups    oword [r8 + 4*rsi + 48], xmm5
        	// Advance the element index by 16 and the negative counter by 2;
        	// the JNE consumes the flags of the second add.
 35773  	LONG $0x10c68348               // add    rsi, 16
 35774  	LONG $0x02c78348               // add    rdi, 2
 35775  	JNE  LBB4_981
        	// Continuation label is outside this view.
 35776  	JMP  LBB4_1647
 35777  
 35778  LBB4_1475:
        	// Scalar path: signed byte input at [rcx + rdx] -> 32-bit output at
        	// [r8 + 4*rdx], with rdx as element index and rax as the end index.
        	// Each output is one of three values: 0 for a zero input, the dword at
        	// 296[rbp] (.LCPI4_5) for a positive input, or the dword at 304[rbp]
        	// (.LCPI4_14) for a negative input.
        	// NOTE(review): the two constants are not visible here; presumably
        	// +1.0f and -1.0f (sign(int8) -> float32) - confirm.
        	// This entry preloads the "negative" constant for a single leading
        	// element; the element value stored below depends on predecessors.
 35779  	QUAD $0x00000130856e0f66 // movd    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */
 35780  
 35781  LBB4_1476:
        	// The flags tested here are set by code outside this view (movd does
        	// not modify flags). If "greater", switch xmm0 to the "positive" constant.
 35782  	JLE  LBB4_1478
 35783  	QUAD $0x00000128856e0f66 // movd    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 35784  
 35785  LBB4_1478:
        	// Store the leading element, then set the low bit of the index.
 35786  	LONG $0x7e0f4166; WORD $0x9004 // movd    dword [r8 + 4*rdx], xmm0
 35787  	LONG $0x01ca8348               // or    rdx, 1
 35788  
 35789  LBB4_1479:
        	// rsi + rax == 0 means nothing is left for the pairwise loop.
        	// NOTE(review): rsi is set up outside this view.
 35790  	WORD $0x0148; BYTE $0xc6 // add    rsi, rax
 35791  	JE   LBB4_1655
        	// Loop invariants: xmm0 = "negative" constant, xmm1 = "positive" constant.
 35792  	QUAD $0x00000130856e0f66 // movd    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */
 35793  	QUAD $0x000001288d6e0f66 // movd    xmm1, dword 296[rbp] /* [rip + .LCPI4_5] */
 35794  	JMP  LBB4_1482
 35795  
 35796  LBB4_1481:
        	// Loop latch: store the second element of the pair (value in xmm3),
        	// advance the index by 2 and exit once rdx reaches rax.
 35797  	LONG $0x7e0f4166; WORD $0x905c; BYTE $0x04 // movd    dword [r8 + 4*rdx + 4], xmm3
 35798  	LONG $0x02c28348                           // add    rdx, 2
 35799  	WORD $0x3948; BYTE $0xd0                   // cmp    rax, rdx
 35800  	JE   LBB4_1655
 35801  
 35802  LBB4_1482:
        	// Loop head: classify the first byte of the pair. The SSE moves between
        	// the cmp and the conditional jumps do not modify flags, so every Jcc
        	// in this loop tests the most recent "cmp byte ..., 0".
 35803  	LONG $0x00113c80 // cmp    byte [rcx + rdx], 0
 35804  	LONG $0xd06f0f66 // movdqa    xmm2, xmm0
 35805  	JNE  LBB4_1483
        	// First byte is zero: xmm2 = 0. ZF is still set, so the JLE is always
        	// taken and LBB4_1487 stores 0.
 35806  	LONG $0xd2ef0f66 // pxor    xmm2, xmm2
 35807  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 35808  	JLE  LBB4_1487
 35809  
 35810  LBB4_1484:
        	// First byte is positive: store xmm3 (= xmm1), then classify the
        	// second byte of the pair.
 35811  	LONG $0x7e0f4166; WORD $0x901c // movd    dword [r8 + 4*rdx], xmm3
 35812  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 35813  	LONG $0xd06f0f66               // movdqa    xmm2, xmm0
 35814  	JNE  LBB4_1485
 35815  
 35816  LBB4_1488:
        	// Second byte is zero: xmm2 = 0. ZF is set, so the JG is never taken
        	// and LBB4_1489 selects 0 for the store in LBB4_1481.
 35817  	LONG $0xd2ef0f66 // pxor    xmm2, xmm2
 35818  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 35819  	JG   LBB4_1481
 35820  	JMP  LBB4_1489
 35821  
 35822  LBB4_1483:
        	// First byte is non-zero: positive goes to LBB4_1484 with xmm3 = xmm1,
        	// negative falls through with xmm2 = xmm0.
 35823  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 35824  	JG   LBB4_1484
 35825  
 35826  LBB4_1487:
        	// First byte is zero or negative: store xmm2 (0 or xmm0), then
        	// classify the second byte of the pair.
 35827  	LONG $0xda6f0f66               // movdqa    xmm3, xmm2
 35828  	LONG $0x7e0f4166; WORD $0x901c // movd    dword [r8 + 4*rdx], xmm3
 35829  	LONG $0x01117c80; BYTE $0x00   // cmp    byte [rcx + rdx + 1], 0
 35830  	LONG $0xd06f0f66               // movdqa    xmm2, xmm0
 35831  	JE   LBB4_1488
 35832  
 35833  LBB4_1485:
        	// Second byte is non-zero: positive stores xmm1 via LBB4_1481,
        	// negative falls through to select xmm2 (= xmm0).
 35834  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 35835  	JG   LBB4_1481
 35836  
 35837  LBB4_1489:
        	// Second byte is zero or negative: xmm3 = xmm2 (0 or xmm0).
 35838  	LONG $0xda6f0f66 // movdqa    xmm3, xmm2
 35839  	JMP  LBB4_1481
 35840  
 35841  LBB4_994:
        	// Tail handling for a 64-bit input -> 32-bit output "non-zero" kernel:
        	// out[i] = (in[i] != 0). Input is read from [rcx], output written to [r8].
        	// This entry starts the element index rsi at 0.
        	// NOTE(review): r9, rdx and rax are set up outside this view; r9 looks
        	// like a vector-group count whose low bit marks a leftover group, rdx
        	// the first scalar index and rax the element count - confirm.
 35842  	WORD $0xf631 // xor    esi, esi
 35843  
 35844  LBB4_995:
        	// If bit 0 of r9 is set, process one leftover group of 4 qwords.
 35845  	LONG $0x01c1f641               // test    r9b, 1
 35846  	JE   LBB4_997
 35847  	LONG $0x046f0ff3; BYTE $0xf1   // movdqu    xmm0, oword [rcx + 8*rsi]
 35848  	LONG $0x4c6f0ff3; WORD $0x10f1 // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 35849  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
        	// pcmpeqq builds "== 0" qword masks; pshufd 232 (0xE8) moves the low
        	// dword of each qword mask into the low 64 bits; pandn yields
        	// (~mask) & xmm3, i.e. the constant where the input was non-zero.
        	// NOTE(review): xmm3 comes from 160[rbp] (.LCPI4_16); presumably
        	// dwords of 1, matching the scalar loop below - confirm.
 35850  	LONG $0x29380f66; BYTE $0xc2   // pcmpeqq    xmm0, xmm2
 35851  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232
 35852  	QUAD $0x000000a09d6f0f66       // movdqa    xmm3, oword 160[rbp] /* [rip + .LCPI4_16] */
 35853  	LONG $0xc3df0f66               // pandn    xmm0, xmm3
 35854  	LONG $0x29380f66; BYTE $0xca   // pcmpeqq    xmm1, xmm2
 35855  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 35856  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
        	// Combine the two 2-dword halves into one 4-dword result and store it.
 35857  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
 35858  	LONG $0x7f0f41f3; WORD $0xb004 // movdqu    oword [r8 + 4*rsi], xmm0
 35859  
 35860  LBB4_997:
        	// No scalar remainder when rdx already equals rax.
 35861  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 35862  	JE   LBB4_1655
 35863  
 35864  LBB4_998:
        	// Scalar loop: out[rdx] = (in[rdx] != 0) ? 1 : 0, for rdx up to rax.
        	// esi is zeroed first so that setne sil yields a clean 0/1 dword.
 35865  	WORD $0xf631                 // xor    esi, esi
 35866  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 35867  	LONG $0xd6950f40             // setne    sil
 35868  	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
 35869  	LONG $0x01c28348             // add    rdx, 1
 35870  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 35871  	JNE  LBB4_998
 35872  	JMP  LBB4_1655
 35873  
 35874  LBB4_999:
        	// Tail handling for a 32-bit input -> 64-bit output "non-zero" kernel:
        	// out[i] = (in[i] != 0). Input is read from [rcx], output written to [r8].
        	// This entry starts the element index rsi at 0.
        	// NOTE(review): r9, rdx and rax are set up outside this view; r9 looks
        	// like a vector-group count whose low bit marks a leftover group, rdx
        	// the first scalar index and rax the element count - confirm.
 35875  	WORD $0xf631 // xor    esi, esi
 35876  
 35877  LBB4_1000:
        	// If bit 0 of r9 is set, process one leftover group of 4 dwords
        	// (loaded as two 8-byte halves).
 35878  	LONG $0x01c1f641                           // test    r9b, 1
 35879  	JE   LBB4_1002
 35880  	LONG $0x047e0ff3; BYTE $0xb1               // movq    xmm0, qword [rcx + 4*rsi]
 35881  	LONG $0x4c7e0ff3; WORD $0x08b1             // movq    xmm1, qword [rcx + 4*rsi + 8]
 35882  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// pcmpeqd + pxor with all ones builds "!= 0" dword masks; pmovzxdq
        	// zero-extends the two low dwords to qwords; pand with xmm4 gives the
        	// stored value.
        	// NOTE(review): xmm4 comes from 144[rbp] (.LCPI4_15); presumably
        	// qwords of 1, matching the scalar loop below - confirm.
 35883  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2
 35884  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 35885  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 35886  	LONG $0x35380f66; BYTE $0xc0               // pmovzxdq    xmm0, xmm0
 35887  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 35888  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 35889  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 35890  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 35891  	LONG $0x35380f66; BYTE $0xc9               // pmovzxdq    xmm1, xmm1
 35892  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 35893  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 35894  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 35895  
 35896  LBB4_1002:
        	// No scalar remainder when rdx already equals rax.
 35897  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 35898  	JE   LBB4_1655
 35899  
 35900  LBB4_1003:
        	// Scalar loop: out[rdx] = (in[rdx] != 0) ? 1 : 0, for rdx up to rax.
        	// esi is zeroed first (which also clears the upper half of rsi) so the
        	// 64-bit store of rsi writes a clean 0/1 qword.
 35901  	WORD $0xf631             // xor    esi, esi
 35902  	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
 35903  	LONG $0xd6950f40         // setne    sil
 35904  	LONG $0xd0348949         // mov    qword [r8 + 8*rdx], rsi
 35905  	LONG $0x01c28348         // add    rdx, 1
 35906  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 35907  	JNE  LBB4_1003
 35908  	JMP  LBB4_1655
 35909  
 35910  LBB4_1004: // entry that starts the vector element index rdi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 35911  	WORD $0xff31 // xor    edi, edi
 35912  
 35913  LBB4_1005: // one optional vector step (only when bit 0 of r9 is set): 2 doubles at rcx+8*rdi -> 2 x 64-bit integers at r8+8*rdi. Each lane is (x AND 0[rbp]) OR 16[rbp], converted to an integer lane by lane, then zeroed where the cmpneqpd mask against 0.0 is false. NOTE(review): looks like copysign(1.0, x) followed by the usual double->uint64 conversion (threshold at 280[rbp], xor with r11); r11 and the constants are set outside this excerpt -- confirm
 35914  	LONG $0x01c1f641               // test    r9b, 1
 35915  	JE   LBB4_1007
 35916  	LONG $0x04100f66; BYTE $0xf9   // movupd    xmm0, oword [rcx + 8*rdi]
 35917  	LONG $0x4d280f66; BYTE $0x00   // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 35918  	LONG $0xc8540f66               // andpd    xmm1, xmm0
 35919  	LONG $0x4d560f66; BYTE $0x10   // orpd    xmm1, oword 16[rbp] /* [rip + .LCPI4_1] */
 35920  	QUAD $0x0000011895100ff2       // movsd    xmm2, qword 280[rbp] /* [rip + .LCPI4_6] */
 35921  	LONG $0xd9280f66               // movapd    xmm3, xmm1
 35922  	LONG $0xda5c0ff2               // subsd    xmm3, xmm2
 35923  	LONG $0x2c0f48f2; BYTE $0xc3   // cvttsd2si    rax, xmm3
 35924  	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
 35925  	LONG $0x2c0f48f2; BYTE $0xd1   // cvttsd2si    rdx, xmm1
 35926  	LONG $0xca2e0f66               // ucomisd    xmm1, xmm2
 35927  	LONG $0xd0430f48               // cmovae    rdx, rax
 35928  	LONG $0x6e0f4866; BYTE $0xda   // movq    xmm3, rdx
 35929  	LONG $0xc9700f66; BYTE $0xee   // pshufd    xmm1, xmm1, 238
 35930  	LONG $0xe16f0f66               // movdqa    xmm4, xmm1
 35931  	LONG $0xe25c0ff2               // subsd    xmm4, xmm2
 35932  	LONG $0x2c0f48f2; BYTE $0xc4   // cvttsd2si    rax, xmm4
 35933  	WORD $0x314c; BYTE $0xd8       // xor    rax, r11
 35934  	LONG $0x2c0f48f2; BYTE $0xd1   // cvttsd2si    rdx, xmm1
 35935  	LONG $0xca2e0f66               // ucomisd    xmm1, xmm2
 35936  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 35937  	LONG $0xd0430f48               // cmovae    rdx, rax
 35938  	LONG $0x6e0f4866; BYTE $0xd2   // movq    xmm2, rdx
 35939  	LONG $0xda6c0f66               // punpcklqdq    xmm3, xmm2
 35940  	LONG $0xc8c20f66; BYTE $0x04   // cmpneqpd    xmm1, xmm0
 35941  	LONG $0xcb540f66               // andpd    xmm1, xmm3
 35942  	LONG $0x110f4166; WORD $0xf80c // movupd    oword [r8 + 8*rdi], xmm1
 35943  
 35944  LBB4_1007: // the scalar tail is skipped when rsi already equals the bound r10
 35945  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 35946  	JE   LBB4_1655
 35947  
 35948  LBB4_1008: // scalar tail setup, hoisted out of the loop: xmm0/xmm1/xmm2 = constants at 0/272/280[rbp], rax = 0, xmm3 = 0.0
 35949  	LONG $0x45280f66; BYTE $0x00 // movapd    xmm0, oword 0[rbp] /* [rip + .LCPI4_0] */
 35950  	QUAD $0x000001108d100ff2     // movsd    xmm1, qword 272[rbp] /* [rip + .LCPI4_2] */
 35951  	QUAD $0x0000011895100ff2     // movsd    xmm2, qword 280[rbp] /* [rip + .LCPI4_6] */
 35952  	WORD $0xc031                 // xor    eax, eax
 35953  	LONG $0xdb570f66             // xorpd    xmm3, xmm3
 35954  
 35955  LBB4_1009: // scalar tail over index rsi (bound r10): same per-element computation as the vector step for one double; the stored 64-bit value is replaced by rax (0) when the ucomisd of 0.0 against the input sets ZF
 35956  	LONG $0x24100ff2; BYTE $0xf1 // movsd    xmm4, qword [rcx + 8*rsi]
 35957  	LONG $0xec280f66             // movapd    xmm5, xmm4
 35958  	LONG $0xe8540f66             // andpd    xmm5, xmm0
 35959  	LONG $0xe9560f66             // orpd    xmm5, xmm1
 35960  	LONG $0xf5280f66             // movapd    xmm6, xmm5
 35961  	LONG $0xf25c0ff2             // subsd    xmm6, xmm2
 35962  	LONG $0x2c0f48f2; BYTE $0xd6 // cvttsd2si    rdx, xmm6
 35963  	WORD $0x314c; BYTE $0xda     // xor    rdx, r11
 35964  	LONG $0x2c0f48f2; BYTE $0xfd // cvttsd2si    rdi, xmm5
 35965  	LONG $0xea2e0f66             // ucomisd    xmm5, xmm2
 35966  	LONG $0xfa430f48             // cmovae    rdi, rdx
 35967  	LONG $0xdc2e0f66             // ucomisd    xmm3, xmm4
 35968  	LONG $0xf8440f48             // cmove    rdi, rax
 35969  	LONG $0xf03c8949             // mov    qword [r8 + 8*rsi], rdi
 35970  	LONG $0x01c68348             // add    rsi, 1
 35971  	WORD $0x3949; BYTE $0xf2     // cmp    r10, rsi
 35972  	JNE  LBB4_1009
 35973  	JMP  LBB4_1655
 35974  
 35975  LBB4_1010: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 35976  	WORD $0xf631 // xor    esi, esi
 35977  
 35978  LBB4_1011: // one optional vector step (only when bit 0 of r9 is set): 4 x 16-bit inputs at rcx+2*rsi become 4 x 64-bit outputs at r8+8*rsi, each (in != 0) masked by the constant at 144[rbp] (presumably 64-bit 1s -- TODO confirm LCPI4_15)
 35979  	LONG $0x01c1f641                           // test    r9b, 1
 35980  	JE   LBB4_1013
 35981  	LONG $0x046e0f66; BYTE $0x71               // movd    xmm0, dword [rcx + 2*rsi]
 35982  	LONG $0x4c6e0f66; WORD $0x0471             // movd    xmm1, dword [rcx + 2*rsi + 4]
 35983  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 35984  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 35985  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 35986  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 35987  	LONG $0x34380f66; BYTE $0xc0               // pmovzxwq    xmm0, xmm0
 35988  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 35989  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 35990  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 35991  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 35992  	LONG $0x34380f66; BYTE $0xc9               // pmovzxwq    xmm1, xmm1
 35993  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 35994  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 35995  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 35996  
 35997  LBB4_1013: // the scalar tail is skipped when rdx already equals the bound rax
 35998  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 35999  	JE   LBB4_1655
 36000  
 36001  LBB4_1014: // scalar tail over index rdx (bound rax): 64-bit out = 1 if the 16-bit input is non-zero, else 0
 36002  	WORD $0xf631                 // xor    esi, esi
 36003  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 36004  	LONG $0xd6950f40             // setne    sil
 36005  	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
 36006  	LONG $0x01c28348             // add    rdx, 1
 36007  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36008  	JNE  LBB4_1014
 36009  	JMP  LBB4_1655
 36010  
 36011  LBB4_1015: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36012  	WORD $0xf631 // xor    esi, esi
 36013  
 36014  LBB4_1016: // one optional vector step (only when bit 0 of r9 is set): 4 x signed 16-bit inputs at rcx+2*rsi -> 4 x 64-bit outputs at r8+8*rsi. Each lane starts as the sign-extended (in != 0) mask (-1 or 0) and is replaced by the constant at 144[rbp] where in > 0 (presumably 64-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_15)
 36015  	LONG $0x01c1f641                           // test    r9b, 1
 36016  	JE   LBB4_1018
 36017  	LONG $0x146e0f66; BYTE $0x71               // movd    xmm2, dword [rcx + 2*rsi]
 36018  	LONG $0x5c6e0f66; WORD $0x0471             // movd    xmm3, dword [rcx + 2*rsi + 4]
 36019  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 36020  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36021  	LONG $0xc4650f66                           // pcmpgtw    xmm0, xmm4
 36022  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0
 36023  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36024  	LONG $0xcc650f66                           // pcmpgtw    xmm1, xmm4
 36025  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 36026  	LONG $0xd4750f66                           // pcmpeqw    xmm2, xmm4
 36027  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 36028  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 36029  	LONG $0x24380f66; BYTE $0xd2               // pmovsxwq    xmm2, xmm2
 36030  	LONG $0xdc750f66                           // pcmpeqw    xmm3, xmm4
 36031  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36032  	LONG $0x24380f66; BYTE $0xdb               // pmovsxwq    xmm3, xmm3
 36033  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 36034  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0
 36035  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36036  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 36037  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 36038  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 36039  
 36040  LBB4_1018: // the scalar tail is skipped when rdx already equals the bound r10
 36041  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36042  	JE   LBB4_1655
 36043  
 36044  LBB4_1019: // scalar tail setup: rsi holds the constant 1 used by the cmovg below
 36045  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36046  
 36047  LBB4_1020: // scalar tail over index rdx (bound r10): 64-bit out = 1 if the signed 16-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36048  	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
 36049  	WORD $0xc031             // xor    eax, eax
 36050  	WORD $0x8566; BYTE $0xff // test    di, di
 36051  	WORD $0x950f; BYTE $0xd0 // setne    al
 36052  	WORD $0xf748; BYTE $0xd8 // neg    rax
 36053  	WORD $0x8566; BYTE $0xff // test    di, di
 36054  	LONG $0xc64f0f48         // cmovg    rax, rsi
 36055  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 36056  	LONG $0x01c28348         // add    rdx, 1
 36057  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36058  	JNE  LBB4_1020
 36059  	JMP  LBB4_1655
 36060  
 36061  LBB4_993: // straight-line (no loop) path that stores one 64-bit result at r8+8*rax: ecx becomes -1 when the sign bit of xmm0's low float is set, else +1; that is converted to float and then to a 64-bit integer (values >= the constant at 300[rbp] go through the subtract / xor-with-0x8000000000000000 route). xmm0 and rax are set outside this excerpt. NOTE(review): 300[rbp] is presumably 2^63 as a float -- confirm LCPI4_9
 36062  	WORD $0x500f; BYTE $0xc8               // movmskps    ecx, xmm0
 36063  	WORD $0xe183; BYTE $0x01               // and    ecx, 1
 36064  	WORD $0xd9f7                           // neg    ecx
 36065  	WORD $0xc983; BYTE $0x01               // or    ecx, 1
 36066  	WORD $0x570f; BYTE $0xc0               // xorps    xmm0, xmm0
 36067  	LONG $0xc12a0ff3                       // cvtsi2ss    xmm0, ecx
 36068  	QUAD $0x0000012c8d100ff3               // movss    xmm1, dword 300[rbp] /* [rip + .LCPI4_9] */
 36069  	WORD $0x280f; BYTE $0xd0               // movaps    xmm2, xmm0
 36070  	LONG $0xd15c0ff3                       // subss    xmm2, xmm1
 36071  	LONG $0x2c0f48f3; BYTE $0xca           // cvttss2si    rcx, xmm2
 36072  	QUAD $0x000000000000ba48; WORD $0x8000 // mov    rdx, -9223372036854775808
 36073  	WORD $0x3148; BYTE $0xca               // xor    rdx, rcx
 36074  	LONG $0x2c0f48f3; BYTE $0xc8           // cvttss2si    rcx, xmm0
 36075  	WORD $0x2e0f; BYTE $0xc1               // ucomiss    xmm0, xmm1
 36076  	LONG $0xca430f48                       // cmovae    rcx, rdx
 36077  	LONG $0xc00c8949                       // mov    qword [r8 + 8*rax], rcx
 36078  	JMP  LBB4_1655
 36079  
 36080  LBB4_1021: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36081  	WORD $0xf631 // xor    esi, esi
 36082  
 36083  LBB4_1022: // one optional vector step (only when bit 0 of r9 is set): 4 x signed 32-bit inputs at rcx+4*rsi -> 4 x 64-bit outputs at r8+8*rsi. Each lane starts as the sign-extended (in != 0) mask (-1 or 0) and is replaced by the constant at 144[rbp] where in > 0 (presumably 64-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_15)
 36084  	LONG $0x01c1f641                           // test    r9b, 1
 36085  	JE   LBB4_1024
 36086  	LONG $0x147e0ff3; BYTE $0xb1               // movq    xmm2, qword [rcx + 4*rsi]
 36087  	LONG $0x5c7e0ff3; WORD $0x08b1             // movq    xmm3, qword [rcx + 4*rsi + 8]
 36088  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 36089  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36090  	LONG $0xc4660f66                           // pcmpgtd    xmm0, xmm4
 36091  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 36092  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36093  	LONG $0xcc660f66                           // pcmpgtd    xmm1, xmm4
 36094  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 36095  	LONG $0xd4760f66                           // pcmpeqd    xmm2, xmm4
 36096  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 36097  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 36098  	LONG $0x25380f66; BYTE $0xd2               // pmovsxdq    xmm2, xmm2
 36099  	LONG $0xdc760f66                           // pcmpeqd    xmm3, xmm4
 36100  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36101  	LONG $0x25380f66; BYTE $0xdb               // pmovsxdq    xmm3, xmm3
 36102  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 36103  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0
 36104  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36105  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 36106  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 36107  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 36108  
 36109  LBB4_1024: // the scalar tail is skipped when rdx already equals the bound r10
 36110  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36111  	JE   LBB4_1655
 36112  
 36113  LBB4_1025: // scalar tail setup: rsi holds the constant 1 used by the cmovg below
 36114  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36115  
 36116  LBB4_1026: // scalar tail over index rdx (bound r10): 64-bit out = 1 if the signed 32-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36117  	WORD $0x3c8b; BYTE $0x91 // mov    edi, dword [rcx + 4*rdx]
 36118  	WORD $0xc031             // xor    eax, eax
 36119  	WORD $0xff85             // test    edi, edi
 36120  	WORD $0x950f; BYTE $0xd0 // setne    al
 36121  	WORD $0xf748; BYTE $0xd8 // neg    rax
 36122  	WORD $0xff85             // test    edi, edi
 36123  	LONG $0xc64f0f48         // cmovg    rax, rsi
 36124  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 36125  	LONG $0x01c28348         // add    rdx, 1
 36126  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36127  	JNE  LBB4_1026
 36128  	JMP  LBB4_1655
 36129  
 36130  LBB4_1027: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36131  	WORD $0xf631 // xor    esi, esi
 36132  
 36133  LBB4_1028: // one optional vector step (only when bit 0 of r9 is set): 4 x 64-bit inputs at rcx+8*rsi -> 4 x 16-bit outputs at r8+2*rsi. The (in != 0) masks are narrowed with two packssdw steps and masked by the constant at 176[rbp] (presumably 16-bit 1s -- TODO confirm LCPI4_17)
 36134  	LONG $0x01c1f641                           // test    r9b, 1
 36135  	JE   LBB4_1030
 36136  	LONG $0x046f0ff3; BYTE $0xf1               // movdqu    xmm0, oword [rcx + 8*rsi]
 36137  	LONG $0x4c6f0ff3; WORD $0x10f1             // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 36138  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 36139  	LONG $0x29380f66; BYTE $0xc2               // pcmpeqq    xmm0, xmm2
 36140  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 36141  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 36142  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36143  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36144  	QUAD $0x000000b0a56f0f66                   // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */
 36145  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 36146  	LONG $0x29380f66; BYTE $0xca               // pcmpeqq    xmm1, xmm2
 36147  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 36148  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36149  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36150  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 36151  	LONG $0x7e0f4166; WORD $0x7004             // movd    dword [r8 + 2*rsi], xmm0
 36152  	LONG $0x7e0f4166; WORD $0x704c; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm1
 36153  
 36154  LBB4_1030: // the scalar tail is skipped when rdx already equals the bound rax
 36155  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36156  	JE   LBB4_1655
 36157  
 36158  LBB4_1031: // scalar tail over index rdx (bound rax): 16-bit out = 1 if the 64-bit input is non-zero, else 0
 36159  	WORD $0xf631                 // xor    esi, esi
 36160  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 36161  	LONG $0xd6950f40             // setne    sil
 36162  	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
 36163  	LONG $0x01c28348             // add    rdx, 1
 36164  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36165  	JNE  LBB4_1031
 36166  	JMP  LBB4_1655
 36167  
 36168  LBB4_1032: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36169  	WORD $0xf631 // xor    esi, esi
 36170  
 36171  LBB4_1033: // one optional vector step (only when bit 0 of r9 is set): 4 x 64-bit inputs at rcx+8*rsi -> 4 x 16-bit outputs at r8+2*rsi. The (in != 0) masks are narrowed with two packssdw steps and masked by the constant at 176[rbp] (presumably 16-bit 1s -- TODO confirm LCPI4_17)
 36172  	LONG $0x01c1f641                           // test    r9b, 1
 36173  	JE   LBB4_1035
 36174  	LONG $0x046f0ff3; BYTE $0xf1               // movdqu    xmm0, oword [rcx + 8*rsi]
 36175  	LONG $0x4c6f0ff3; WORD $0x10f1             // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 36176  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 36177  	LONG $0x29380f66; BYTE $0xc2               // pcmpeqq    xmm0, xmm2
 36178  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 36179  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 36180  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36181  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36182  	QUAD $0x000000b0a56f0f66                   // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */
 36183  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 36184  	LONG $0x29380f66; BYTE $0xca               // pcmpeqq    xmm1, xmm2
 36185  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 36186  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36187  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36188  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 36189  	LONG $0x7e0f4166; WORD $0x7004             // movd    dword [r8 + 2*rsi], xmm0
 36190  	LONG $0x7e0f4166; WORD $0x704c; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm1
 36191  
 36192  LBB4_1035: // the scalar tail is skipped when rdx already equals the bound rax
 36193  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36194  	JE   LBB4_1655
 36195  
 36196  LBB4_1036: // scalar tail over index rdx (bound rax): 16-bit out = 1 if the 64-bit input is non-zero, else 0
 36197  	WORD $0xf631                 // xor    esi, esi
 36198  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 36199  	LONG $0xd6950f40             // setne    sil
 36200  	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
 36201  	LONG $0x01c28348             // add    rdx, 1
 36202  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36203  	JNE  LBB4_1036
 36204  	JMP  LBB4_1655
 36205  
 36206  LBB4_1037: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36207  	WORD $0xf631 // xor    esi, esi
 36208  
 36209  LBB4_1038: // one optional vector step (only when bit 0 of r9 is set): 4 x signed 64-bit inputs at rcx+8*rsi -> 4 x 16-bit outputs at r8+2*rsi. Both the (in > 0) and (in != 0) masks are narrowed with two packssdw steps; pblendvb then takes the constant at 176[rbp] where in > 0 and the (in != 0) mask (-1 or 0) elsewhere (constant presumably 16-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_17)
 36210  	LONG $0x01c1f641                           // test    r9b, 1
 36211  	JE   LBB4_1040
 36212  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 36213  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 36214  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
 36215  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36216  	LONG $0x37380f66; BYTE $0xc4               // pcmpgtq    xmm0, xmm4
 36217  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36218  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 36219  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36220  	LONG $0x37380f66; BYTE $0xcc               // pcmpgtq    xmm1, xmm4
 36221  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36222  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 36223  	LONG $0x29380f66; BYTE $0xd4               // pcmpeqq    xmm2, xmm4
 36224  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 36225  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 36226  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 36227  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 36228  	LONG $0x29380f66; BYTE $0xdc               // pcmpeqq    xmm3, xmm4
 36229  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36230  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 36231  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 36232  	QUAD $0x000000b0a56f0f66                   // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */
 36233  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0
 36234  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36235  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 36236  	LONG $0x7e0f4166; WORD $0x7014             // movd    dword [r8 + 2*rsi], xmm2
 36237  	LONG $0x7e0f4166; WORD $0x705c; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm3
 36238  
 36239  LBB4_1040: // the scalar tail is skipped when rdx already equals the bound r10
 36240  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36241  	JE   LBB4_1655
 36242  
 36243  LBB4_1041: // scalar tail setup: esi holds the constant 1 used by the cmovg below
 36244  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36245  
 36246  LBB4_1042: // scalar tail over index rdx (bound r10): 16-bit out = 1 if the signed 64-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36247  	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
 36248  	WORD $0xc031                 // xor    eax, eax
 36249  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 36250  	WORD $0x950f; BYTE $0xd0     // setne    al
 36251  	WORD $0xd8f7                 // neg    eax
 36252  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 36253  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 36254  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 36255  	LONG $0x01c28348             // add    rdx, 1
 36256  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 36257  	JNE  LBB4_1042
 36258  	JMP  LBB4_1655
 36259  
 36260  LBB4_1043: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36261  	WORD $0xf631 // xor    esi, esi
 36262  
 36263  LBB4_1044: // one optional vector step (only when bit 0 of r9 is set): 8 x signed 32-bit inputs at rcx+4*rsi -> 8 x 16-bit outputs at r8+2*rsi in a single 16-byte store. Masks are narrowed with packssdw; pblendvb takes the constant at 112[rbp] where in > 0 and the (in != 0) mask (-1 or 0) elsewhere (constant presumably 16-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_11)
 36264  	LONG $0x01c1f641               // test    r9b, 1
 36265  	JE   LBB4_1046
 36266  	LONG $0x146f0ff3; BYTE $0xb1   // movdqu    xmm2, oword [rcx + 4*rsi]
 36267  	LONG $0x5c6f0ff3; WORD $0x10b1 // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 36268  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 36269  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 36270  	LONG $0xc4660f66               // pcmpgtd    xmm0, xmm4
 36271  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
 36272  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 36273  	LONG $0xcc660f66               // pcmpgtd    xmm1, xmm4
 36274  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 36275  	LONG $0xd4760f66               // pcmpeqd    xmm2, xmm4
 36276  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5
 36277  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5
 36278  	LONG $0xd26b0f66               // packssdw    xmm2, xmm2
 36279  	LONG $0xdc760f66               // pcmpeqd    xmm3, xmm4
 36280  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 36281  	LONG $0xdb6b0f66               // packssdw    xmm3, xmm3
 36282  	LONG $0x656f0f66; BYTE $0x70   // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */
 36283  	LONG $0x10380f66; BYTE $0xd4   // pblendvb    xmm2, xmm4, xmm0
 36284  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 36285  	LONG $0x10380f66; BYTE $0xdc   // pblendvb    xmm3, xmm4, xmm0
 36286  	LONG $0xd36c0f66               // punpcklqdq    xmm2, xmm3
 36287  	LONG $0x7f0f41f3; WORD $0x7014 // movdqu    oword [r8 + 2*rsi], xmm2
 36288  
 36289  LBB4_1046: // the scalar tail is skipped when rdx already equals the bound r10
 36290  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36291  	JE   LBB4_1655
 36292  
 36293  LBB4_1047: // scalar tail setup: esi holds the constant 1 used by the cmovg below
 36294  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36295  
 36296  LBB4_1048: // scalar tail over index rdx (bound r10): 16-bit out = 1 if the signed 32-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36297  	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
 36298  	WORD $0xc031                 // xor    eax, eax
 36299  	WORD $0xff85                 // test    edi, edi
 36300  	WORD $0x950f; BYTE $0xd0     // setne    al
 36301  	WORD $0xd8f7                 // neg    eax
 36302  	WORD $0xff85                 // test    edi, edi
 36303  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 36304  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 36305  	LONG $0x01c28348             // add    rdx, 1
 36306  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 36307  	JNE  LBB4_1048
 36308  	JMP  LBB4_1655
 36309  
 36310  LBB4_1049: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36311  	WORD $0xf631 // xor    esi, esi
 36312  
 36313  LBB4_1050: // one optional vector step (only when bit 0 of r9 is set): 8 x signed 32-bit inputs at rcx+4*rsi -> 8 x 16-bit outputs at r8+2*rsi in a single 16-byte store. Masks are narrowed with packssdw; pblendvb takes the constant at 112[rbp] where in > 0 and the (in != 0) mask (-1 or 0) elsewhere (constant presumably 16-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_11)
 36314  	LONG $0x01c1f641               // test    r9b, 1
 36315  	JE   LBB4_1052
 36316  	LONG $0x146f0ff3; BYTE $0xb1   // movdqu    xmm2, oword [rcx + 4*rsi]
 36317  	LONG $0x5c6f0ff3; WORD $0x10b1 // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 36318  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 36319  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 36320  	LONG $0xc4660f66               // pcmpgtd    xmm0, xmm4
 36321  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
 36322  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 36323  	LONG $0xcc660f66               // pcmpgtd    xmm1, xmm4
 36324  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 36325  	LONG $0xd4760f66               // pcmpeqd    xmm2, xmm4
 36326  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5
 36327  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5
 36328  	LONG $0xd26b0f66               // packssdw    xmm2, xmm2
 36329  	LONG $0xdc760f66               // pcmpeqd    xmm3, xmm4
 36330  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 36331  	LONG $0xdb6b0f66               // packssdw    xmm3, xmm3
 36332  	LONG $0x656f0f66; BYTE $0x70   // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */
 36333  	LONG $0x10380f66; BYTE $0xd4   // pblendvb    xmm2, xmm4, xmm0
 36334  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 36335  	LONG $0x10380f66; BYTE $0xdc   // pblendvb    xmm3, xmm4, xmm0
 36336  	LONG $0xd36c0f66               // punpcklqdq    xmm2, xmm3
 36337  	LONG $0x7f0f41f3; WORD $0x7014 // movdqu    oword [r8 + 2*rsi], xmm2
 36338  
 36339  LBB4_1052: // the scalar tail is skipped when rdx already equals the bound r10
 36340  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36341  	JE   LBB4_1655
 36342  
 36343  LBB4_1053: // scalar tail setup: esi holds the constant 1 used by the cmovg below
 36344  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36345  
 36346  LBB4_1054: // scalar tail over index rdx (bound r10): 16-bit out = 1 if the signed 32-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36347  	WORD $0x3c8b; BYTE $0x91     // mov    edi, dword [rcx + 4*rdx]
 36348  	WORD $0xc031                 // xor    eax, eax
 36349  	WORD $0xff85                 // test    edi, edi
 36350  	WORD $0x950f; BYTE $0xd0     // setne    al
 36351  	WORD $0xd8f7                 // neg    eax
 36352  	WORD $0xff85                 // test    edi, edi
 36353  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 36354  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 36355  	LONG $0x01c28348             // add    rdx, 1
 36356  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 36357  	JNE  LBB4_1054
 36358  	JMP  LBB4_1655
 36359  
 36360  LBB4_1055: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36361  	WORD $0xf631 // xor    esi, esi
 36362  
 36363  LBB4_1056: // one optional vector step (only when bit 0 of r9 is set): 4 x 16-bit inputs at rcx+2*rsi become 4 x 64-bit outputs at r8+8*rsi, each (in != 0) masked by the constant at 144[rbp] (presumably 64-bit 1s -- TODO confirm LCPI4_15)
 36364  	LONG $0x01c1f641                           // test    r9b, 1
 36365  	JE   LBB4_1058
 36366  	LONG $0x046e0f66; BYTE $0x71               // movd    xmm0, dword [rcx + 2*rsi]
 36367  	LONG $0x4c6e0f66; WORD $0x0471             // movd    xmm1, dword [rcx + 2*rsi + 4]
 36368  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 36369  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 36370  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 36371  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 36372  	LONG $0x34380f66; BYTE $0xc0               // pmovzxwq    xmm0, xmm0
 36373  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 36374  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 36375  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 36376  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 36377  	LONG $0x34380f66; BYTE $0xc9               // pmovzxwq    xmm1, xmm1
 36378  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 36379  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 36380  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 36381  
 36382  LBB4_1058: // the scalar tail is skipped when rdx already equals the bound rax
 36383  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36384  	JE   LBB4_1655
 36385  
 36386  LBB4_1059: // scalar tail over index rdx (bound rax): 64-bit out = 1 if the 16-bit input is non-zero, else 0
 36387  	WORD $0xf631                 // xor    esi, esi
 36388  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 36389  	LONG $0xd6950f40             // setne    sil
 36390  	LONG $0xd0348949             // mov    qword [r8 + 8*rdx], rsi
 36391  	LONG $0x01c28348             // add    rdx, 1
 36392  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36393  	JNE  LBB4_1059
 36394  	JMP  LBB4_1655
 36395  
 36396  LBB4_1060: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36397  	WORD $0xf631 // xor    esi, esi
 36398  
 36399  LBB4_1061: // one optional vector step (only when bit 0 of r9 is set): 4 x signed 32-bit inputs at rcx+4*rsi -> 4 x 64-bit outputs at r8+8*rsi. Each lane starts as the sign-extended (in != 0) mask (-1 or 0) and is replaced by the constant at 144[rbp] where in > 0 (presumably 64-bit 1s, matching the scalar tail's literal 1 -- TODO confirm LCPI4_15)
 36400  	LONG $0x01c1f641                           // test    r9b, 1
 36401  	JE   LBB4_1063
 36402  	LONG $0x147e0ff3; BYTE $0xb1               // movq    xmm2, qword [rcx + 4*rsi]
 36403  	LONG $0x5c7e0ff3; WORD $0x08b1             // movq    xmm3, qword [rcx + 4*rsi + 8]
 36404  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 36405  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36406  	LONG $0xc4660f66                           // pcmpgtd    xmm0, xmm4
 36407  	LONG $0x25380f66; BYTE $0xc0               // pmovsxdq    xmm0, xmm0
 36408  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36409  	LONG $0xcc660f66                           // pcmpgtd    xmm1, xmm4
 36410  	LONG $0x25380f66; BYTE $0xc9               // pmovsxdq    xmm1, xmm1
 36411  	LONG $0xd4760f66                           // pcmpeqd    xmm2, xmm4
 36412  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 36413  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 36414  	LONG $0x25380f66; BYTE $0xd2               // pmovsxdq    xmm2, xmm2
 36415  	LONG $0xdc760f66                           // pcmpeqd    xmm3, xmm4
 36416  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36417  	LONG $0x25380f66; BYTE $0xdb               // pmovsxdq    xmm3, xmm3
 36418  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 36419  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0
 36420  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36421  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 36422  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 36423  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 36424  
 36425  LBB4_1063: // the scalar tail is skipped when rdx already equals the bound r10
 36426  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36427  	JE   LBB4_1655
 36428  
 36429  LBB4_1064: // scalar tail setup: rsi holds the constant 1 used by the cmovg below
 36430  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36431  
 36432  LBB4_1065: // scalar tail over index rdx (bound r10): 64-bit out = 1 if the signed 32-bit input is > 0, -1 if it is non-zero otherwise, 0 if it is zero
 36433  	WORD $0x3c8b; BYTE $0x91 // mov    edi, dword [rcx + 4*rdx]
 36434  	WORD $0xc031             // xor    eax, eax
 36435  	WORD $0xff85             // test    edi, edi
 36436  	WORD $0x950f; BYTE $0xd0 // setne    al
 36437  	WORD $0xf748; BYTE $0xd8 // neg    rax
 36438  	WORD $0xff85             // test    edi, edi
 36439  	LONG $0xc64f0f48         // cmovg    rax, rsi
 36440  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 36441  	LONG $0x01c28348         // add    rdx, 1
 36442  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36443  	JNE  LBB4_1065
 36444  	JMP  LBB4_1655
 36445  
 36446  LBB4_1066: // entry that starts the vector element index rsi at 0. NOTE(review): presumably taken when the unrolled vector loop (outside this excerpt) ran zero times -- confirm at the jump site
 36447  	WORD $0xf631 // xor    esi, esi
 36448  
 36449  LBB4_1067: // one optional vector step (only when bit 0 of r9 is set): 8 x signed 32-bit inputs at rcx+4*rsi -> 8 x 32-bit floats at r8+4*rsi. The (in != 0) mask (-1 or 0) is converted to float with cvtdq2ps, giving -1.0 or 0.0; blendvps then takes the constant at 208[rbp] where in > 0 (presumably 1.0f in every lane -- TODO confirm LCPI4_19)
 36450  	LONG $0x01c1f641                           // test    r9b, 1
 36451  	JE   LBB4_1069
 36452  	LONG $0x146f0ff3; BYTE $0xb1               // movdqu    xmm2, oword [rcx + 4*rsi]
 36453  	LONG $0x5c6f0ff3; WORD $0x10b1             // movdqu    xmm3, oword [rcx + 4*rsi + 16]
 36454  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 36455  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36456  	LONG $0xc4660f66                           // pcmpgtd    xmm0, xmm4
 36457  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36458  	LONG $0xcc660f66                           // pcmpgtd    xmm1, xmm4
 36459  	LONG $0xd4760f66                           // pcmpeqd    xmm2, xmm4
 36460  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 36461  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 36462  	WORD $0x5b0f; BYTE $0xd2                   // cvtdq2ps    xmm2, xmm2
 36463  	LONG $0xdc760f66                           // pcmpeqd    xmm3, xmm4
 36464  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36465  	WORD $0x5b0f; BYTE $0xdb                   // cvtdq2ps    xmm3, xmm3
 36466  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */
 36467  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0
 36468  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36469  	LONG $0x14380f66; BYTE $0xdc               // blendvps    xmm3, xmm4, xmm0
 36470  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 36471  	LONG $0x5c110f41; WORD $0x10b0             // movups    oword [r8 + 4*rsi + 16], xmm3
 36472  
 36473  LBB4_1069: // the scalar tail is skipped when rdx already equals the bound rax
 36474  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36475  	JE   LBB4_1655
 36476  
 36477  LBB4_1070: // scalar tail setup: xmm0 = dword at 304[rbp] (value used for non-zero, non-positive inputs), xmm1 = dword at 296[rbp] (value used for positive inputs); presumably -1.0f and 1.0f -- TODO confirm LCPI4_14 / LCPI4_5. Control enters the loop at LBB4_1072
 36478  	QUAD $0x00000130856e0f66 // movd    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */
 36479  	QUAD $0x000001288d6e0f66 // movd    xmm1, dword 296[rbp] /* [rip + .LCPI4_5] */
 36480  	JMP  LBB4_1072
 36481  
 36482  LBB4_1071: // loop latch: store the selected 32-bit value (low dword of xmm3) to r8+4*rdx, advance rdx, leave when it reaches the bound rax
 36483  	LONG $0x7e0f4166; WORD $0x901c // movd    dword [r8 + 4*rdx], xmm3
 36484  	LONG $0x01c28348               // add    rdx, 1
 36485  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 36486  	JE   LBB4_1655
 36487  
 36488  LBB4_1072: // loop body, part 1: compare the signed 32-bit input with 0; xmm2 = xmm0 when it is non-zero, else all-zero bits
 36489  	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
 36490  	LONG $0xd06f0f66 // movdqa    xmm2, xmm0
 36491  	JNE  LBB4_1074
 36492  	LONG $0xd2ef0f66 // pxor    xmm2, xmm2
 36493  
 36494  LBB4_1074: // loop body, part 2: xmm3 = xmm1 when the input is > 0, else xmm2. The JG still consumes the flags of the cmp above because the intervening movdqa/pxor are SSE instructions that do not write EFLAGS
 36495  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 36496  	JG   LBB4_1071
 36497  	LONG $0xda6f0f66 // movdqa    xmm3, xmm2
 36498  	JMP  LBB4_1071
 36499  
 36500  LBB4_1076: // Entry that starts the element index rdi at 0, then falls through into the optional vector step.
 36501  	WORD $0xff31 // xor    edi, edi
 36502    
 36503  LBB4_1077: // Optional vector step over 4 doubles at [rcx + 8*rdi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: 0 if x == 0.0, else int32(trunc((x AND 0[rbp]) OR 16[rbp])). Constants not visible here (presumably sign mask and 1.0 -- TODO confirm). Stores 4 dwords to [r8 + 4*rdi].
 36504  	LONG $0x01c1f641               // test    r9b, 1
 36505  	JE   LBB4_1079
 36506  	LONG $0x04100f66; BYTE $0xf9   // movupd    xmm0, oword [rcx + 8*rdi]
 36507  	LONG $0x4c100f66; WORD $0x10f9 // movupd    xmm1, oword [rcx + 8*rdi + 16]
 36508  	LONG $0xd2570f66               // xorpd    xmm2, xmm2
 36509  	LONG $0xd8280f66               // movapd    xmm3, xmm0
 36510  	LONG $0xdac20f66; BYTE $0x00   // cmpeqpd    xmm3, xmm2 ; xmm3 = (x == 0.0) mask, lanes 0-1
 36511  	LONG $0xe8dbc60f               // shufps    xmm3, xmm3, 232 ; pack the low dword of each 64-bit mask into the low 64 bits
 36512  	LONG $0xd1c20f66; BYTE $0x00   // cmpeqpd    xmm2, xmm1 ; xmm2 = (x == 0.0) mask, lanes 2-3
 36513  	LONG $0xe8d2c60f               // shufps    xmm2, xmm2, 232
 36514  	LONG $0x65280f66; BYTE $0x00   // movapd    xmm4, oword 0[rbp] /* [rip + .LCPI4_0] */
 36515  	LONG $0xc4540f66               // andpd    xmm0, xmm4
 36516  	LONG $0x6d280f66; BYTE $0x10   // movapd    xmm5, oword 16[rbp] /* [rip + .LCPI4_1] */
 36517  	LONG $0xc5560f66               // orpd    xmm0, xmm5
 36518  	LONG $0xcc540f66               // andpd    xmm1, xmm4
 36519  	LONG $0xcd560f66               // orpd    xmm1, xmm5
 36520  	LONG $0xc0e60f66               // cvttpd2dq    xmm0, xmm0
 36521  	LONG $0xc9e60f66               // cvttpd2dq    xmm1, xmm1
 36522  	WORD $0x550f; BYTE $0xd8       // andnps    xmm3, xmm0 ; zero the lanes whose input compared equal to 0.0
 36523  	WORD $0x550f; BYTE $0xd1       // andnps    xmm2, xmm1
 36524  	WORD $0x160f; BYTE $0xda       // movlhps    xmm3, xmm2 ; combine the two 2-lane halves into 4 dwords
 36525  	LONG $0x1c110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm3
 36526    
 36527  LBB4_1079: // Done when rsi == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36528  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
 36529  	JE   LBB4_1655
 36530    
 36531  LBB4_1080: // Scalar loop setup: xmm0 = 0.0, xmm1 = 16-byte constant at 0[rbp], xmm2 = qword constant at 272[rbp] (values not visible here).
 36532  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 36533  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 36534  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 36535    
 36536  LBB4_1081: // Scalar loop, one double per iteration: edx = int32(trunc((x AND xmm1) OR xmm2)), replaced by r10d when ucomisd reports ZF. r10d is loaded before this excerpt (presumably 0 -- TODO confirm). NOTE(review): ucomisd sets ZF for unordered (NaN) operands as well as for equality.
 36537  	LONG $0x1c100ff2; BYTE $0xf1 // movsd    xmm3, qword [rcx + 8*rsi]
 36538  	LONG $0xc32e0f66             // ucomisd    xmm0, xmm3 ; flags consumed by the cmove below; andpd/orpd/cvttsd2si leave EFLAGS untouched
 36539  	LONG $0xd9540f66             // andpd    xmm3, xmm1
 36540  	LONG $0xda560f66             // orpd    xmm3, xmm2
 36541  	LONG $0xd32c0ff2             // cvttsd2si    edx, xmm3
 36542  	LONG $0xd2440f41             // cmove    edx, r10d
 36543  	LONG $0xb0148941             // mov    dword [r8 + 4*rsi], edx
 36544  	LONG $0x01c68348             // add    rsi, 1
 36545  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 36546  	JNE  LBB4_1081
 36547  	JMP  LBB4_1655
 36548  
 36549  LBB4_1082: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36550  	WORD $0xf631 // xor    esi, esi
 36551    
 36552  LBB4_1083: // Optional vector step over 4 qwords at [rcx + 8*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: the dword from the constant at 160[rbp] if x != 0, else 0 (constant value not visible here; presumably 1 per dword -- TODO confirm). Stores 4 dwords to [r8 + 4*rsi].
 36553  	LONG $0x01c1f641               // test    r9b, 1
 36554  	JE   LBB4_1085
 36555  	LONG $0x046f0ff3; BYTE $0xf1   // movdqu    xmm0, oword [rcx + 8*rsi]
 36556  	LONG $0x4c6f0ff3; WORD $0x10f1 // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 36557  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 36558  	LONG $0x29380f66; BYTE $0xc2   // pcmpeqq    xmm0, xmm2 ; (x == 0) mask per qword
 36559  	LONG $0xc0700f66; BYTE $0xe8   // pshufd    xmm0, xmm0, 232 ; pack the low dword of each qword mask into the low 64 bits
 36560  	QUAD $0x000000a09d6f0f66       // movdqa    xmm3, oword 160[rbp] /* [rip + .LCPI4_16] */
 36561  	LONG $0xc3df0f66               // pandn    xmm0, xmm3 ; keep the constant only where x != 0
 36562  	LONG $0x29380f66; BYTE $0xca   // pcmpeqq    xmm1, xmm2
 36563  	LONG $0xc9700f66; BYTE $0xe8   // pshufd    xmm1, xmm1, 232
 36564  	LONG $0xcbdf0f66               // pandn    xmm1, xmm3
 36565  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; combine the two 2-lane halves into 4 dwords
 36566  	LONG $0x7f0f41f3; WORD $0xb004 // movdqu    oword [r8 + 4*rsi], xmm0
 36567    
 36568  LBB4_1085: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36569  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36570  	JE   LBB4_1655
 36571    
 36572  LBB4_1086: // Scalar loop: dword [r8 + 4*rdx] = 1 if qword [rcx + 8*rdx] != 0, else 0; runs until rdx == rax.
 36573  	WORD $0xf631                 // xor    esi, esi
 36574  	LONG $0xd13c8348; BYTE $0x00 // cmp    qword [rcx + 8*rdx], 0
 36575  	LONG $0xd6950f40             // setne    sil
 36576  	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
 36577  	LONG $0x01c28348             // add    rdx, 1
 36578  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36579  	JNE  LBB4_1086
 36580  	JMP  LBB4_1655
 36581  
 36582  LBB4_1087: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36583  	WORD $0xf631 // xor    esi, esi
 36584    
 36585  LBB4_1088: // Optional vector step over 8 words at [rcx + 2*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: (x != 0) word mask, zero-extended to a dword and ANDed with the constant at 80[rbp] (value not visible here; presumably 1 per dword -- TODO confirm). Stores 8 dwords to [r8 + 4*rsi].
 36586  	LONG $0x01c1f641                           // test    r9b, 1
 36587  	JE   LBB4_1090
 36588  	LONG $0x047e0ff3; BYTE $0x71               // movq    xmm0, qword [rcx + 2*rsi]
 36589  	LONG $0x4c7e0ff3; WORD $0x0871             // movq    xmm1, qword [rcx + 2*rsi + 8]
 36590  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 36591  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 36592  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones
 36593  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3 ; invert: (x != 0) mask
 36594  	LONG $0x33380f66; BYTE $0xc0               // pmovzxwd    xmm0, xmm0
 36595  	LONG $0x656f0f66; BYTE $0x50               // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 36596  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 36597  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 36598  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 36599  	LONG $0x33380f66; BYTE $0xc9               // pmovzxwd    xmm1, xmm1
 36600  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 36601  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 36602  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 36603    
 36604  LBB4_1090: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36605  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36606  	JE   LBB4_1655
 36607    
 36608  LBB4_1091: // Scalar loop: dword [r8 + 4*rdx] = 1 if word [rcx + 2*rdx] != 0, else 0; runs until rdx == rax.
 36609  	WORD $0xf631                 // xor    esi, esi
 36610  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 36611  	LONG $0xd6950f40             // setne    sil
 36612  	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
 36613  	LONG $0x01c28348             // add    rdx, 1
 36614  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36615  	JNE  LBB4_1091
 36616  	JMP  LBB4_1655
 36617  
 36618  LBB4_1092: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36619  	WORD $0xf631 // xor    esi, esi
 36620    
 36621  LBB4_1093: // Optional vector step over 8 signed words at [rcx + 2*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane (widened to a dword): constant from 80[rbp] if x > 0, else -1 if x != 0, else 0. The 80[rbp] value is not visible here (presumably 1 per dword -- TODO confirm). Stores 8 dwords to [r8 + 4*rsi].
 36622  	LONG $0x01c1f641               // test    r9b, 1
 36623  	JE   LBB4_1095
 36624  	LONG $0x147e0ff3; BYTE $0x71   // movq    xmm2, qword [rcx + 2*rsi]
 36625  	LONG $0x5c7e0ff3; WORD $0x0871 // movq    xmm3, qword [rcx + 2*rsi + 8]
 36626  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 36627  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 36628  	LONG $0xc4650f66               // pcmpgtw    xmm0, xmm4 ; (x > 0) word mask
 36629  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0 ; widen the mask to dword lanes
 36630  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 36631  	LONG $0xcc650f66               // pcmpgtw    xmm1, xmm4
 36632  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 36633  	LONG $0xd4750f66               // pcmpeqw    xmm2, xmm4
 36634  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 ; xmm5 = all ones
 36635  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5 ; (x != 0) mask, i.e. -1 or 0 per word
 36636  	LONG $0x23380f66; BYTE $0xd2   // pmovsxwd    xmm2, xmm2 ; sign-extend so nonzero lanes become dword -1
 36637  	LONG $0xdc750f66               // pcmpeqw    xmm3, xmm4
 36638  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 36639  	LONG $0x23380f66; BYTE $0xdb   // pmovsxwd    xmm3, xmm3
 36640  	LONG $0x5065280f               // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 36641  	LONG $0x14380f66; BYTE $0xd4   // blendvps    xmm2, xmm4, xmm0 ; take the constant where x > 0 (xmm0 is the implicit mask)
 36642  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 36643  	LONG $0x14380f66; BYTE $0xdc   // blendvps    xmm3, xmm4, xmm0
 36644  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 36645  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 36646    
 36647  LBB4_1095: // Done when rdx == r10 (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36648  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36649  	JE   LBB4_1655
 36650    
 36651  LBB4_1096: // Scalar loop setup: esi = 1, the value selected for positive inputs.
 36652  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36653    
 36654  LBB4_1097: // Scalar loop: dword [r8 + 4*rdx] = 1 if the signed word at [rcx + 2*rdx] is > 0, -1 if it is nonzero otherwise, 0 if it is zero; runs until rdx == r10.
 36655  	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
 36656  	WORD $0xc031             // xor    eax, eax
 36657  	WORD $0x8566; BYTE $0xff // test    di, di
 36658  	WORD $0x950f; BYTE $0xd0 // setne    al
 36659  	WORD $0xd8f7             // neg    eax ; eax = -(x != 0)
 36660  	WORD $0x8566; BYTE $0xff // test    di, di ; re-test because neg overwrote the flags
 36661  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
 36662  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 36663  	LONG $0x01c28348         // add    rdx, 1
 36664  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36665  	JNE  LBB4_1097
 36666  	JMP  LBB4_1655
 36667  
 36668  LBB4_1098: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36669  	WORD $0xf631 // xor    esi, esi
 36670    
 36671  LBB4_1099: // Optional vector step over 4 signed qwords at [rcx + 8*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane (narrowed to a dword): constant from 160[rbp] if x > 0, else -1 if x != 0, else 0. The 160[rbp] value is not visible here (presumably 1 per dword -- TODO confirm). Stores 4 dwords to [r8 + 4*rsi].
 36672  	LONG $0x01c1f641                           // test    r9b, 1
 36673  	JE   LBB4_1101
 36674  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 36675  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 36676  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 36677  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36678  	LONG $0x37380f66; BYTE $0xc4               // pcmpgtq    xmm0, xmm4 ; (x > 0) qword mask
 36679  	LONG $0xc0700f66; BYTE $0xe8               // pshufd    xmm0, xmm0, 232 ; pack the low dword of each qword mask into the low 64 bits
 36680  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36681  	LONG $0x37380f66; BYTE $0xcc               // pcmpgtq    xmm1, xmm4
 36682  	LONG $0xc9700f66; BYTE $0xe8               // pshufd    xmm1, xmm1, 232
 36683  	LONG $0x29380f66; BYTE $0xd4               // pcmpeqq    xmm2, xmm4
 36684  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 36685  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; xmm5 = all ones
 36686  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 ; (x != 0) mask, i.e. -1 or 0 per dword
 36687  	LONG $0x29380f66; BYTE $0xdc               // pcmpeqq    xmm3, xmm4
 36688  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 36689  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36690  	LONG $0xa0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 160[rbp] /* [rip + .LCPI4_16] */
 36691  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0 ; take the constant where x > 0 (xmm0 is the implicit mask)
 36692  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36693  	LONG $0x14380f66; BYTE $0xdc               // blendvps    xmm3, xmm4, xmm0
 36694  	WORD $0x160f; BYTE $0xd3                   // movlhps    xmm2, xmm3 ; combine the two 2-lane halves into 4 dwords
 36695  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 36696    
 36697  LBB4_1101: // Done when rdx == r10 (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36698  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36699  	JE   LBB4_1655
 36700    
 36701  LBB4_1102: // Scalar loop setup: esi = 1, the value selected for positive inputs.
 36702  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36703    
 36704  LBB4_1103: // Scalar loop: dword [r8 + 4*rdx] = 1 if the signed qword at [rcx + 8*rdx] is > 0, -1 if it is nonzero otherwise, 0 if it is zero; runs until rdx == r10.
 36705  	LONG $0xd13c8b48         // mov    rdi, qword [rcx + 8*rdx]
 36706  	WORD $0xc031             // xor    eax, eax
 36707  	WORD $0x8548; BYTE $0xff // test    rdi, rdi
 36708  	WORD $0x950f; BYTE $0xd0 // setne    al
 36709  	WORD $0xd8f7             // neg    eax ; eax = -(x != 0)
 36710  	WORD $0x8548; BYTE $0xff // test    rdi, rdi ; re-test because neg overwrote the flags
 36711  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
 36712  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 36713  	LONG $0x01c28348         // add    rdx, 1
 36714  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36715  	JNE  LBB4_1103
 36716  	JMP  LBB4_1655
 36717  
 36718  LBB4_1106: // Entry that starts the element index rdi at 0, then falls through into the optional vector step.
 36719  	WORD $0xff31 // xor    edi, edi
 36720    
 36721  LBB4_1107: // Optional vector step over 4 doubles at [rcx + 8*rdi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: 0 if x == 0.0, else the low 32 bits of the 64-bit truncating conversion of (x AND 0[rbp]) OR 16[rbp]. Constants not visible here (presumably sign mask and 1.0 -- TODO confirm). Uses rax and rdx as scratch. Stores 4 dwords to [r8 + 4*rdi].
 36722  	LONG $0x01c1f641               // test    r9b, 1
 36723  	JE   LBB4_1109
 36724  	LONG $0x1c100f66; BYTE $0xf9   // movupd    xmm3, oword [rcx + 8*rdi]
 36725  	LONG $0x54100f66; WORD $0x10f9 // movupd    xmm2, oword [rcx + 8*rdi + 16]
 36726  	LONG $0xc9570f66               // xorpd    xmm1, xmm1
 36727  	LONG $0xc3280f66               // movapd    xmm0, xmm3
 36728  	LONG $0xc1c20f66; BYTE $0x00   // cmpeqpd    xmm0, xmm1 ; xmm0 = (x == 0.0) mask, lanes 0-1
 36729  	LONG $0xe8c0c60f               // shufps    xmm0, xmm0, 232 ; pack the low dword of each 64-bit mask into the low 64 bits
 36730  	LONG $0xcac20f66; BYTE $0x00   // cmpeqpd    xmm1, xmm2 ; xmm1 = (x == 0.0) mask, lanes 2-3
 36731  	LONG $0x65280f66; BYTE $0x00   // movapd    xmm4, oword 0[rbp] /* [rip + .LCPI4_0] */
 36732  	LONG $0xdc540f66               // andpd    xmm3, xmm4
 36733  	LONG $0x6d280f66; BYTE $0x10   // movapd    xmm5, oword 16[rbp] /* [rip + .LCPI4_1] */
 36734  	LONG $0xdd560f66               // orpd    xmm3, xmm5
 36735  	LONG $0xd4540f66               // andpd    xmm2, xmm4
 36736  	LONG $0xd5560f66               // orpd    xmm2, xmm5
 36737  	LONG $0xe3700f66; BYTE $0xee   // pshufd    xmm4, xmm3, 238 ; bring the high double down to the low lane
 36738  	LONG $0x2c0f48f2; BYTE $0xc4   // cvttsd2si    rax, xmm4
 36739  	LONG $0x2c0f48f2; BYTE $0xd3   // cvttsd2si    rdx, xmm3
 36740  	LONG $0xda6e0f66               // movd    xmm3, edx
 36741  	LONG $0x223a0f66; WORD $0x01d8 // pinsrd    xmm3, eax, 1
 36742  	LONG $0xe2700f66; BYTE $0xee   // pshufd    xmm4, xmm2, 238
 36743  	LONG $0x2c0f48f2; BYTE $0xc4   // cvttsd2si    rax, xmm4
 36744  	LONG $0x2c0f48f2; BYTE $0xd2   // cvttsd2si    rdx, xmm2
 36745  	LONG $0xe8c9c60f               // shufps    xmm1, xmm1, 232
 36746  	LONG $0xd26e0f66               // movd    xmm2, edx
 36747  	LONG $0x223a0f66; WORD $0x01d0 // pinsrd    xmm2, eax, 1
 36748  	WORD $0x550f; BYTE $0xc3       // andnps    xmm0, xmm3 ; zero the lanes whose input compared equal to 0.0
 36749  	WORD $0x550f; BYTE $0xca       // andnps    xmm1, xmm2
 36750  	WORD $0x160f; BYTE $0xc1       // movlhps    xmm0, xmm1 ; combine the two 2-lane halves into 4 dwords
 36751  	LONG $0x04110f41; BYTE $0xb8   // movups    oword [r8 + 4*rdi], xmm0
 36752    
 36753  LBB4_1109: // Done when rsi == r11 (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36754  	WORD $0x394c; BYTE $0xde // cmp    rsi, r11
 36755  	JE   LBB4_1655
 36756    
 36757  LBB4_1110: // Scalar loop setup: xmm0 = 0.0, xmm1 = 16-byte constant at 0[rbp], xmm2 = qword constant at 272[rbp] (values not visible here).
 36758  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 36759  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 36760  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 36761    
 36762  LBB4_1111: // Scalar loop, one double per iteration: rax = 64-bit truncating conversion of (x AND xmm1) OR xmm2; eax is replaced by r10d when ucomisd reports ZF; the low dword is stored. r10d is loaded before this excerpt (presumably 0 -- TODO confirm). NOTE(review): ucomisd sets ZF for unordered (NaN) operands as well as for equality.
 36763  	LONG $0x1c100ff2; BYTE $0xf1 // movsd    xmm3, qword [rcx + 8*rsi]
 36764  	LONG $0xc32e0f66             // ucomisd    xmm0, xmm3 ; flags consumed by the cmove below; andpd/orpd/cvttsd2si leave EFLAGS untouched
 36765  	LONG $0xd9540f66             // andpd    xmm3, xmm1
 36766  	LONG $0xda560f66             // orpd    xmm3, xmm2
 36767  	LONG $0x2c0f48f2; BYTE $0xc3 // cvttsd2si    rax, xmm3
 36768  	LONG $0xc2440f41             // cmove    eax, r10d
 36769  	LONG $0xb0048941             // mov    dword [r8 + 4*rsi], eax
 36770  	LONG $0x01c68348             // add    rsi, 1
 36771  	WORD $0x3949; BYTE $0xf3     // cmp    r11, rsi
 36772  	JNE  LBB4_1111
 36773  	JMP  LBB4_1655
 36774  
 36775  LBB4_1112: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36776  	WORD $0xf631 // xor    esi, esi
 36777    
 36778  LBB4_1113: // Optional vector step over 8 words at [rcx + 2*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: (x != 0) word mask, zero-extended to a dword and ANDed with the constant at 80[rbp] (value not visible here; presumably 1 per dword -- TODO confirm). Stores 8 dwords to [r8 + 4*rsi].
 36779  	LONG $0x01c1f641                           // test    r9b, 1
 36780  	JE   LBB4_1115
 36781  	LONG $0x047e0ff3; BYTE $0x71               // movq    xmm0, qword [rcx + 2*rsi]
 36782  	LONG $0x4c7e0ff3; WORD $0x0871             // movq    xmm1, qword [rcx + 2*rsi + 8]
 36783  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 36784  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 36785  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones
 36786  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3 ; invert: (x != 0) mask
 36787  	LONG $0x33380f66; BYTE $0xc0               // pmovzxwd    xmm0, xmm0
 36788  	LONG $0x656f0f66; BYTE $0x50               // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 36789  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 36790  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 36791  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 36792  	LONG $0x33380f66; BYTE $0xc9               // pmovzxwd    xmm1, xmm1
 36793  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 36794  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 36795  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 36796    
 36797  LBB4_1115: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36798  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36799  	JE   LBB4_1655
 36800    
 36801  LBB4_1116: // Scalar loop: dword [r8 + 4*rdx] = 1 if word [rcx + 2*rdx] != 0, else 0; runs until rdx == rax.
 36802  	WORD $0xf631                 // xor    esi, esi
 36803  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 36804  	LONG $0xd6950f40             // setne    sil
 36805  	LONG $0x90348941             // mov    dword [r8 + 4*rdx], esi
 36806  	LONG $0x01c28348             // add    rdx, 1
 36807  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36808  	JNE  LBB4_1116
 36809  	JMP  LBB4_1655
 36810  
 36811  LBB4_1117: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36812  	WORD $0xf631 // xor    esi, esi
 36813    
 36814  LBB4_1118: // Optional vector step over 8 signed words at [rcx + 2*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane (widened to a dword): constant from 80[rbp] if x > 0, else -1 if x != 0, else 0. The 80[rbp] value is not visible here (presumably 1 per dword -- TODO confirm). Stores 8 dwords to [r8 + 4*rsi].
 36815  	LONG $0x01c1f641               // test    r9b, 1
 36816  	JE   LBB4_1120
 36817  	LONG $0x147e0ff3; BYTE $0x71   // movq    xmm2, qword [rcx + 2*rsi]
 36818  	LONG $0x5c7e0ff3; WORD $0x0871 // movq    xmm3, qword [rcx + 2*rsi + 8]
 36819  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 36820  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 36821  	LONG $0xc4650f66               // pcmpgtw    xmm0, xmm4 ; (x > 0) word mask
 36822  	LONG $0x23380f66; BYTE $0xc0   // pmovsxwd    xmm0, xmm0 ; widen the mask to dword lanes
 36823  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 36824  	LONG $0xcc650f66               // pcmpgtw    xmm1, xmm4
 36825  	LONG $0x23380f66; BYTE $0xc9   // pmovsxwd    xmm1, xmm1
 36826  	LONG $0xd4750f66               // pcmpeqw    xmm2, xmm4
 36827  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 ; xmm5 = all ones
 36828  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5 ; (x != 0) mask, i.e. -1 or 0 per word
 36829  	LONG $0x23380f66; BYTE $0xd2   // pmovsxwd    xmm2, xmm2 ; sign-extend so nonzero lanes become dword -1
 36830  	LONG $0xdc750f66               // pcmpeqw    xmm3, xmm4
 36831  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 36832  	LONG $0x23380f66; BYTE $0xdb   // pmovsxwd    xmm3, xmm3
 36833  	LONG $0x5065280f               // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 36834  	LONG $0x14380f66; BYTE $0xd4   // blendvps    xmm2, xmm4, xmm0 ; take the constant where x > 0 (xmm0 is the implicit mask)
 36835  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 36836  	LONG $0x14380f66; BYTE $0xdc   // blendvps    xmm3, xmm4, xmm0
 36837  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 36838  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 36839    
 36840  LBB4_1120: // Done when rdx == r10 (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36841  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36842  	JE   LBB4_1655
 36843    
 36844  LBB4_1121: // Scalar loop setup: esi = 1, the value selected for positive inputs.
 36845  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36846    
 36847  LBB4_1122: // Scalar loop: dword [r8 + 4*rdx] = 1 if the signed word at [rcx + 2*rdx] is > 0, -1 if it is nonzero otherwise, 0 if it is zero; runs until rdx == r10.
 36848  	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
 36849  	WORD $0xc031             // xor    eax, eax
 36850  	WORD $0x8566; BYTE $0xff // test    di, di
 36851  	WORD $0x950f; BYTE $0xd0 // setne    al
 36852  	WORD $0xd8f7             // neg    eax ; eax = -(x != 0)
 36853  	WORD $0x8566; BYTE $0xff // test    di, di ; re-test because neg overwrote the flags
 36854  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
 36855  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 36856  	LONG $0x01c28348         // add    rdx, 1
 36857  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36858  	JNE  LBB4_1122
 36859  	JMP  LBB4_1655
 36860  
 36861  LBB4_1123: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36862  	WORD $0xf631 // xor    esi, esi
 36863    
 36864  LBB4_1124: // Optional vector step over 4 signed qwords at [rcx + 8*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane (narrowed to a dword): constant from 160[rbp] if x > 0, else -1 if x != 0, else 0. The 160[rbp] value is not visible here (presumably 1 per dword -- TODO confirm). Stores 4 dwords to [r8 + 4*rsi].
 36865  	LONG $0x01c1f641                           // test    r9b, 1
 36866  	JE   LBB4_1126
 36867  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 36868  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 36869  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 36870  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 36871  	LONG $0x37380f66; BYTE $0xc4               // pcmpgtq    xmm0, xmm4 ; (x > 0) qword mask
 36872  	LONG $0xc0700f66; BYTE $0xe8               // pshufd    xmm0, xmm0, 232 ; pack the low dword of each qword mask into the low 64 bits
 36873  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 36874  	LONG $0x37380f66; BYTE $0xcc               // pcmpgtq    xmm1, xmm4
 36875  	LONG $0xc9700f66; BYTE $0xe8               // pshufd    xmm1, xmm1, 232
 36876  	LONG $0x29380f66; BYTE $0xd4               // pcmpeqq    xmm2, xmm4
 36877  	LONG $0xd2700f66; BYTE $0xe8               // pshufd    xmm2, xmm2, 232
 36878  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; xmm5 = all ones
 36879  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 ; (x != 0) mask, i.e. -1 or 0 per dword
 36880  	LONG $0x29380f66; BYTE $0xdc               // pcmpeqq    xmm3, xmm4
 36881  	LONG $0xdb700f66; BYTE $0xe8               // pshufd    xmm3, xmm3, 232
 36882  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 36883  	LONG $0xa0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 160[rbp] /* [rip + .LCPI4_16] */
 36884  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0 ; take the constant where x > 0 (xmm0 is the implicit mask)
 36885  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 36886  	LONG $0x14380f66; BYTE $0xdc               // blendvps    xmm3, xmm4, xmm0
 36887  	WORD $0x160f; BYTE $0xd3                   // movlhps    xmm2, xmm3 ; combine the two 2-lane halves into 4 dwords
 36888  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 36889    
 36890  LBB4_1126: // Done when rdx == r10 (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36891  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 36892  	JE   LBB4_1655
 36893    
 36894  LBB4_1127: // Scalar loop setup: esi = 1, the value selected for positive inputs.
 36895  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 36896    
 36897  LBB4_1128: // Scalar loop: dword [r8 + 4*rdx] = 1 if the signed qword at [rcx + 8*rdx] is > 0, -1 if it is nonzero otherwise, 0 if it is zero; runs until rdx == r10.
 36898  	LONG $0xd13c8b48         // mov    rdi, qword [rcx + 8*rdx]
 36899  	WORD $0xc031             // xor    eax, eax
 36900  	WORD $0x8548; BYTE $0xff // test    rdi, rdi
 36901  	WORD $0x950f; BYTE $0xd0 // setne    al
 36902  	WORD $0xd8f7             // neg    eax ; eax = -(x != 0)
 36903  	WORD $0x8548; BYTE $0xff // test    rdi, rdi ; re-test because neg overwrote the flags
 36904  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
 36905  	LONG $0x90048941         // mov    dword [r8 + 4*rdx], eax
 36906  	LONG $0x01c28348         // add    rdx, 1
 36907  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 36908  	JNE  LBB4_1128
 36909  	JMP  LBB4_1655
 36910  
 36911  LBB4_1129: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36912  	WORD $0xf631 // xor    esi, esi
 36913    
 36914  LBB4_1130: // Optional vector step over 4 floats at [rcx + 4*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Builds an integer from the sign bits ORed with the constant at 80[rbp], converts it to float, converts that back to a dword using a compare/subtract/xor sequence against the constants at 96[rbp] and 48[rbp] (this looks like an unsigned float->int conversion -- TODO confirm), then zeroes lanes where the input compared equal to 0. Stores 4 dwords to [r8 + 4*rsi].
 36915  	LONG $0x01c1f641             // test    r9b, 1
 36916  	JE   LBB4_1132
 36917  	LONG $0xb104100f             // movups    xmm0, oword [rcx + 4*rsi]
 36918  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 36919  	LONG $0x04c8c20f             // cmpneqps    xmm1, xmm0 ; xmm1 = (x != 0) lane mask
 36920  	LONG $0xe0720f66; BYTE $0x1f // psrad    xmm0, 31 ; replicate each lane's sign bit: -1 or 0
 36921  	LONG $0x45eb0f66; BYTE $0x50 // por    xmm0, oword 80[rbp] /* [rip + .LCPI4_8] */
 36922  	WORD $0x5b0f; BYTE $0xd0     // cvtdq2ps    xmm2, xmm0
 36923  	LONG $0x605d280f             // movaps    xmm3, oword 96[rbp] /* [rip + .LCPI4_10] */
 36924  	WORD $0x280f; BYTE $0xc2     // movaps    xmm0, xmm2
 36925  	LONG $0x01c3c20f             // cmpltps    xmm0, xmm3 ; mask of lanes below the 96[rbp] threshold
 36926  	LONG $0xe25b0ff3             // cvttps2dq    xmm4, xmm2 ; direct conversion, used where the mask is set
 36927  	WORD $0x5c0f; BYTE $0xd3     // subps    xmm2, xmm3
 36928  	LONG $0xd25b0ff3             // cvttps2dq    xmm2, xmm2
 36929  	LONG $0x3055570f             // xorps    xmm2, oword 48[rbp] /* [rip + .LCPI4_4] */
 36930  	LONG $0x14380f66; BYTE $0xd4 // blendvps    xmm2, xmm4, xmm0 ; xmm0 is the implicit mask
 36931  	WORD $0x540f; BYTE $0xca     // andps    xmm1, xmm2 ; keep the result only where x != 0
 36932  	LONG $0x0c110f41; BYTE $0xb0 // movups    oword [r8 + 4*rsi], xmm1
 36933    
 36934  LBB4_1132: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36935  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36936  	JE   LBB4_1655
 36937    
 36938  LBB4_1133: // Scalar loop setup: xmm0 = 0.0f for the comparison in LBB4_1135.
 36939  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 36940  	JMP  LBB4_1135
 36941    
 36942  LBB4_1134: // Loop latch: store esi to dword [r8 + 4*rdx], advance rdx, leave when rdx == rax.
 36943  	LONG $0x90348941         // mov    dword [r8 + 4*rdx], esi
 36944  	LONG $0x01c28348         // add    rdx, 1
 36945  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 36946  	JE   LBB4_1655
 36947    
 36948  LBB4_1135: // Loop body: esi stays 0 when ucomiss reports ZF (equal to 0.0f; ZF is also set for unordered/NaN operands). Otherwise esi = -1 if the sign bit is set, else +1, round-tripped through float and a 64-bit truncating conversion; the latch stores the low dword.
 36949  	LONG $0x0c100ff3; BYTE $0x91 // movss    xmm1, dword [rcx + 4*rdx]
 36950  	WORD $0xf631                 // xor    esi, esi
 36951  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1 ; overwrites the flags left by the xor above
 36952  	JE   LBB4_1134
 36953  	WORD $0x500f; BYTE $0xf1     // movmskps    esi, xmm1
 36954  	WORD $0xe683; BYTE $0x01     // and    esi, 1 ; bit 0 = sign bit of the loaded float
 36955  	WORD $0xdef7                 // neg    esi
 36956  	WORD $0xce83; BYTE $0x01     // or    esi, 1 ; -1 for negative, +1 otherwise
 36957  	WORD $0x570f; BYTE $0xc9     // xorps    xmm1, xmm1
 36958  	LONG $0xce2a0ff3             // cvtsi2ss    xmm1, esi
 36959  	LONG $0x2c0f48f3; BYTE $0xf1 // cvttss2si    rsi, xmm1
 36960  	JMP  LBB4_1134
 36961  
 36962  LBB4_1137: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36963  	WORD $0xf631 // xor    esi, esi
 36964    
 36965  LBB4_1138: // Optional vector step over 8 dwords at [rcx + 4*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: (x != 0) mask narrowed to a word and ANDed with the constant at 112[rbp] (value not visible here; presumably 1 per word -- TODO confirm). Stores 8 words to [r8 + 2*rsi].
 36966  	LONG $0x01c1f641               // test    r9b, 1
 36967  	JE   LBB4_1140
 36968  	LONG $0x046f0ff3; BYTE $0xb1   // movdqu    xmm0, oword [rcx + 4*rsi]
 36969  	LONG $0x4c6f0ff3; WORD $0x10b1 // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 36970  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 36971  	LONG $0xc2760f66               // pcmpeqd    xmm0, xmm2
 36972  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones
 36973  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3 ; invert: (x != 0) mask
 36974  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0 ; narrow the dword masks to word masks
 36975  	LONG $0x656f0f66; BYTE $0x70   // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */
 36976  	LONG $0xc4db0f66               // pand    xmm0, xmm4
 36977  	LONG $0xca760f66               // pcmpeqd    xmm1, xmm2
 36978  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 36979  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 36980  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 36981  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; combine the two 4-word halves
 36982  	LONG $0x7f0f41f3; WORD $0x7004 // movdqu    oword [r8 + 2*rsi], xmm0
 36983    
 36984  LBB4_1140: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 36985  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 36986  	JE   LBB4_1655
 36987    
 36988  LBB4_1141: // Scalar loop: word [r8 + 2*rdx] = 1 if dword [rcx + 4*rdx] != 0, else 0; runs until rdx == rax.
 36989  	WORD $0xf631                 // xor    esi, esi
 36990  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 36991  	LONG $0xd6950f40             // setne    sil
 36992  	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
 36993  	LONG $0x01c28348             // add    rdx, 1
 36994  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 36995  	JNE  LBB4_1141
 36996  	JMP  LBB4_1655
 36997  
 36998  LBB4_1142: // Entry that starts the element index rsi at 0, then falls through into the optional vector step.
 36999  	WORD $0xf631 // xor    esi, esi
 37000    
 37001  LBB4_1143: // Optional vector step over 8 dwords at [rcx + 4*rsi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: (x != 0) mask narrowed to a word and ANDed with the constant at 112[rbp] (value not visible here; presumably 1 per word -- TODO confirm). Stores 8 words to [r8 + 2*rsi].
 37002  	LONG $0x01c1f641               // test    r9b, 1
 37003  	JE   LBB4_1145
 37004  	LONG $0x046f0ff3; BYTE $0xb1   // movdqu    xmm0, oword [rcx + 4*rsi]
 37005  	LONG $0x4c6f0ff3; WORD $0x10b1 // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 37006  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 37007  	LONG $0xc2760f66               // pcmpeqd    xmm0, xmm2
 37008  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3 ; xmm3 = all ones
 37009  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3 ; invert: (x != 0) mask
 37010  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0 ; narrow the dword masks to word masks
 37011  	LONG $0x656f0f66; BYTE $0x70   // movdqa    xmm4, oword 112[rbp] /* [rip + .LCPI4_11] */
 37012  	LONG $0xc4db0f66               // pand    xmm0, xmm4
 37013  	LONG $0xca760f66               // pcmpeqd    xmm1, xmm2
 37014  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 37015  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 37016  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 37017  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; combine the two 4-word halves
 37018  	LONG $0x7f0f41f3; WORD $0x7004 // movdqu    oword [r8 + 2*rsi], xmm0
 37019    
 37020  LBB4_1145: // Done when rdx == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 37021  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37022  	JE   LBB4_1655
 37023    
 37024  LBB4_1146: // Scalar loop: word [r8 + 2*rdx] = 1 if dword [rcx + 4*rdx] != 0, else 0; runs until rdx == rax.
 37025  	WORD $0xf631                 // xor    esi, esi
 37026  	LONG $0x00913c83             // cmp    dword [rcx + 4*rdx], 0
 37027  	LONG $0xd6950f40             // setne    sil
 37028  	LONG $0x34894166; BYTE $0x50 // mov    word [r8 + 2*rdx], si
 37029  	LONG $0x01c28348             // add    rdx, 1
 37030  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 37031  	JNE  LBB4_1146
 37032  	JMP  LBB4_1655
 37033  
 37034  LBB4_1147: // Entry that starts the element index rdi at 0, then falls through into the optional vector step.
 37035  	WORD $0xff31 // xor    edi, edi
 37036    
 37037  LBB4_1148: // Optional vector step over 4 doubles at [rcx + 8*rdi]; runs only when bit 0 of r9b is set (r9 is set up before this excerpt). Per lane: 0 if x == 0.0, else the low word of int32(trunc((x AND 0[rbp]) OR 16[rbp])). Constants not visible here (presumably sign mask and 1.0 -- TODO confirm). Stores 4 words to [r8 + 2*rdi] as two 4-byte writes.
 37038  	LONG $0x01c1f641                           // test    r9b, 1
 37039  	JE   LBB4_1150
 37040  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 37041  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 37042  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 37043  	LONG $0xc2280f66                           // movapd    xmm0, xmm2
 37044  	LONG $0xc4c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm4 ; (x == 0.0) mask per 64-bit lane
 37045  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 ; two packs narrow each 64-bit mask to a 16-bit mask
 37046  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 37047  	LONG $0xcb280f66                           // movapd    xmm1, xmm3
 37048  	LONG $0xccc20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm4
 37049  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37050  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37051  	LONG $0x6d280f66; BYTE $0x00               // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI4_0] */
 37052  	LONG $0xd5540f66                           // andpd    xmm2, xmm5
 37053  	LONG $0x75280f66; BYTE $0x10               // movapd    xmm6, oword 16[rbp] /* [rip + .LCPI4_1] */
 37054  	LONG $0xd6560f66                           // orpd    xmm2, xmm6
 37055  	LONG $0xdd540f66                           // andpd    xmm3, xmm5
 37056  	LONG $0xde560f66                           // orpd    xmm3, xmm6
 37057  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
 37058  	LONG $0xdbe60f66                           // cvttpd2dq    xmm3, xmm3
 37059  	LONG $0xd2700ff2; BYTE $0xe8               // pshuflw    xmm2, xmm2, 232 ; gather the low word of dwords 0 and 1 into words 0-1
 37060  	LONG $0xdb700ff2; BYTE $0xe8               // pshuflw    xmm3, xmm3, 232
 37061  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0 ; xmm4 is zero: clear lanes whose input compared equal to 0.0 (xmm0 is the implicit mask)
 37062  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 37063  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 37064  	LONG $0x7e0f4166; WORD $0x7814             // movd    dword [r8 + 2*rdi], xmm2
 37065  	LONG $0x7e0f4166; WORD $0x785c; BYTE $0x04 // movd    dword [r8 + 2*rdi + 4], xmm3
 37066    
 37067  LBB4_1150: // Done when rsi == rax (both set before this excerpt). LBB4_1655 lies outside this excerpt.
 37068  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
 37069  	JE   LBB4_1655
 37070    
 37071  LBB4_1151: // Scalar loop setup: xmm0 = 0.0, xmm1 = 16-byte constant at 0[rbp], xmm2 = qword constant at 272[rbp] (values not visible here).
 37072  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 37073  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 37074  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 37075    
 37076  LBB4_1152: // Scalar loop, one double per iteration: edx = int32(trunc((x AND xmm1) OR xmm2)), replaced by r10d when ucomisd reports ZF; the low word is stored. r10d is loaded before this excerpt (presumably 0 -- TODO confirm). NOTE(review): ucomisd sets ZF for unordered (NaN) operands as well as for equality.
 37077  	LONG $0x1c100ff2; BYTE $0xf1 // movsd    xmm3, qword [rcx + 8*rsi]
 37078  	LONG $0xc32e0f66             // ucomisd    xmm0, xmm3 ; flags consumed by the cmove below; andpd/orpd/cvttsd2si leave EFLAGS untouched
 37079  	LONG $0xd9540f66             // andpd    xmm3, xmm1
 37080  	LONG $0xda560f66             // orpd    xmm3, xmm2
 37081  	LONG $0xd32c0ff2             // cvttsd2si    edx, xmm3
 37082  	LONG $0xd2440f41             // cmove    edx, r10d
 37083  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 37084  	LONG $0x01c68348             // add    rsi, 1
 37085  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 37086  	JNE  LBB4_1152
 37087  	JMP  LBB4_1655
 37088  
 37089  LBB4_1153: // Entry to the float64 -> 16-bit vector remainder with the element index rdi started at 0.
 37090  	WORD $0xff31 // xor    edi, edi
 37091  
 37092  LBB4_1154: // Optional single vector step over 4 doubles, taken only when bit 0 of r9 is set (r9 is computed before this excerpt; presumably the iteration count of a 2x-unrolled vector loop -- TODO confirm).
 37093  	LONG $0x01c1f641                           // test    r9b, 1
 37094  	JE   LBB4_1156
 37095  	LONG $0x14100f66; BYTE $0xf9               // movupd    xmm2, oword [rcx + 8*rdi]
 37096  	LONG $0x5c100f66; WORD $0x10f9             // movupd    xmm3, oword [rcx + 8*rdi + 16]
 37097  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 37098  	LONG $0xc2280f66                           // movapd    xmm0, xmm2
 37099  	LONG $0xc4c20f66; BYTE $0x00               // cmpeqpd    xmm0, xmm4 -- all-ones lanes where the input equals 0.0 (ordered compare: false for NaN)
 37100  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 -- this and the next pack narrow the 64-bit lane masks to 16-bit lanes
 37101  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 37102  	LONG $0xcb280f66                           // movapd    xmm1, xmm3
 37103  	LONG $0xccc20f66; BYTE $0x00               // cmpeqpd    xmm1, xmm4
 37104  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37105  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37106  	LONG $0x6d280f66; BYTE $0x00               // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI4_0] */ -- AND constant (table value not visible here; presumably the sign-bit mask)
 37107  	LONG $0xd5540f66                           // andpd    xmm2, xmm5
 37108  	LONG $0x75280f66; BYTE $0x10               // movapd    xmm6, oword 16[rbp] /* [rip + .LCPI4_1] */ -- OR constant (presumably 1.0 in both lanes -- TODO confirm)
 37109  	LONG $0xd6560f66                           // orpd    xmm2, xmm6
 37110  	LONG $0xdd540f66                           // andpd    xmm3, xmm5
 37111  	LONG $0xde560f66                           // orpd    xmm3, xmm6
 37112  	LONG $0xd2e60f66                           // cvttpd2dq    xmm2, xmm2
 37113  	LONG $0xdbe60f66                           // cvttpd2dq    xmm3, xmm3
 37114  	LONG $0xd2700ff2; BYTE $0xe8               // pshuflw    xmm2, xmm2, 232 -- moves the low 16 bits of each converted dword into words 0 and 1
 37115  	LONG $0xdb700ff2; BYTE $0xe8               // pshuflw    xmm3, xmm3, 232
 37116  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0 -- lanes whose input was 0.0 take 0 from xmm4 (implicit mask in xmm0)
 37117  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 37118  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 37119  	LONG $0x7e0f4166; WORD $0x7814             // movd    dword [r8 + 2*rdi], xmm2 -- two 16-bit results
 37120  	LONG $0x7e0f4166; WORD $0x785c; BYTE $0x04 // movd    dword [r8 + 2*rdi + 4], xmm3 -- two more 16-bit results
 37121  
 37122  LBB4_1156: // Scalar-tail gate: finished when the scalar index rsi equals the end index rax (both set before this excerpt).
 37123  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
 37124  	JE   LBB4_1655
 37125  
 37126  LBB4_1157: // Loop invariants for the scalar tail: xmm0 = 0.0, xmm1/xmm2 = AND/OR constants from the rbp-based table.
 37127  	LONG $0xc0ef0f66             // pxor    xmm0, xmm0
 37128  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 37129  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 37130  
 37131  LBB4_1158: // One element per pass: store the low 16 bits of trunc((src[rsi] & xmm1) | xmm2) to dst[rsi], or of r10d when src[rsi] compares equal to 0.0.
 37132  	LONG $0x1c100ff2; BYTE $0xf1 // movsd    xmm3, qword [rcx + 8*rsi]
 37133  	LONG $0xc32e0f66             // ucomisd    xmm0, xmm3 -- ZF=1 for equal and for unordered (NaN); NOTE(review): the vector step above does not zero NaN lanes, this tail does
 37134  	LONG $0xd9540f66             // andpd    xmm3, xmm1
 37135  	LONG $0xda560f66             // orpd    xmm3, xmm2
 37136  	LONG $0xd32c0ff2             // cvttsd2si    edx, xmm3
 37137  	LONG $0xd2440f41             // cmove    edx, r10d -- r10d is set before this excerpt, presumably 0 -- TODO confirm
 37138  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 37139  	LONG $0x01c68348             // add    rsi, 1
 37140  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 37141  	JNE  LBB4_1158
 37142  	JMP  LBB4_1655
 37143  
 37144  LBB4_1159: // Entry to the int64 -> 16-bit sign vector remainder with the element index rsi started at 0.
 37145  	WORD $0xf631 // xor    esi, esi
 37146  
 37147  LBB4_1160: // Optional single vector step over 4 qwords, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37148  	LONG $0x01c1f641                           // test    r9b, 1
 37149  	JE   LBB4_1162
 37150  	LONG $0x146f0ff3; BYTE $0xf1               // movdqu    xmm2, oword [rcx + 8*rsi]
 37151  	LONG $0x5c6f0ff3; WORD $0x10f1             // movdqu    xmm3, oword [rcx + 8*rsi + 16]
 37152  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
 37153  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 37154  	LONG $0x37380f66; BYTE $0xc4               // pcmpgtq    xmm0, xmm4 -- all-ones lanes where the signed input is > 0
 37155  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 -- this and the next pack narrow the 64-bit lane masks to 16-bit lanes
 37156  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 37157  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 37158  	LONG $0x37380f66; BYTE $0xcc               // pcmpgtq    xmm1, xmm4
 37159  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37160  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37161  	LONG $0x29380f66; BYTE $0xd4               // pcmpeqq    xmm2, xmm4
 37162  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 -- all-ones, used to invert the == 0 masks
 37163  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 -- now -1 where the input is nonzero, 0 where it is zero
 37164  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 37165  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 37166  	LONG $0x29380f66; BYTE $0xdc               // pcmpeqq    xmm3, xmm4
 37167  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 37168  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 37169  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 37170  	QUAD $0x000000b0a56f0f66                   // movdqa    xmm4, oword 176[rbp] /* [rip + .LCPI4_17] */ -- value for positive lanes (table not visible here; presumably 1 per 16-bit lane, matching the scalar tail -- TODO confirm)
 37171  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0 -- positive lanes take the constant; the rest keep -1 or 0
 37172  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 37173  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 37174  	LONG $0x7e0f4166; WORD $0x7014             // movd    dword [r8 + 2*rsi], xmm2 -- two 16-bit results
 37175  	LONG $0x7e0f4166; WORD $0x705c; BYTE $0x04 // movd    dword [r8 + 2*rsi + 4], xmm3 -- two more 16-bit results
 37176  
 37177  LBB4_1162: // Scalar-tail gate: this kernel indexes with rdx and ends at r10 (both set before this excerpt).
 37178  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 37179  	JE   LBB4_1655
 37180  
 37181  LBB4_1163: // esi = 1, the value stored for positive inputs.
 37182  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 37183  
 37184  LBB4_1164: // One element per pass: dst16[rdx] = 1 if src64[rdx] > 0, -1 if < 0, 0 if == 0.
 37185  	LONG $0xd13c8b48             // mov    rdi, qword [rcx + 8*rdx]
 37186  	WORD $0xc031                 // xor    eax, eax
 37187  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi
 37188  	WORD $0x950f; BYTE $0xd0     // setne    al
 37189  	WORD $0xd8f7                 // neg    eax -- 0 stays 0, nonzero becomes -1
 37190  	WORD $0x8548; BYTE $0xff     // test    rdi, rdi -- re-test because neg overwrote the flags
 37191  	WORD $0x4f0f; BYTE $0xc6     // cmovg    eax, esi
 37192  	LONG $0x04894166; BYTE $0x50 // mov    word [r8 + 2*rdx], ax
 37193  	LONG $0x01c28348             // add    rdx, 1
 37194  	WORD $0x3949; BYTE $0xd2     // cmp    r10, rdx
 37195  	JNE  LBB4_1164
 37196  	JMP  LBB4_1655
 37197  
 37198  LBB4_1165: // Entry to the float32 -> 16-bit sign vector remainder with the element index rdi started at 0.
 37199  	WORD $0xff31 // xor    edi, edi
 37200  
 37201  LBB4_1166: // Optional single vector step over 8 floats, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37202  	LONG $0x01c1f641               // test    r9b, 1
 37203  	JE   LBB4_1168
 37204  	LONG $0xb904100f               // movups    xmm0, oword [rcx + 4*rdi]
 37205  	LONG $0xb94c100f; BYTE $0x10   // movups    xmm1, oword [rcx + 4*rdi + 16]
 37206  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 37207  	WORD $0x280f; BYTE $0xd0       // movaps    xmm2, xmm0
 37208  	LONG $0x00d4c20f               // cmpeqps    xmm2, xmm4 -- all-ones lanes where the input equals 0.0 (ordered compare: false for NaN)
 37209  	LONG $0xd26b0f66               // packssdw    xmm2, xmm2 -- narrow the 32-bit lane masks to 16-bit lanes
 37210  	WORD $0x280f; BYTE $0xd9       // movaps    xmm3, xmm1
 37211  	LONG $0x00dcc20f               // cmpeqps    xmm3, xmm4
 37212  	LONG $0xdb6b0f66               // packssdw    xmm3, xmm3
 37213  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 -- all-ones (-1 in every lane)
 37214  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5 -- integer compare of the raw float bits against -1: true exactly when the sign bit is clear
 37215  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
 37216  	LONG $0xcd660f66               // pcmpgtd    xmm1, xmm5
 37217  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 37218  	LONG $0x756f0f66; BYTE $0x70   // movdqa    xmm6, oword 112[rbp] /* [rip + .LCPI4_11] */ -- value for sign-clear lanes (table not visible here; presumably 1 per 16-bit lane -- TODO confirm)
 37219  	LONG $0xff760f66               // pcmpeqd    xmm7, xmm7 -- start from -1 in every 16-bit lane
 37220  	LONG $0x10380f66; BYTE $0xfe   // pblendvb    xmm7, xmm6, xmm0 -- elements 0-3: sign-clear lanes take the constant
 37221  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 37222  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0 -- elements 4-7, starting from the all-ones xmm5
 37223  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 37224  	LONG $0x10380f66; BYTE $0xfc   // pblendvb    xmm7, xmm4, xmm0 -- lanes whose input was 0.0 take 0
 37225  	LONG $0xc36f0f66               // movdqa    xmm0, xmm3
 37226  	LONG $0x10380f66; BYTE $0xec   // pblendvb    xmm5, xmm4, xmm0
 37227  	LONG $0xfd6c0f66               // punpcklqdq    xmm7, xmm5 -- join the two groups of four 16-bit results
 37228  	LONG $0x7f0f41f3; WORD $0x783c // movdqu    oword [r8 + 2*rdi], xmm7 -- eight 16-bit results
 37229  
 37230  LBB4_1168: // Scalar-tail gate: finished when the scalar index rsi equals the end index rax (both set before this excerpt).
 37231  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
 37232  	JE   LBB4_1655
 37233  
 37234  LBB4_1169: // xmm0 = 0.0f for the zero test in the scalar tail.
 37235  	LONG $0xc0ef0f66 // pxor    xmm0, xmm0
 37236  
 37237  LBB4_1170: // One element per pass: dst16[rsi] = +1 when the sign bit is clear, -1 when it is set, or r10d when the input compares equal to 0.0.
 37238  	LONG $0x0c6e0f66; BYTE $0xb1 // movd    xmm1, dword [rcx + 4*rsi]
 37239  	LONG $0xca7e0f66             // movd    edx, xmm1
 37240  	WORD $0xff31                 // xor    edi, edi
 37241  	WORD $0xd285                 // test    edx, edx
 37242  	LONG $0xd7990f40             // setns    dil -- 1 when the float's sign bit is clear
 37243  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1 -- ZF=1 for equal and for unordered (NaN); NOTE(review): the vector step above does not zero NaN lanes, this tail does
 37244  	LONG $0xff3f548d             // lea    edx, [rdi + rdi - 1] -- 2*dil - 1 = +1 or -1; lea leaves the ucomiss flags intact
 37245  	LONG $0xd2440f41             // cmove    edx, r10d -- r10d is set before this excerpt, presumably 0 -- TODO confirm
 37246  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 37247  	LONG $0x01c68348             // add    rsi, 1
 37248  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 37249  	JNE  LBB4_1170
 37250  	JMP  LBB4_1655
 37251  
 37252  LBB4_1171: // Second copy of the float32 -> 16-bit sign kernel (same instruction sequence as LBB4_1165..LBB4_1170); entry with the element index rdi started at 0.
 37253  	WORD $0xff31 // xor    edi, edi
 37254  
 37255  LBB4_1172: // Optional single vector step over 8 floats, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37256  	LONG $0x01c1f641               // test    r9b, 1
 37257  	JE   LBB4_1174
 37258  	LONG $0xb904100f               // movups    xmm0, oword [rcx + 4*rdi]
 37259  	LONG $0xb94c100f; BYTE $0x10   // movups    xmm1, oword [rcx + 4*rdi + 16]
 37260  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 37261  	WORD $0x280f; BYTE $0xd0       // movaps    xmm2, xmm0
 37262  	LONG $0x00d4c20f               // cmpeqps    xmm2, xmm4 -- all-ones lanes where the input equals 0.0 (ordered compare: false for NaN)
 37263  	LONG $0xd26b0f66               // packssdw    xmm2, xmm2 -- narrow the 32-bit lane masks to 16-bit lanes
 37264  	WORD $0x280f; BYTE $0xd9       // movaps    xmm3, xmm1
 37265  	LONG $0x00dcc20f               // cmpeqps    xmm3, xmm4
 37266  	LONG $0xdb6b0f66               // packssdw    xmm3, xmm3
 37267  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 -- all-ones (-1 in every lane)
 37268  	LONG $0xc5660f66               // pcmpgtd    xmm0, xmm5 -- integer compare of the raw float bits against -1: true exactly when the sign bit is clear
 37269  	LONG $0xc06b0f66               // packssdw    xmm0, xmm0
 37270  	LONG $0xcd660f66               // pcmpgtd    xmm1, xmm5
 37271  	LONG $0xc96b0f66               // packssdw    xmm1, xmm1
 37272  	LONG $0x756f0f66; BYTE $0x70   // movdqa    xmm6, oword 112[rbp] /* [rip + .LCPI4_11] */ -- value for sign-clear lanes (table not visible here; presumably 1 per 16-bit lane -- TODO confirm)
 37273  	LONG $0xff760f66               // pcmpeqd    xmm7, xmm7 -- start from -1 in every 16-bit lane
 37274  	LONG $0x10380f66; BYTE $0xfe   // pblendvb    xmm7, xmm6, xmm0 -- elements 0-3: sign-clear lanes take the constant
 37275  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 37276  	LONG $0x10380f66; BYTE $0xee   // pblendvb    xmm5, xmm6, xmm0 -- elements 4-7, starting from the all-ones xmm5
 37277  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 37278  	LONG $0x10380f66; BYTE $0xfc   // pblendvb    xmm7, xmm4, xmm0 -- lanes whose input was 0.0 take 0
 37279  	LONG $0xc36f0f66               // movdqa    xmm0, xmm3
 37280  	LONG $0x10380f66; BYTE $0xec   // pblendvb    xmm5, xmm4, xmm0
 37281  	LONG $0xfd6c0f66               // punpcklqdq    xmm7, xmm5 -- join the two groups of four 16-bit results
 37282  	LONG $0x7f0f41f3; WORD $0x783c // movdqu    oword [r8 + 2*rdi], xmm7 -- eight 16-bit results
 37283  
 37284  LBB4_1174: // Scalar-tail gate: finished when the scalar index rsi equals the end index rax (both set before this excerpt).
 37285  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
 37286  	JE   LBB4_1655
 37287  
 37288  LBB4_1175: // xmm0 = 0.0f for the zero test in the scalar tail.
 37289  	LONG $0xc0ef0f66 // pxor    xmm0, xmm0
 37290  
 37291  LBB4_1176: // One element per pass: dst16[rsi] = +1 when the sign bit is clear, -1 when it is set, or r10d when the input compares equal to 0.0.
 37292  	LONG $0x0c6e0f66; BYTE $0xb1 // movd    xmm1, dword [rcx + 4*rsi]
 37293  	LONG $0xca7e0f66             // movd    edx, xmm1
 37294  	WORD $0xff31                 // xor    edi, edi
 37295  	WORD $0xd285                 // test    edx, edx
 37296  	LONG $0xd7990f40             // setns    dil -- 1 when the float's sign bit is clear
 37297  	WORD $0x2e0f; BYTE $0xc1     // ucomiss    xmm0, xmm1 -- ZF=1 for equal and for unordered (NaN); NOTE(review): the vector step above does not zero NaN lanes, this tail does
 37298  	LONG $0xff3f548d             // lea    edx, [rdi + rdi - 1] -- 2*dil - 1 = +1 or -1; lea leaves the ucomiss flags intact
 37299  	LONG $0xd2440f41             // cmove    edx, r10d -- r10d is set before this excerpt, presumably 0 -- TODO confirm
 37300  	LONG $0x14894166; BYTE $0x70 // mov    word [r8 + 2*rsi], dx
 37301  	LONG $0x01c68348             // add    rsi, 1
 37302  	WORD $0x3948; BYTE $0xf0     // cmp    rax, rsi
 37303  	JNE  LBB4_1176
 37304  	JMP  LBB4_1655
 37305  
 37306  LBB4_1177: // Entry to a 32-bit -> 64-bit "is nonzero" vector remainder with the element index rsi started at 0.
 37307  	WORD $0xf631 // xor    esi, esi
 37308  
 37309  LBB4_1178: // Optional single vector step over 4 dwords, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37310  	LONG $0x01c1f641                           // test    r9b, 1
 37311  	JE   LBB4_1180
 37312  	LONG $0x047e0ff3; BYTE $0xb1               // movq    xmm0, qword [rcx + 4*rsi]
 37313  	LONG $0x4c7e0ff3; WORD $0x08b1             // movq    xmm1, qword [rcx + 4*rsi + 8]
 37314  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 37315  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2
 37316  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 -- all-ones, used to invert the == 0 masks
 37317  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3 -- all-ones lanes where the input is nonzero
 37318  	LONG $0x35380f66; BYTE $0xc0               // pmovzxdq    xmm0, xmm0 -- widen the two 32-bit masks to 64-bit lanes
 37319  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */ -- AND constant (table not visible here; presumably 1 per 64-bit lane, matching the scalar tail -- TODO confirm)
 37320  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 37321  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 37322  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 37323  	LONG $0x35380f66; BYTE $0xc9               // pmovzxdq    xmm1, xmm1
 37324  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 37325  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0 -- two 64-bit results
 37326  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1 -- two more 64-bit results
 37327  
 37328  LBB4_1180: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37329  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37330  	JE   LBB4_1655
 37331  
 37332  LBB4_1181: // One element per pass: dst64[rdx] = 1 if src32[rdx] != 0, else 0.
 37333  	WORD $0xf631             // xor    esi, esi
 37334  	LONG $0x00913c83         // cmp    dword [rcx + 4*rdx], 0
 37335  	LONG $0xd6950f40         // setne    sil
 37336  	LONG $0xd0348949         // mov    qword [r8 + 8*rdx], rsi
 37337  	LONG $0x01c28348         // add    rdx, 1
 37338  	WORD $0x3948; BYTE $0xd0 // cmp    rax, rdx
 37339  	JNE  LBB4_1181
 37340  	JMP  LBB4_1655
 37341  
 37342  LBB4_1182: // Entry to a 32-bit -> 32-bit "nonzero selects a constant" vector remainder with the element index rsi started at 0.
 37343  	WORD $0xf631 // xor    esi, esi
 37344  
 37345  LBB4_1183: // Optional single vector step over 8 dwords, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37346  	LONG $0x01c1f641                           // test    r9b, 1
 37347  	JE   LBB4_1185
 37348  	LONG $0x046f0ff3; BYTE $0xb1               // movdqu    xmm0, oword [rcx + 4*rsi]
 37349  	LONG $0x4c6f0ff3; WORD $0x10b1             // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 37350  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 37351  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2 -- all-ones lanes where the input is zero
 37352  	QUAD $0x000000d09d6f0f66                   // movdqa    xmm3, oword 208[rbp] /* [rip + .LCPI4_19] */ -- per-lane result for nonzero inputs (table not visible here; presumably 1.0f bit patterns -- TODO confirm)
 37353  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3 -- (~zero_mask) & constant: constant where nonzero, 0 where zero
 37354  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 37355  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 37356  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 37357  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 37358  
 37359  LBB4_1185: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37360  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37361  	JE   LBB4_1655
 37362  
 37363  LBB4_1186: // xmm0 = 32-bit constant stored for nonzero inputs; then enter the rotated loop at its test (LBB4_1188).
 37364  	QUAD $0x00000128856e0f66 // movd    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 37365  	JMP  LBB4_1188
 37366  
 37367  LBB4_1187: // Loop latch: store the chosen 32-bit value, advance rdx, leave when it reaches rax.
 37368  	LONG $0x7e0f4166; WORD $0x900c // movd    dword [r8 + 4*rdx], xmm1
 37369  	LONG $0x01c28348               // add    rdx, 1
 37370  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 37371  	JE   LBB4_1655
 37372  
 37373  LBB4_1188: // Loop body: xmm1 = constant when src32[rdx] != 0, otherwise 0.
 37374  	LONG $0x00913c83 // cmp    dword [rcx + 4*rdx], 0
 37375  	LONG $0xc86f0f66 // movdqa    xmm1, xmm0 -- SSE move, does not disturb the cmp flags
 37376  	JNE  LBB4_1187
 37377  	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
 37378  	JMP  LBB4_1187
 37379  
 37380  LBB4_1190: // Entry to the float64 -> 64-bit integer vector remainder with the element index rsi started at 0.
 37381  	WORD $0xf631 // xor    esi, esi
 37382  
 37383  LBB4_1191: // Optional single vector step over 4 doubles, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37384  	LONG $0x01c1f641                           // test    r9b, 1
 37385  	JE   LBB4_1193
 37386  	LONG $0x04100f66; BYTE $0xf1               // movupd    xmm0, oword [rcx + 8*rsi]
 37387  	LONG $0x4c100f66; WORD $0x10f1             // movupd    xmm1, oword [rcx + 8*rsi + 16]
 37388  	LONG $0xd2570f66                           // xorpd    xmm2, xmm2
 37389  	LONG $0x5d280f66; BYTE $0x00               // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */ -- AND constant (table value not visible here; presumably the sign-bit mask)
 37390  	LONG $0xe0280f66                           // movapd    xmm4, xmm0
 37391  	LONG $0xe3540f66                           // andpd    xmm4, xmm3
 37392  	LONG $0x6d280f66; BYTE $0x10               // movapd    xmm5, oword 16[rbp] /* [rip + .LCPI4_1] */ -- OR constant (presumably 1.0 in both lanes -- TODO confirm)
 37393  	LONG $0xe5560f66                           // orpd    xmm4, xmm5
 37394  	LONG $0xd9540f66                           // andpd    xmm3, xmm1
 37395  	LONG $0xdd560f66                           // orpd    xmm3, xmm5
 37396  	LONG $0x2c0f48f2; BYTE $0xfc               // cvttsd2si    rdi, xmm4 -- each lane is converted to int64 one at a time through rdi
 37397  	LONG $0x6e0f4866; BYTE $0xef               // movq    xmm5, rdi
 37398  	LONG $0xe4700f66; BYTE $0xee               // pshufd    xmm4, xmm4, 238 -- bring the high double down to the low lane
 37399  	LONG $0x2c0f48f2; BYTE $0xfc               // cvttsd2si    rdi, xmm4
 37400  	LONG $0x6e0f4866; BYTE $0xe7               // movq    xmm4, rdi
 37401  	LONG $0xec6c0f66                           // punpcklqdq    xmm5, xmm4 -- xmm5 = converted elements 0 and 1
 37402  	LONG $0x2c0f48f2; BYTE $0xfb               // cvttsd2si    rdi, xmm3
 37403  	LONG $0x6e0f4866; BYTE $0xe7               // movq    xmm4, rdi
 37404  	LONG $0xdb700f66; BYTE $0xee               // pshufd    xmm3, xmm3, 238
 37405  	LONG $0x2c0f48f2; BYTE $0xfb               // cvttsd2si    rdi, xmm3
 37406  	LONG $0x6e0f4866; BYTE $0xdf               // movq    xmm3, rdi
 37407  	LONG $0xe36c0f66                           // punpcklqdq    xmm4, xmm3 -- xmm4 = converted elements 2 and 3
 37408  	LONG $0xc2c20f66; BYTE $0x04               // cmpneqpd    xmm0, xmm2 -- all-ones lanes where the input is not equal to 0.0 (this predicate is also true for NaN)
 37409  	LONG $0xc5540f66                           // andpd    xmm0, xmm5 -- keep the converted value only where the input was not 0.0
 37410  	LONG $0xcac20f66; BYTE $0x04               // cmpneqpd    xmm1, xmm2
 37411  	LONG $0xcc540f66                           // andpd    xmm1, xmm4
 37412  	LONG $0x110f4166; WORD $0xf004             // movupd    oword [r8 + 8*rsi], xmm0
 37413  	LONG $0x110f4166; WORD $0xf04c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm1
 37414  
 37415  LBB4_1193: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37416  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37417  	JE   LBB4_1655
 37418  
 37419  LBB4_1194: // Loop invariants: rsi = 0 (value stored for zero inputs), xmm0 = 0.0, xmm1/xmm2 = AND/OR constants from the rbp-based table.
 37420  	WORD $0xf631                 // xor    esi, esi
 37421  	LONG $0xc0570f66             // xorpd    xmm0, xmm0
 37422  	LONG $0x4d280f66; BYTE $0x00 // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI4_0] */
 37423  	QUAD $0x0000011095100ff2     // movsd    xmm2, qword 272[rbp] /* [rip + .LCPI4_2] */
 37424  
 37425  LBB4_1195: // One element per pass: dst64[rdx] = trunc((src[rdx] & xmm1) | xmm2), or 0 when src[rdx] compares equal to 0.0.
 37426  	LONG $0x1c100ff2; BYTE $0xd1 // movsd    xmm3, qword [rcx + 8*rdx]
 37427  	LONG $0xc32e0f66             // ucomisd    xmm0, xmm3 -- ZF=1 for equal and for unordered (NaN); NOTE(review): the vector step above keeps NaN lanes, this tail zeroes them
 37428  	LONG $0xd9540f66             // andpd    xmm3, xmm1
 37429  	LONG $0xda560f66             // orpd    xmm3, xmm2
 37430  	LONG $0x2c0f48f2; BYTE $0xfb // cvttsd2si    rdi, xmm3
 37431  	LONG $0xfe440f48             // cmove    rdi, rsi -- still consumes the ucomisd flags; rsi is 0 here
 37432  	LONG $0xd03c8949             // mov    qword [r8 + 8*rdx], rdi
 37433  	LONG $0x01c28348             // add    rdx, 1
 37434  	WORD $0x3948; BYTE $0xd0     // cmp    rax, rdx
 37435  	JNE  LBB4_1195
 37436  	JMP  LBB4_1655
 37437  
 37438  LBB4_1196: // Entry to the float64 -> float32 vector remainder with the element index rsi started at 0.
 37439  	WORD $0xf631 // xor    esi, esi
 37440  
 37441  LBB4_1197: // Optional single vector step over 4 doubles producing 4 floats, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37442  	LONG $0x01c1f641                           // test    r9b, 1
 37443  	JE   LBB4_1199
 37444  	LONG $0x14100f66; BYTE $0xf1               // movupd    xmm2, oword [rcx + 8*rsi]
 37445  	LONG $0x100f4466; WORD $0xf144; BYTE $0x10 // movupd    xmm8, oword [rcx + 8*rsi + 16]
 37446  	WORD $0x570f; BYTE $0xc0                   // xorps    xmm0, xmm0
 37447  	LONG $0xda5a0ff2                           // cvtsd2ss    xmm3, xmm2 -- element 0 narrowed to float
 37448  	LONG $0xd0c20f66; BYTE $0x00               // cmpeqpd    xmm2, xmm0 -- all-ones lanes where elements 0-1 equal 0.0
 37449  	LONG $0xe8d2c60f                           // shufps    xmm2, xmm2, 232 -- compact the two 64-bit masks into the two low 32-bit lanes
 37450  	LONG $0x655a0f66; BYTE $0x10               // cvtpd2ps    xmm4, oword 16[rbp] /* [rip + .LCPI4_1] */ -- table constant pair narrowed to float (values not visible here; presumably 1.0 -- TODO confirm)
 37451  	LONG $0xc20f4166; WORD $0x00c0             // cmpeqpd    xmm0, xmm8 -- all-ones lanes where elements 2-3 equal 0.0
 37452  	LONG $0x6c100ff2; WORD $0x08f1             // movsd    xmm5, qword [rcx + 8*rsi + 8]
 37453  	LONG $0xed5a0ff2                           // cvtsd2ss    xmm5, xmm5 -- element 1 narrowed to float
 37454  	LONG $0xe8c0c60f                           // shufps    xmm0, xmm0, 232
 37455  	LONG $0x2075280f                           // movaps    xmm6, oword 32[rbp] /* [rip + .LCPI4_3] */ -- bit-select mask: 1-bits take the constant, 0-bits take the converted input (see the andps/andnps/orps triples below; value not visible here)
 37456  	WORD $0x280f; BYTE $0xfe                   // movaps    xmm7, xmm6
 37457  	WORD $0x550f; BYTE $0xfd                   // andnps    xmm7, xmm5
 37458  	LONG $0xec160ff3                           // movshdup    xmm5, xmm4
 37459  	WORD $0x540f; BYTE $0xee                   // andps    xmm5, xmm6
 37460  	WORD $0x560f; BYTE $0xfd                   // orps    xmm7, xmm5 -- low lane = bit-selected result for element 1
 37461  	WORD $0x280f; BYTE $0xce                   // movaps    xmm1, xmm6
 37462  	WORD $0x550f; BYTE $0xcb                   // andnps    xmm1, xmm3
 37463  	WORD $0x540f; BYTE $0xe6                   // andps    xmm4, xmm6
 37464  	WORD $0x560f; BYTE $0xcc                   // orps    xmm1, xmm4 -- low lane = bit-selected result for element 0
 37465  	WORD $0x140f; BYTE $0xcf                   // unpcklps    xmm1, xmm7 -- low two lanes = results for elements 0 and 1
 37466  	WORD $0x550f; BYTE $0xd1                   // andnps    xmm2, xmm1 -- zero the lanes whose input was 0.0
 37467  	LONG $0x4c100ff2; WORD $0x18f1             // movsd    xmm1, qword [rcx + 8*rsi + 24]
 37468  	LONG $0xc95a0ff2                           // cvtsd2ss    xmm1, xmm1 -- element 3 narrowed to float
 37469  	WORD $0x280f; BYTE $0xde                   // movaps    xmm3, xmm6
 37470  	WORD $0x550f; BYTE $0xd9                   // andnps    xmm3, xmm1
 37471  	WORD $0x560f; BYTE $0xdd                   // orps    xmm3, xmm5
 37472  	WORD $0x570f; BYTE $0xc9                   // xorps    xmm1, xmm1
 37473  	LONG $0x5a0f41f2; BYTE $0xc8               // cvtsd2ss    xmm1, xmm8 -- element 2 narrowed to float
 37474  	WORD $0x550f; BYTE $0xf1                   // andnps    xmm6, xmm1
 37475  	WORD $0x560f; BYTE $0xf4                   // orps    xmm6, xmm4
 37476  	WORD $0x140f; BYTE $0xf3                   // unpcklps    xmm6, xmm3 -- low two lanes = results for elements 2 and 3
 37477  	WORD $0x550f; BYTE $0xc6                   // andnps    xmm0, xmm6 -- zero the lanes whose input was 0.0
 37478  	WORD $0x160f; BYTE $0xd0                   // movlhps    xmm2, xmm0 -- combine both halves into four floats
 37479  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 37480  
 37481  LBB4_1199: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37482  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37483  	JE   LBB4_1655
 37484  
 37485  LBB4_1200: // Loop invariants: xmm0 = 0.0, xmm1 = AND mask, xmm2 = OR constant (table values not visible here); then enter the rotated loop at its test (LBB4_1202).
 37486  	WORD $0x570f; BYTE $0xc0 // xorps    xmm0, xmm0
 37487  	LONG $0x304d280f         // movaps    xmm1, oword 48[rbp] /* [rip + .LCPI4_4] */
 37488  	QUAD $0x0000012895100ff3 // movss    xmm2, dword 296[rbp] /* [rip + .LCPI4_5] */
 37489  	JMP  LBB4_1202
 37490  
 37491  LBB4_1201: // Loop latch: store the float in xmm3, advance rdx, leave when it reaches rax.
 37492  	LONG $0x110f41f3; WORD $0x901c // movss    dword [r8 + 4*rdx], xmm3
 37493  	LONG $0x01c28348               // add    rdx, 1
 37494  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 37495  	JE   LBB4_1655
 37496  
 37497  LBB4_1202: // Loop body: xmm3 = 0.0f when src[rdx] compares equal to 0.0, otherwise (float(src[rdx]) & xmm1) | xmm2.
 37498  	LONG $0x24100ff2; BYTE $0xd1 // movsd    xmm4, qword [rcx + 8*rdx]
 37499  	LONG $0xc42e0f66             // ucomisd    xmm0, xmm4 -- ZF=1 for equal and for unordered (NaN), so the JE below is also taken for NaN
 37500  	WORD $0x570f; BYTE $0xdb     // xorps    xmm3, xmm3 -- default result 0.0f; does not disturb the ucomisd flags
 37501  	JE   LBB4_1201
 37502  	WORD $0x570f; BYTE $0xdb     // xorps    xmm3, xmm3 -- redundant re-zero before the conversion writes the low lane
 37503  	LONG $0xdc5a0ff2             // cvtsd2ss    xmm3, xmm4
 37504  	WORD $0x540f; BYTE $0xd9     // andps    xmm3, xmm1
 37505  	WORD $0x560f; BYTE $0xda     // orps    xmm3, xmm2
 37506  	JMP  LBB4_1201
 37507  
 37508  LBB4_1204: // Entry to a 16-bit -> float32 "is nonzero" vector remainder with the element index rsi started at 0.
 37509  	WORD $0xf631 // xor    esi, esi
 37510  
 37511  LBB4_1205: // Optional single vector step over 8 words producing 8 floats, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37512  	LONG $0x01c1f641               // test    r9b, 1
 37513  	JE   LBB4_1207
 37514  	LONG $0x047e0ff3; BYTE $0x71   // movq    xmm0, qword [rcx + 2*rsi]
 37515  	LONG $0x4c7e0ff3; WORD $0x0871 // movq    xmm1, qword [rcx + 2*rsi + 8]
 37516  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 37517  	LONG $0xc2750f66               // pcmpeqw    xmm0, xmm2
 37518  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3 -- all-ones, used to invert the == 0 masks
 37519  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3 -- all-ones 16-bit lanes where the input is nonzero
 37520  	LONG $0x33380f66; BYTE $0xc0   // pmovzxwd    xmm0, xmm0 -- widen the four 16-bit masks to 32-bit lanes
 37521  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */ -- AND constant (table not visible here; presumably integer 1 per 32-bit lane -- TODO confirm)
 37522  	LONG $0xc4db0f66               // pand    xmm0, xmm4
 37523  	WORD $0x5b0f; BYTE $0xc0       // cvtdq2ps    xmm0, xmm0 -- integer flag converted to float
 37524  	LONG $0xca750f66               // pcmpeqw    xmm1, xmm2
 37525  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 37526  	LONG $0x33380f66; BYTE $0xc9   // pmovzxwd    xmm1, xmm1
 37527  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 37528  	WORD $0x5b0f; BYTE $0xc9       // cvtdq2ps    xmm1, xmm1
 37529  	LONG $0x04110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm0
 37530  	LONG $0x4c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm1
 37531  
 37532  LBB4_1207: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37533  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37534  	JE   LBB4_1655
 37535  
 37536  LBB4_1208: // xmm0 = 32-bit constant stored for nonzero inputs (table value not visible here); then enter the rotated loop at its test (LBB4_1210).
 37537  	QUAD $0x00000128856e0f66 // movd    xmm0, dword 296[rbp] /* [rip + .LCPI4_5] */
 37538  	JMP  LBB4_1210
 37539  
 37540  LBB4_1209: // Loop latch: store the chosen 32-bit value, advance rdx, leave when it reaches rax.
 37541  	LONG $0x7e0f4166; WORD $0x900c // movd    dword [r8 + 4*rdx], xmm1
 37542  	LONG $0x01c28348               // add    rdx, 1
 37543  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 37544  	JE   LBB4_1655
 37545  
 37546  LBB4_1210: // Loop body: xmm1 = constant when src16[rdx] != 0, otherwise 0.
 37547  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0
 37548  	LONG $0xc86f0f66             // movdqa    xmm1, xmm0 -- SSE move, does not disturb the cmp flags
 37549  	JNE  LBB4_1209
 37550  	LONG $0xc9ef0f66             // pxor    xmm1, xmm1
 37551  	JMP  LBB4_1209
 37552  
 37553  LBB4_1212: // Entry to the int16 -> 64-bit sign vector remainder with the element index rsi started at 0.
 37554  	WORD $0xf631 // xor    esi, esi
 37555  
 37556  LBB4_1213: // Optional single vector step over 4 words producing 4 qwords, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37557  	LONG $0x01c1f641                           // test    r9b, 1
 37558  	JE   LBB4_1215
 37559  	LONG $0x146e0f66; BYTE $0x71               // movd    xmm2, dword [rcx + 2*rsi]
 37560  	LONG $0x5c6e0f66; WORD $0x0471             // movd    xmm3, dword [rcx + 2*rsi + 4]
 37561  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 37562  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 37563  	LONG $0xc4650f66                           // pcmpgtw    xmm0, xmm4 -- all-ones 16-bit lanes where the signed input is > 0
 37564  	LONG $0x24380f66; BYTE $0xc0               // pmovsxwq    xmm0, xmm0 -- widen the two low masks to 64-bit lanes
 37565  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 37566  	LONG $0xcc650f66                           // pcmpgtw    xmm1, xmm4
 37567  	LONG $0x24380f66; BYTE $0xc9               // pmovsxwq    xmm1, xmm1
 37568  	LONG $0xd4750f66                           // pcmpeqw    xmm2, xmm4
 37569  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 -- all-ones, used to invert the == 0 masks
 37570  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 -- -1 where the input is nonzero, 0 where it is zero
 37571  	LONG $0x24380f66; BYTE $0xd2               // pmovsxwq    xmm2, xmm2 -- sign-extended: 64-bit -1 or 0
 37572  	LONG $0xdc750f66                           // pcmpeqw    xmm3, xmm4
 37573  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 37574  	LONG $0x24380f66; BYTE $0xdb               // pmovsxwq    xmm3, xmm3
 37575  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */ -- value for positive lanes (table not visible here; presumably 1 per 64-bit lane, matching the scalar tail -- TODO confirm)
 37576  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0 -- positive lanes take the constant; the rest keep -1 or 0
 37577  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 37578  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 37579  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 37580  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 37581  
 37582  LBB4_1215: // Scalar-tail gate: this kernel indexes with rdx and ends at r10 (both set before this excerpt).
 37583  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 37584  	JE   LBB4_1655
 37585  
 37586  LBB4_1216: // rsi = 1, the value stored for positive inputs.
 37587  	LONG $0x000001be; BYTE $0x00 // mov    esi, 1
 37588  
 37589  LBB4_1217: // One element per pass: dst64[rdx] = 1 if src16[rdx] > 0, -1 if < 0, 0 if == 0.
 37590  	LONG $0x513cb70f         // movzx    edi, word [rcx + 2*rdx]
 37591  	WORD $0xc031             // xor    eax, eax
 37592  	WORD $0x8566; BYTE $0xff // test    di, di
 37593  	WORD $0x950f; BYTE $0xd0 // setne    al
 37594  	WORD $0xf748; BYTE $0xd8 // neg    rax -- 0 stays 0, nonzero becomes -1
 37595  	WORD $0x8566; BYTE $0xff // test    di, di -- re-test because neg overwrote the flags; the 16-bit test gives a signed compare of the word
 37596  	LONG $0xc64f0f48         // cmovg    rax, rsi
 37597  	LONG $0xd0048949         // mov    qword [r8 + 8*rdx], rax
 37598  	LONG $0x01c28348         // add    rdx, 1
 37599  	WORD $0x3949; BYTE $0xd2 // cmp    r10, rdx
 37600  	JNE  LBB4_1217
 37601  	JMP  LBB4_1655
 37602  
 37603  LBB4_1218: // Entry to the int16 -> float32 sign vector remainder with the element index rsi started at 0.
 37604  	WORD $0xf631 // xor    esi, esi
 37605  
 37606  LBB4_1219: // Optional single vector step over 8 words producing 8 floats, taken only when bit 0 of r9 is set (r9 is computed before this excerpt).
 37607  	LONG $0x01c1f641                           // test    r9b, 1
 37608  	JE   LBB4_1221
 37609  	LONG $0x147e0ff3; BYTE $0x71               // movq    xmm2, qword [rcx + 2*rsi]
 37610  	LONG $0x5c7e0ff3; WORD $0x0871             // movq    xmm3, qword [rcx + 2*rsi + 8]
 37611  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 37612  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 37613  	LONG $0xc4650f66                           // pcmpgtw    xmm0, xmm4 -- all-ones 16-bit lanes where the signed input is > 0
 37614  	LONG $0x23380f66; BYTE $0xc0               // pmovsxwd    xmm0, xmm0 -- widen the four masks to 32-bit lanes
 37615  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 37616  	LONG $0xcc650f66                           // pcmpgtw    xmm1, xmm4
 37617  	LONG $0x23380f66; BYTE $0xc9               // pmovsxwd    xmm1, xmm1
 37618  	LONG $0xd4750f66                           // pcmpeqw    xmm2, xmm4
 37619  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 -- all-ones, used to invert the == 0 masks
 37620  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 -- -1 where the input is nonzero, 0 where it is zero
 37621  	LONG $0x23380f66; BYTE $0xd2               // pmovsxwd    xmm2, xmm2
 37622  	WORD $0x5b0f; BYTE $0xd2                   // cvtdq2ps    xmm2, xmm2 -- integer -1/0 becomes -1.0f/0.0f
 37623  	LONG $0xdc750f66                           // pcmpeqw    xmm3, xmm4
 37624  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 37625  	LONG $0x23380f66; BYTE $0xdb               // pmovsxwd    xmm3, xmm3
 37626  	WORD $0x5b0f; BYTE $0xdb                   // cvtdq2ps    xmm3, xmm3
 37627  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */ -- value for positive lanes (table not visible here; presumably 1.0f per lane -- TODO confirm)
 37628  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0 -- positive lanes take the constant; the rest keep -1.0f or 0.0f
 37629  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 37630  	LONG $0x14380f66; BYTE $0xdc               // blendvps    xmm3, xmm4, xmm0
 37631  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 37632  	LONG $0x5c110f41; WORD $0x10b0             // movups    oword [r8 + 4*rsi + 16], xmm3
 37633  
 37634  LBB4_1221: // Scalar-tail gate: finished when the scalar index rdx equals the end index rax (both set before this excerpt).
 37635  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37636  	JE   LBB4_1655
 37637  
 37638  LBB4_1222: // Loop invariants: xmm0 = value stored for negative inputs, xmm1 = value stored for positive inputs (table values not visible here; presumably -1.0f and 1.0f -- TODO confirm); then enter the loop at LBB4_1224.
 37639  	QUAD $0x00000130856e0f66 // movd    xmm0, dword 304[rbp] /* [rip + .LCPI4_14] */
 37640  	QUAD $0x000001288d6e0f66 // movd    xmm1, dword 296[rbp] /* [rip + .LCPI4_5] */
 37641  	JMP  LBB4_1224
 37642  
 37643  LBB4_1223: // Loop latch: store the chosen 32-bit value, advance rdx, leave when it reaches rax.
 37644  	LONG $0x7e0f4166; WORD $0x901c // movd    dword [r8 + 4*rdx], xmm3
 37645  	LONG $0x01c28348               // add    rdx, 1
 37646  	WORD $0x3948; BYTE $0xd0       // cmp    rax, rdx
 37647  	JE   LBB4_1655
 37648  
 37649  LBB4_1224: // Loop body, first half: xmm2 = xmm0 when src16[rdx] != 0, otherwise 0.
 37650  	LONG $0x513c8366; BYTE $0x00 // cmp    word [rcx + 2*rdx], 0 -- the only flag-setting instruction; both JNE and the later JG read these flags
 37651  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
 37652  	JNE  LBB4_1226
 37653  	LONG $0xd2ef0f66             // pxor    xmm2, xmm2
 37654  
 37655  LBB4_1226: // Second half: xmm3 = xmm1 when the input is > 0 (signed), otherwise the xmm2 chosen above.
 37656  	LONG $0xd96f0f66 // movdqa    xmm3, xmm1
 37657  	JG   LBB4_1223
 37658  	LONG $0xda6f0f66 // movdqa    xmm3, xmm2
 37659  	JMP  LBB4_1223
 37660  
 37661  LBB4_1104: // Single-element finish: turn the sign bit of the float in the low lane of xmm0 into +1 or -1, round-trip it through float, and fall into the store below (xmm0 and rax are set before this excerpt).
 37662  	WORD $0x500f; BYTE $0xc8     // movmskps    ecx, xmm0 -- collect the lane sign bits
 37663  	WORD $0xe183; BYTE $0x01     // and    ecx, 1 -- keep only lane 0's sign bit
 37664  	WORD $0xd9f7                 // neg    ecx -- 0 or -1
 37665  	WORD $0xc983; BYTE $0x01     // or    ecx, 1 -- +1 when the sign bit was clear, -1 when it was set
 37666  	WORD $0x570f; BYTE $0xc0     // xorps    xmm0, xmm0
 37667  	LONG $0xc12a0ff3             // cvtsi2ss    xmm0, ecx
 37668  	LONG $0x2c0f48f3; BYTE $0xc8 // cvttss2si    rcx, xmm0
 37669  
 37670  LBB4_1105: // Store the 64-bit result in rcx to dst[rax]; NOTE(review): a separate label, so presumably also reached from elsewhere with rcx already computed -- confirm against the jump sites.
 37671  	LONG $0xc00c8949 // mov    qword [r8 + 8*rax], rcx
 37672  
 37673  LBB4_1655: // Common exit: every kernel in this excerpt jumps here when its loop is finished.
 37674  	RET
 37675  
 37676  LBB4_1228: // entry with element index rsi = 0
 37677  	WORD $0xf631 // xor    esi, esi
 37678    
 37679  LBB4_1229: // one extra vector step over 8 dwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37680  	LONG $0x01c1f641                           // test    r9b, 1
 37681  	JE   LBB4_1231
 37682  	LONG $0x046f0ff3; BYTE $0xb1               // movdqu    xmm0, oword [rcx + 4*rsi]
 37683  	LONG $0x4c6f0ff3; WORD $0x10b1             // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 37684  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 37685  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2 ; mask of lanes equal to 0
 37686  	LONG $0x5d6f0f66; BYTE $0x50               // movdqa    xmm3, oword 80[rbp] /* [rip + .LCPI4_8] */
 37687  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3 ; constant where input != 0, else 0
 37688  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 37689  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 37690  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 37691  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 37692    
 37693  LBB4_1231: // return when rdx == r10; otherwise continue at LBB4_1232 (defined outside this chunk)
 37694  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 37695  	JE   LBB4_1655
 37696  	JMP  LBB4_1232
 37697  
 37698  LBB4_1236: // entry with element index rsi = 0
 37699  	WORD $0xf631 // xor    esi, esi
 37700    
 37701  LBB4_1237: // one extra vector step: 8 input bytes -> 8 output dwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37702  	LONG $0x01c1f641               // test    r9b, 1
 37703  	JE   LBB4_1239
 37704  	LONG $0x146e0f66; BYTE $0x31   // movd    xmm2, dword [rcx + rsi]
 37705  	LONG $0x5c6e0f66; WORD $0x0431 // movd    xmm3, dword [rcx + rsi + 4]
 37706  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 37707  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 37708  	LONG $0xc4640f66               // pcmpgtb    xmm0, xmm4 ; signed byte > 0 mask
 37709  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0 ; widen the mask to dword lanes
 37710  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 37711  	LONG $0xcc640f66               // pcmpgtb    xmm1, xmm4
 37712  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 37713  	LONG $0xd4740f66               // pcmpeqb    xmm2, xmm4
 37714  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 ; all-ones, used to invert the ==0 masks
 37715  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5 ; 0xFF where input != 0
 37716  	LONG $0x21380f66; BYTE $0xd2   // pmovsxbd    xmm2, xmm2 ; -1 where input != 0, else 0
 37717  	LONG $0xdc740f66               // pcmpeqb    xmm3, xmm4
 37718  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 37719  	LONG $0x21380f66; BYTE $0xdb   // pmovsxbd    xmm3, xmm3
 37720  	LONG $0x5065280f               // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 37721  	LONG $0x14380f66; BYTE $0xd4   // blendvps    xmm2, xmm4, xmm0 ; take the constant where input > 0
 37722  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1 ; blendvps reads its mask from xmm0
 37723  	LONG $0x14380f66; BYTE $0xdc   // blendvps    xmm3, xmm4, xmm0
 37724  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 37725  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 37726    
 37727  LBB4_1239: // return when rdx == r10; otherwise continue at LBB4_1240 (defined outside this chunk)
 37728  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 37729  	JE   LBB4_1655
 37730  	JMP  LBB4_1240
 37731  
 37732  LBB4_1245: // entry with element index rsi = 0
 37733  	WORD $0xf631 // xor    esi, esi
 37734    
 37735  LBB4_1246: // one extra vector step: 8 input bytes -> 8 output dwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37736  	LONG $0x01c1f641                           // test    r9b, 1
 37737  	JE   LBB4_1248
 37738  	LONG $0x046e0f66; BYTE $0x31               // movd    xmm0, dword [rcx + rsi]
 37739  	LONG $0x4c6e0f66; WORD $0x0431             // movd    xmm1, dword [rcx + rsi + 4]
 37740  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 37741  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 37742  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 ; all-ones, used to invert the ==0 masks
 37743  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3 ; 0xFF where input != 0
 37744  	LONG $0x31380f66; BYTE $0xc0               // pmovzxbd    xmm0, xmm0 ; zero-extend the byte masks to dword lanes
 37745  	LONG $0x656f0f66; BYTE $0x50               // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 37746  	LONG $0xc4db0f66                           // pand    xmm0, xmm4 ; keep only the bits of the constant
 37747  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 37748  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 37749  	LONG $0x31380f66; BYTE $0xc9               // pmovzxbd    xmm1, xmm1
 37750  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 37751  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 37752  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 37753    
 37754  LBB4_1248: // return when rdx == r10; otherwise continue at LBB4_1249 (defined outside this chunk)
 37755  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 37756  	JE   LBB4_1655
 37757  	JMP  LBB4_1249
 37758  
 37759  LBB4_1253: // entry with element index rsi = 0
 37760  	WORD $0xf631 // xor    esi, esi
 37761    
 37762  LBB4_1254: // one extra vector step over 8 dwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37763  	LONG $0x01c1f641               // test    r9b, 1
 37764  	JE   LBB4_1256
 37765  	LONG $0x0c6f0ff3; BYTE $0xb1   // movdqu    xmm1, oword [rcx + 4*rsi]
 37766  	LONG $0x546f0ff3; WORD $0x10b1 // movdqu    xmm2, oword [rcx + 4*rsi + 16]
 37767  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 37768  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 37769  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 37770  	LONG $0xc1660f66               // pcmpgtd    xmm0, xmm1 ; mask: constant > input (signed)
 37771  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 37772  	LONG $0xeb760f66               // pcmpeqd    xmm5, xmm3
 37773  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1 ; all-ones, used to invert the ==0 masks
 37774  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1 ; -1 where input != 0, else 0
 37775  	LONG $0xda760f66               // pcmpeqd    xmm3, xmm2
 37776  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 37777  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 37778  	LONG $0xca660f66               // pcmpgtd    xmm1, xmm2
 37779  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 37780  	LONG $0x14380f66; BYTE $0xd5   // blendvps    xmm2, xmm5, xmm0 ; where input < constant take -1/0, else keep the constant
 37781  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1 ; blendvps reads its mask from xmm0
 37782  	LONG $0x14380f66; BYTE $0xe3   // blendvps    xmm4, xmm3, xmm0
 37783  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 37784  	LONG $0x64110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm4
 37785    
 37786  LBB4_1256: // return when rdx == r11; otherwise continue at LBB4_1257 (defined outside this chunk)
 37787  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 37788  	JE   LBB4_1655
 37789  	JMP  LBB4_1257
 37790  
 37791  LBB4_1262: // entry with element index rsi = 0
 37792  	WORD $0xf631 // xor    esi, esi
 37793    
 37794  LBB4_1263: // one extra vector step over 4 doubles, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37795  	LONG $0x01c1f641                           // test    r9b, 1
 37796  	JE   LBB4_1265
 37797  	LONG $0x04100f66; BYTE $0xf1               // movupd    xmm0, oword [rcx + 8*rsi]
 37798  	LONG $0x4c100f66; WORD $0x10f1             // movupd    xmm1, oword [rcx + 8*rsi + 16]
 37799  	LONG $0xd2570f66                           // xorpd    xmm2, xmm2
 37800  	LONG $0x5d280f66; BYTE $0x00               // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI4_0] */
 37801  	LONG $0xe0280f66                           // movapd    xmm4, xmm0
 37802  	LONG $0xe3540f66                           // andpd    xmm4, xmm3 ; keep the input bits selected by the LCPI4_0 mask (presumably the sign bit - confirm)
 37803  	LONG $0x6d280f66; BYTE $0x10               // movapd    xmm5, oword 16[rbp] /* [rip + .LCPI4_1] */
 37804  	LONG $0xe5560f66                           // orpd    xmm4, xmm5 ; merge them into the LCPI4_1 constant
 37805  	LONG $0xd9540f66                           // andpd    xmm3, xmm1
 37806  	LONG $0xdd560f66                           // orpd    xmm3, xmm5
 37807  	LONG $0xc2c20f66; BYTE $0x04               // cmpneqpd    xmm0, xmm2 ; mask of lanes that are not equal to 0.0
 37808  	LONG $0xc4540f66                           // andpd    xmm0, xmm4 ; lanes equal to 0.0 become all-zero bits
 37809  	LONG $0xcac20f66; BYTE $0x04               // cmpneqpd    xmm1, xmm2
 37810  	LONG $0xcb540f66                           // andpd    xmm1, xmm3
 37811  	LONG $0x110f4166; WORD $0xf004             // movupd    oword [r8 + 8*rsi], xmm0
 37812  	LONG $0x110f4166; WORD $0xf04c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm1
 37813    
 37814  LBB4_1265: // return when rdx == rax; otherwise continue at LBB4_1266 (defined outside this chunk)
 37815  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37816  	JE   LBB4_1655
 37817  	JMP  LBB4_1266
 37818  
 37819  LBB4_1271: // entry with element index rsi = 0
 37820  	WORD $0xf631 // xor    esi, esi
 37821    
 37822  LBB4_1272: // one extra vector step: 8 input dwords -> 8 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37823  	LONG $0x01c1f641                           // test    r9b, 1
 37824  	JE   LBB4_1274
 37825  	LONG $0x046f0ff3; BYTE $0xb1               // movdqu    xmm0, oword [rcx + 4*rsi]
 37826  	LONG $0x4c6f0ff3; WORD $0x10b1             // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 37827  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 37828  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2
 37829  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3 ; all-ones, used to invert the ==0 masks
 37830  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3 ; -1 where input != 0, else 0
 37831  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 ; narrow the mask dword -> word
 37832  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0 ; narrow the mask word -> byte
 37833  	QUAD $0x00000080a56f0f66                   // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 37834  	LONG $0xc4db0f66                           // pand    xmm0, xmm4 ; keep only the bits of the constant
 37835  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 37836  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 37837  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 37838  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 37839  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 37840  	LONG $0x7e0f4166; WORD $0x3004             // movd    dword [r8 + rsi], xmm0
 37841  	LONG $0x7e0f4166; WORD $0x304c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm1
 37842    
 37843  LBB4_1274: // return when rdx == rax; otherwise continue at LBB4_1275 (defined outside this chunk)
 37844  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37845  	JE   LBB4_1655
 37846  	JMP  LBB4_1275
 37847  
 37848  LBB4_1279: // entry with element index rsi = 0
 37849  	WORD $0xf631 // xor    esi, esi
 37850    
 37851  LBB4_1280: // one extra vector step: 4 input doubles -> 4 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37852  	LONG $0x01c1f641                     // test    r9b, 1
 37853  	JE   LBB4_1282
 37854  	LONG $0x1c100f66; BYTE $0xf1         // movupd    xmm3, oword [rcx + 8*rsi]
 37855  	LONG $0x64100f66; WORD $0x10f1       // movupd    xmm4, oword [rcx + 8*rsi + 16]
 37856  	LONG $0xd2570f66                     // xorpd    xmm2, xmm2
 37857  	LONG $0xc3280f66                     // movapd    xmm0, xmm3
 37858  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2 ; mask of lanes equal to 0.0
 37859  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; three packs narrow the qword mask down to bytes
 37860  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 37861  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 37862  	LONG $0xcc280f66                     // movapd    xmm1, xmm4
 37863  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 37864  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 37865  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 37866  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 37867  	LONG $0x6d280f66; BYTE $0x00         // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI4_0] */
 37868  	LONG $0xdd540f66                     // andpd    xmm3, xmm5 ; keep the input bits selected by the LCPI4_0 mask (presumably the sign bit - confirm)
 37869  	LONG $0x75280f66; BYTE $0x10         // movapd    xmm6, oword 16[rbp] /* [rip + .LCPI4_1] */
 37870  	LONG $0xde560f66                     // orpd    xmm3, xmm6 ; merge them into the LCPI4_1 constant
 37871  	LONG $0xe5540f66                     // andpd    xmm4, xmm5
 37872  	LONG $0xe6560f66                     // orpd    xmm4, xmm6
 37873  	LONG $0xdbe60f66                     // cvttpd2dq    xmm3, xmm3 ; truncate the doubles to 32-bit integers
 37874  	LONG $0x6d6f0f66; BYTE $0x40         // movdqa    xmm5, oword 64[rbp] /* [rip + .LCPI4_7] */
 37875  	LONG $0x00380f66; BYTE $0xdd         // pshufb    xmm3, xmm5 ; byte shuffle controlled by LCPI4_7 (pattern defined outside this chunk)
 37876  	LONG $0xe4e60f66                     // cvttpd2dq    xmm4, xmm4
 37877  	LONG $0x00380f66; BYTE $0xe5         // pshufb    xmm4, xmm5
 37878  	LONG $0x10380f66; BYTE $0xda         // pblendvb    xmm3, xmm2, xmm0 ; force 0 where the input compared equal to 0.0
 37879  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 37880  	LONG $0x10380f66; BYTE $0xe2         // pblendvb    xmm4, xmm2, xmm0
 37881  	QUAD $0x00301c153a0f4166             // pextrw    word [r8 + rsi], xmm3, 0
 37882  	QUAD $0x023064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm4, 0
 37883    
 37884  LBB4_1282: // return when rdx == rax; otherwise continue at LBB4_1283 (defined outside this chunk)
 37885  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37886  	JE   LBB4_1655
 37887  	JMP  LBB4_1283
 37888  
 37889  LBB4_1288: // entry with element index rax = 0
 37890  	WORD $0xc031 // xor    eax, eax
 37891    
 37892  LBB4_1289: // one extra vector step over 32 bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37893  	LONG $0x01c1f641                           // test    r9b, 1
 37894  	JE   LBB4_1291
 37895  	LONG $0x0c6f0ff3; BYTE $0x01               // movdqu    xmm1, oword [rcx + rax]
 37896  	LONG $0x546f0ff3; WORD $0x1001             // movdqu    xmm2, oword [rcx + rax + 16]
 37897  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 37898  	QUAD $0x00000100a56f0f66                   // movdqa    xmm4, oword 256[rbp] /* [rip + .LCPI4_22] */
 37899  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 37900  	LONG $0xc1640f66                           // pcmpgtb    xmm0, xmm1 ; mask: constant > input (signed bytes)
 37901  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 37902  	LONG $0xeb740f66                           // pcmpeqb    xmm5, xmm3
 37903  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1 ; all-ones, used to invert the ==0 masks
 37904  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1 ; 0xFF where input != 0, else 0
 37905  	LONG $0xda740f66                           // pcmpeqb    xmm3, xmm2
 37906  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 37907  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 37908  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 37909  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 37910  	LONG $0x10380f66; BYTE $0xd5               // pblendvb    xmm2, xmm5, xmm0 ; where input < constant take 0xFF/0, else keep the constant
 37911  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 37912  	LONG $0x10380f66; BYTE $0xe3               // pblendvb    xmm4, xmm3, xmm0
 37913  	LONG $0x7f0f41f3; WORD $0x0014             // movdqu    oword [r8 + rax], xmm2
 37914  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
 37915    
 37916  LBB4_1291: // return when rsi == r10; otherwise continue at LBB4_1292 (defined outside this chunk)
 37917  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 37918  	JE   LBB4_1655
 37919  	JMP  LBB4_1292
 37920  
 37921  LBB4_1297: // entry with element index rsi = 0
 37922  	WORD $0xf631 // xor    esi, esi
 37923    
 37924  LBB4_1298: // one extra vector step: 4 input qwords -> 4 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37925  	LONG $0x01c1f641                     // test    r9b, 1
 37926  	JE   LBB4_1300
 37927  	LONG $0x046f0ff3; BYTE $0xf1         // movdqu    xmm0, oword [rcx + 8*rsi]
 37928  	LONG $0x4c6f0ff3; WORD $0x10f1       // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 37929  	LONG $0xd2ef0f66                     // pxor    xmm2, xmm2
 37930  	LONG $0x29380f66; BYTE $0xc2         // pcmpeqq    xmm0, xmm2
 37931  	LONG $0xdb760f66                     // pcmpeqd    xmm3, xmm3 ; all-ones, used to invert the ==0 masks
 37932  	LONG $0xc3ef0f66                     // pxor    xmm0, xmm3 ; -1 where input != 0, else 0
 37933  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; three packs narrow the qword mask down to bytes
 37934  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 37935  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 37936  	QUAD $0x000000c0a56f0f66             // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */
 37937  	LONG $0xc4db0f66                     // pand    xmm0, xmm4 ; keep only the bits of the constant
 37938  	LONG $0x29380f66; BYTE $0xca         // pcmpeqq    xmm1, xmm2
 37939  	LONG $0xcbef0f66                     // pxor    xmm1, xmm3
 37940  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 37941  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 37942  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 37943  	QUAD $0x003004153a0f4166             // pextrw    word [r8 + rsi], xmm0, 0
 37944  	LONG $0xccdb0f66                     // pand    xmm1, xmm4
 37945  	QUAD $0x02304c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm1, 0
 37946    
 37947  LBB4_1300: // return when rdx == rax; otherwise continue at LBB4_1301 (defined outside this chunk)
 37948  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37949  	JE   LBB4_1655
 37950  	JMP  LBB4_1301
 37951  
 37952  LBB4_1305: // entry with element index rsi = 0
 37953  	WORD $0xf631 // xor    esi, esi
 37954    
 37955  LBB4_1306: // one extra vector step: 16 input words -> 16 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37956  	LONG $0x01c1f641               // test    r9b, 1
 37957  	JE   LBB4_1308
 37958  	LONG $0x046f0ff3; BYTE $0x71   // movdqu    xmm0, oword [rcx + 2*rsi]
 37959  	LONG $0x4c6f0ff3; WORD $0x1071 // movdqu    xmm1, oword [rcx + 2*rsi + 16]
 37960  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 37961  	LONG $0xc2750f66               // pcmpeqw    xmm0, xmm2
 37962  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3 ; all-ones, used to invert the ==0 masks
 37963  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3 ; -1 where input != 0, else 0
 37964  	LONG $0xc0630f66               // packsswb    xmm0, xmm0 ; narrow the mask word -> byte
 37965  	QUAD $0x000000f0a56f0f66       // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */
 37966  	LONG $0xc4db0f66               // pand    xmm0, xmm4 ; keep only the bits of the constant
 37967  	LONG $0xca750f66               // pcmpeqw    xmm1, xmm2
 37968  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 37969  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
 37970  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 37971  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1 ; join both 8-byte halves for a single 16-byte store
 37972  	LONG $0x7f0f41f3; WORD $0x3004 // movdqu    oword [r8 + rsi], xmm0
 37973    
 37974  LBB4_1308: // return when rdx == rax; otherwise continue at LBB4_1309 (defined outside this chunk)
 37975  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 37976  	JE   LBB4_1655
 37977  	JMP  LBB4_1309
 37978  
 37979  LBB4_1313: // entry with element index rax = 0
 37980  	WORD $0xc031 // xor    eax, eax
 37981    
 37982  LBB4_1314: // one extra vector step: 16 input words -> 16 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 37983  	LONG $0x01c1f641               // test    r9b, 1
 37984  	JE   LBB4_1316
 37985  	LONG $0x146f0ff3; BYTE $0x41   // movdqu    xmm2, oword [rcx + 2*rax]
 37986  	LONG $0x5c6f0ff3; WORD $0x1041 // movdqu    xmm3, oword [rcx + 2*rax + 16]
 37987  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 37988  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 37989  	LONG $0xc4650f66               // pcmpgtw    xmm0, xmm4 ; signed word > 0 mask
 37990  	LONG $0xc0630f66               // packsswb    xmm0, xmm0 ; narrow the mask word -> byte
 37991  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 37992  	LONG $0xcc650f66               // pcmpgtw    xmm1, xmm4
 37993  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
 37994  	LONG $0xd4750f66               // pcmpeqw    xmm2, xmm4
 37995  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5 ; all-ones, used to invert the ==0 masks
 37996  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5 ; -1 where input != 0, else 0
 37997  	LONG $0xd2630f66               // packsswb    xmm2, xmm2
 37998  	LONG $0xdc750f66               // pcmpeqw    xmm3, xmm4
 37999  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 38000  	LONG $0xdb630f66               // packsswb    xmm3, xmm3
 38001  	QUAD $0x000000f0a56f0f66       // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */
 38002  	LONG $0x10380f66; BYTE $0xd4   // pblendvb    xmm2, xmm4, xmm0 ; take the constant where input > 0
 38003  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 38004  	LONG $0x10380f66; BYTE $0xdc   // pblendvb    xmm3, xmm4, xmm0
 38005  	LONG $0xd36c0f66               // punpcklqdq    xmm2, xmm3 ; join both 8-byte halves for a single 16-byte store
 38006  	LONG $0x7f0f41f3; WORD $0x0014 // movdqu    oword [r8 + rax], xmm2
 38007    
 38008  LBB4_1316: // return when rsi == r10; otherwise continue at LBB4_1317 (defined outside this chunk)
 38009  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38010  	JE   LBB4_1655
 38011  	JMP  LBB4_1317
 38012  
 38013  LBB4_1322: // entry with element index rax = 0
 38014  	WORD $0xc031 // xor    eax, eax
 38015    
 38016  LBB4_1323: // one extra vector step: 4 input qwords -> 4 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38017  	LONG $0x01c1f641                     // test    r9b, 1
 38018  	JE   LBB4_1325
 38019  	LONG $0x146f0ff3; BYTE $0xc1         // movdqu    xmm2, oword [rcx + 8*rax]
 38020  	LONG $0x5c6f0ff3; WORD $0x10c1       // movdqu    xmm3, oword [rcx + 8*rax + 16]
 38021  	LONG $0xe4ef0f66                     // pxor    xmm4, xmm4
 38022  	LONG $0xc26f0f66                     // movdqa    xmm0, xmm2
 38023  	LONG $0x37380f66; BYTE $0xc4         // pcmpgtq    xmm0, xmm4 ; signed qword > 0 mask
 38024  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0 ; three packs narrow the qword mask down to bytes
 38025  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38026  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 38027  	LONG $0xcb6f0f66                     // movdqa    xmm1, xmm3
 38028  	LONG $0x37380f66; BYTE $0xcc         // pcmpgtq    xmm1, xmm4
 38029  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38030  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38031  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 38032  	LONG $0x29380f66; BYTE $0xd4         // pcmpeqq    xmm2, xmm4
 38033  	LONG $0xed760f66                     // pcmpeqd    xmm5, xmm5 ; all-ones, used to invert the ==0 masks
 38034  	LONG $0xd5ef0f66                     // pxor    xmm2, xmm5 ; -1 where input != 0, else 0
 38035  	LONG $0xd26b0f66                     // packssdw    xmm2, xmm2
 38036  	LONG $0xd26b0f66                     // packssdw    xmm2, xmm2
 38037  	LONG $0xd2630f66                     // packsswb    xmm2, xmm2
 38038  	LONG $0x29380f66; BYTE $0xdc         // pcmpeqq    xmm3, xmm4
 38039  	LONG $0xddef0f66                     // pxor    xmm3, xmm5
 38040  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 38041  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 38042  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 38043  	QUAD $0x000000c0a56f0f66             // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */
 38044  	LONG $0x10380f66; BYTE $0xd4         // pblendvb    xmm2, xmm4, xmm0 ; take the constant where input > 0
 38045  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 38046  	LONG $0x10380f66; BYTE $0xdc         // pblendvb    xmm3, xmm4, xmm0
 38047  	QUAD $0x000014153a0f4166             // pextrw    word [r8 + rax], xmm2, 0
 38048  	QUAD $0x02005c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 2], xmm3, 0
 38049    
 38050  LBB4_1325: // return when rsi == r10; otherwise continue at LBB4_1326 (defined outside this chunk)
 38051  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38052  	JE   LBB4_1655
 38053  	JMP  LBB4_1326
 38054  
 38055  LBB4_1331: // entry with element index rsi = 0
 38056  	WORD $0xf631 // xor    esi, esi
 38057    
 38058  LBB4_1332: // one extra vector step: 8 input float32 -> 8 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38059  	LONG $0x01c1f641                           // test    r9b, 1
 38060  	JE   LBB4_1334
 38061  	LONG $0xb104100f                           // movups    xmm0, oword [rcx + 4*rsi]
 38062  	LONG $0xb14c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rsi + 16]
 38063  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 38064  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 38065  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4 ; mask of lanes equal to 0.0
 38066  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2 ; narrow the mask dword -> word
 38067  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2 ; narrow the mask word -> byte
 38068  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 38069  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 38070  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 38071  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 38072  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; all-ones (-1 per lane)
 38073  	LONG $0xc5660f66                           // pcmpgtd    xmm0, xmm5 ; integer compare of the float bits with -1: true when the sign bit is clear
 38074  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 38075  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 38076  	LONG $0xcd660f66                           // pcmpgtd    xmm1, xmm5
 38077  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 38078  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 38079  	QUAD $0x00000080b56f0f66                   // movdqa    xmm6, oword 128[rbp] /* [rip + .LCPI4_12] */
 38080  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7 ; start from 0xFF bytes
 38081  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0 ; take the constant where the sign bit is clear
 38082  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 38083  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 38084  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38085  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0 ; force 0 where the input compared equal to 0.0
 38086  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 38087  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 38088  	LONG $0x7e0f4166; WORD $0x303c             // movd    dword [r8 + rsi], xmm7
 38089  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm5
 38090    
 38091  LBB4_1334: // return when rdx == r10; otherwise continue at LBB4_1335 (defined outside this chunk)
 38092  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38093  	JE   LBB4_1655
 38094  	JMP  LBB4_1335
 38095  
 38096  LBB4_1340: // entry with element index rsi = 0
 38097  	WORD $0xf631 // xor    esi, esi
 38098    
 38099  LBB4_1341: // one extra vector step over 32 bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38100  	LONG $0x01c1f641                           // test    r9b, 1
 38101  	JE   LBB4_1343
 38102  	LONG $0x046f0ff3; BYTE $0x31               // movdqu    xmm0, oword [rcx + rsi]
 38103  	LONG $0x4c6f0ff3; WORD $0x1031             // movdqu    xmm1, oword [rcx + rsi + 16]
 38104  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 38105  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2 ; mask of bytes equal to 0
 38106  	QUAD $0x000001009d6f0f66                   // movdqa    xmm3, oword 256[rbp] /* [rip + .LCPI4_22] */
 38107  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3 ; constant where input != 0, else 0
 38108  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38109  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38110  	LONG $0x7f0f41f3; WORD $0x3004             // movdqu    oword [r8 + rsi], xmm0
 38111  	LONG $0x7f0f41f3; WORD $0x304c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm1
 38112    
 38113  LBB4_1343: // return when rdx == rax; otherwise continue at LBB4_1344 (defined outside this chunk)
 38114  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38115  	JE   LBB4_1655
 38116  	JMP  LBB4_1344
 38117  
 38118  LBB4_1348: // entry with element index rax = 0
 38119  	WORD $0xc031 // xor    eax, eax
 38120    
 38121  LBB4_1349: // one extra vector step: 8 input dwords -> 8 output bytes, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38122  	LONG $0x01c1f641                           // test    r9b, 1
 38123  	JE   LBB4_1351
 38124  	LONG $0x146f0ff3; BYTE $0x81               // movdqu    xmm2, oword [rcx + 4*rax]
 38125  	LONG $0x5c6f0ff3; WORD $0x1081             // movdqu    xmm3, oword [rcx + 4*rax + 16]
 38126  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
 38127  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38128  	LONG $0xc4660f66                           // pcmpgtd    xmm0, xmm4 ; signed dword > 0 mask
 38129  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0 ; narrow the mask dword -> word
 38130  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0 ; narrow the mask word -> byte
 38131  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38132  	LONG $0xcc660f66                           // pcmpgtd    xmm1, xmm4
 38133  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 38134  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 38135  	LONG $0xd4760f66                           // pcmpeqd    xmm2, xmm4
 38136  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; all-ones, used to invert the ==0 masks
 38137  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 ; -1 where input != 0, else 0
 38138  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 38139  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 38140  	LONG $0xdc760f66                           // pcmpeqd    xmm3, xmm4
 38141  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38142  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 38143  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 38144  	QUAD $0x00000080a56f0f66                   // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 38145  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0 ; take the constant where input > 0
 38146  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; pblendvb reads its mask from xmm0
 38147  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 38148  	LONG $0x7e0f4166; WORD $0x0014             // movd    dword [r8 + rax], xmm2
 38149  	LONG $0x7e0f4166; WORD $0x005c; BYTE $0x04 // movd    dword [r8 + rax + 4], xmm3
 38150    
 38151  LBB4_1351: // return when rsi == r10; otherwise continue at LBB4_1352 (defined outside this chunk)
 38152  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38153  	JE   LBB4_1655
 38154  	JMP  LBB4_1352
 38155  
 38156  LBB4_1357: // entry with element index rsi = 0
 38157  	WORD $0xf631 // xor    esi, esi
 38158    
 38159  LBB4_1358: // one extra vector step: 4 input bytes -> 4 output qwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38160  	LONG $0x01c1f641                           // test    r9b, 1
 38161  	JE   LBB4_1360
 38162  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi] ; two input bytes at once
 38163  	LONG $0xd06e0f66                           // movd    xmm2, eax
 38164  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 38165  	LONG $0xd86e0f66                           // movd    xmm3, eax
 38166  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
 38167  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38168  	LONG $0xc4640f66                           // pcmpgtb    xmm0, xmm4 ; signed byte > 0 mask
 38169  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0 ; widen the mask to qword lanes
 38170  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38171  	LONG $0xcc640f66                           // pcmpgtb    xmm1, xmm4
 38172  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1
 38173  	LONG $0xd4740f66                           // pcmpeqb    xmm2, xmm4
 38174  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5 ; all-ones, used to invert the ==0 masks
 38175  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5 ; 0xFF where input != 0
 38176  	LONG $0x22380f66; BYTE $0xd2               // pmovsxbq    xmm2, xmm2 ; -1 where input != 0, else 0
 38177  	LONG $0xdc740f66                           // pcmpeqb    xmm3, xmm4
 38178  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38179  	LONG $0x22380f66; BYTE $0xdb               // pmovsxbq    xmm3, xmm3
 38180  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 38181  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0 ; take the constant where input > 0
 38182  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; blendvpd reads its mask from xmm0
 38183  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 38184  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 38185  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 38186    
 38187  LBB4_1360: // return when rdx == r10; otherwise continue at LBB4_1361 (defined outside this chunk)
 38188  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38189  	JE   LBB4_1655
 38190  	JMP  LBB4_1361
 38191  
 38192  LBB4_1366: // entry with element index rsi = 0
 38193  	WORD $0xf631 // xor    esi, esi
 38194    
 38195  LBB4_1367: // one extra vector step over 4 qwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38196  	LONG $0x01c1f641                           // test    r9b, 1
 38197  	JE   LBB4_1369
 38198  	LONG $0x046f0ff3; BYTE $0xf1               // movdqu    xmm0, oword [rcx + 8*rsi]
 38199  	LONG $0x4c6f0ff3; WORD $0x10f1             // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 38200  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 38201  	LONG $0x29380f66; BYTE $0xc2               // pcmpeqq    xmm0, xmm2 ; mask of qwords equal to 0
 38202  	QUAD $0x000000909d6f0f66                   // movdqa    xmm3, oword 144[rbp] /* [rip + .LCPI4_15] */
 38203  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3 ; constant where input != 0, else 0
 38204  	LONG $0x29380f66; BYTE $0xca               // pcmpeqq    xmm1, xmm2
 38205  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38206  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 38207  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 38208    
 38209  LBB4_1369: // return when rdx == r10; otherwise continue at LBB4_1370 (defined outside this chunk)
 38210  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38211  	JE   LBB4_1655
 38212  	JMP  LBB4_1370
 38213  
 38214  LBB4_1374: // entry with element index rsi = 0
 38215  	WORD $0xf631 // xor    esi, esi
 38216    
 38217  LBB4_1375: // one extra vector step over 4 qwords, run only when bit 0 of r9 is set (r9 is set outside this chunk)
 38218  	LONG $0x01c1f641                           // test    r9b, 1
 38219  	JE   LBB4_1377
 38220  	LONG $0x0c6f0ff3; BYTE $0xf1               // movdqu    xmm1, oword [rcx + 8*rsi]
 38221  	LONG $0x546f0ff3; WORD $0x10f1             // movdqu    xmm2, oword [rcx + 8*rsi + 16]
 38222  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 38223  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 38224  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 38225  	LONG $0x37380f66; BYTE $0xc1               // pcmpgtq    xmm0, xmm1 ; mask: constant > input (signed)
 38226  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 38227  	LONG $0x29380f66; BYTE $0xeb               // pcmpeqq    xmm5, xmm3
 38228  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1 ; all-ones, used to invert the ==0 masks
 38229  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1 ; -1 where input != 0, else 0
 38230  	LONG $0x29380f66; BYTE $0xda               // pcmpeqq    xmm3, xmm2
 38231  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 38232  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 38233  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 38234  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 38235  	LONG $0x15380f66; BYTE $0xd5               // blendvpd    xmm2, xmm5, xmm0 ; where input < constant take -1/0, else keep the constant
 38236  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1 ; blendvpd reads its mask from xmm0
 38237  	LONG $0x15380f66; BYTE $0xe3               // blendvpd    xmm4, xmm3, xmm0
 38238  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 38239  	LONG $0x110f4166; WORD $0xf064; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm4
 38240    
 38241  LBB4_1377: // return when rdx == r11; otherwise continue at LBB4_1378 (defined outside this chunk)
 38242  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 38243  	JE   LBB4_1655
 38244  	JMP  LBB4_1378
 38245  
 38246  LBB4_1383:
        	// Zero the element index rsi, then fall through to LBB4_1384.
 38247  	WORD $0xf631 // xor    esi, esi
 38248  
 38249  LBB4_1384:
        	// One step converting 4 input bytes (2 + 2 at [rcx + rsi]) into 4 x 64-bit outputs at [r8 + 8*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = (in != 0) ? (K & 0xFF) : 0, with K loaded from 144[rbp] (.LCPI4_15).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- not visible here; confirm.
 38250  	LONG $0x01c1f641                           // test    r9b, 1
 38251  	JE   LBB4_1386
 38252  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 38253  	LONG $0xc06e0f66                           // movd    xmm0, eax
 38254  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 38255  	LONG $0xc86e0f66                           // movd    xmm1, eax
 38256  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// Byte equality mask XOR all-ones gives 0xFF for non-zero bytes; pmovzxbq widens the low two bytes to qwords.
 38257  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 38258  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 38259  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 38260  	LONG $0x32380f66; BYTE $0xc0               // pmovzxbq    xmm0, xmm0
 38261  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 38262  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 38263  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38264  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 38265  	LONG $0x32380f66; BYTE $0xc9               // pmovzxbq    xmm1, xmm1
 38266  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 38267  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 38268  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 38269  
 38270  LBB4_1386:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1387.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38271  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38272  	JE   LBB4_1655
 38273  	JMP  LBB4_1387
 38274  
 38275  LBB4_1391:
        	// Zero the element index rsi, then fall through to LBB4_1392.
 38276  	WORD $0xf631 // xor    esi, esi
 38277  
 38278  LBB4_1392:
        	// One step converting 16 input bytes (8 + 8 at [rcx + rsi]) into 16 x 16-bit outputs at [r8 + 2*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element (signed byte compare): out = (in > 0) ? K : ((in != 0) ? -1 : 0), with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38279  	LONG $0x01c1f641                           // test    r9b, 1
 38280  	JE   LBB4_1394
 38281  	LONG $0x147e0ff3; BYTE $0x31               // movq    xmm2, qword [rcx + rsi]
 38282  	LONG $0x5c7e0ff3; WORD $0x0831             // movq    xmm3, qword [rcx + rsi + 8]
 38283  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
        	// xmm0 / xmm1 = (in > 0) masks, sign-extended to 16-bit lanes; they select K in the pblendvb steps below.
 38284  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38285  	LONG $0xc4640f66                           // pcmpgtb    xmm0, xmm4
 38286  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 38287  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38288  	LONG $0xcc640f66                           // pcmpgtb    xmm1, xmm4
 38289  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
        	// xmm2 / xmm3 = (in != 0) masks, sign-extended so a set lane reads as -1.
 38290  	LONG $0xd4740f66                           // pcmpeqb    xmm2, xmm4
 38291  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 38292  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 38293  	LONG $0x20380f66; BYTE $0xd2               // pmovsxbw    xmm2, xmm2
 38294  	LONG $0xdc740f66                           // pcmpeqb    xmm3, xmm4
 38295  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38296  	LONG $0x20380f66; BYTE $0xdb               // pmovsxbw    xmm3, xmm3
 38297  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 38298  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0
 38299  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38300  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 38301  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 38302  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
 38303  
 38304  LBB4_1394:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1395.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38305  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38306  	JE   LBB4_1655
 38307  	JMP  LBB4_1395
 38308  
 38309  LBB4_1400:
        	// Zero the element index rsi, then fall through to LBB4_1401.
 38310  	WORD $0xf631 // xor    esi, esi
 38311  
 38312  LBB4_1401:
        	// One step converting 16 input bytes (8 + 8 at [rcx + rsi]) into 16 x 16-bit outputs at [r8 + 2*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element (signed byte compare): out = (in > 0) ? K : ((in != 0) ? -1 : 0), with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38313  	LONG $0x01c1f641                           // test    r9b, 1
 38314  	JE   LBB4_1403
 38315  	LONG $0x147e0ff3; BYTE $0x31               // movq    xmm2, qword [rcx + rsi]
 38316  	LONG $0x5c7e0ff3; WORD $0x0831             // movq    xmm3, qword [rcx + rsi + 8]
 38317  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
        	// xmm0 / xmm1 = (in > 0) masks, sign-extended to 16-bit lanes; they select K in the pblendvb steps below.
 38318  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38319  	LONG $0xc4640f66                           // pcmpgtb    xmm0, xmm4
 38320  	LONG $0x20380f66; BYTE $0xc0               // pmovsxbw    xmm0, xmm0
 38321  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38322  	LONG $0xcc640f66                           // pcmpgtb    xmm1, xmm4
 38323  	LONG $0x20380f66; BYTE $0xc9               // pmovsxbw    xmm1, xmm1
        	// xmm2 / xmm3 = (in != 0) masks, sign-extended so a set lane reads as -1.
 38324  	LONG $0xd4740f66                           // pcmpeqb    xmm2, xmm4
 38325  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 38326  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 38327  	LONG $0x20380f66; BYTE $0xd2               // pmovsxbw    xmm2, xmm2
 38328  	LONG $0xdc740f66                           // pcmpeqb    xmm3, xmm4
 38329  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38330  	LONG $0x20380f66; BYTE $0xdb               // pmovsxbw    xmm3, xmm3
 38331  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 38332  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0
 38333  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38334  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 38335  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 38336  	LONG $0x7f0f41f3; WORD $0x705c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm3
 38337  
 38338  LBB4_1403:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1404.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38339  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38340  	JE   LBB4_1655
 38341  	JMP  LBB4_1404
 38342  
 38343  LBB4_1409:
        	// Zero the element index rsi, then fall through to LBB4_1410.
 38344  	WORD $0xf631 // xor    esi, esi
 38345  
 38346  LBB4_1410:
        	// One step over 16 x 16-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane: out = (in != 0) ? K : 0, where K is the vector loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- neither is visible here; confirm.
 38347  	LONG $0x01c1f641                           // test    r9b, 1
 38348  	JE   LBB4_1412
 38349  	LONG $0x046f0ff3; BYTE $0x71               // movdqu    xmm0, oword [rcx + 2*rsi]
 38350  	LONG $0x4c6f0ff3; WORD $0x1071             // movdqu    xmm1, oword [rcx + 2*rsi + 16]
 38351  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// xmm0/xmm1 become all-ones in lanes equal to zero; pandn then keeps K only in the other lanes.
 38352  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 38353  	QUAD $0x000000e09d6f0f66                   // movdqa    xmm3, oword 224[rbp] /* [rip + .LCPI4_20] */
 38354  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3
 38355  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 38356  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38357  	LONG $0x7f0f41f3; WORD $0x7004             // movdqu    oword [r8 + 2*rsi], xmm0
 38358  	LONG $0x7f0f41f3; WORD $0x704c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm1
 38359  
 38360  LBB4_1412:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1413.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38361  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38362  	JE   LBB4_1655
 38363  	JMP  LBB4_1413
 38364  
 38365  LBB4_1417:
        	// Zero the element index rsi, then fall through to LBB4_1418.
 38366  	WORD $0xf631 // xor    esi, esi
 38367  
 38368  LBB4_1418:
        	// One step over 16 x 16-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane: out = (in != 0) ? K : 0, where K is the vector loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- neither is visible here; confirm.
 38369  	LONG $0x01c1f641                           // test    r9b, 1
 38370  	JE   LBB4_1420
 38371  	LONG $0x046f0ff3; BYTE $0x71               // movdqu    xmm0, oword [rcx + 2*rsi]
 38372  	LONG $0x4c6f0ff3; WORD $0x1071             // movdqu    xmm1, oword [rcx + 2*rsi + 16]
 38373  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// xmm0/xmm1 become all-ones in lanes equal to zero; pandn then keeps K only in the other lanes.
 38374  	LONG $0xc2750f66                           // pcmpeqw    xmm0, xmm2
 38375  	QUAD $0x000000e09d6f0f66                   // movdqa    xmm3, oword 224[rbp] /* [rip + .LCPI4_20] */
 38376  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3
 38377  	LONG $0xca750f66                           // pcmpeqw    xmm1, xmm2
 38378  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38379  	LONG $0x7f0f41f3; WORD $0x7004             // movdqu    oword [r8 + 2*rsi], xmm0
 38380  	LONG $0x7f0f41f3; WORD $0x704c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm1
 38381  
 38382  LBB4_1420:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1421.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38383  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38384  	JE   LBB4_1655
 38385  	JMP  LBB4_1421
 38386  
 38387  LBB4_1425:
        	// Zero the element index rsi, then fall through to LBB4_1426.
 38388  	WORD $0xf631 // xor    esi, esi
 38389  
 38390  LBB4_1426:
        	// One step over 16 x 16-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane (signed compare): out = (K > in) ? ((in != 0) ? -1 : 0) : K, with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38391  	LONG $0x01c1f641                           // test    r9b, 1
 38392  	JE   LBB4_1428
 38393  	LONG $0x0c6f0ff3; BYTE $0x71               // movdqu    xmm1, oword [rcx + 2*rsi]
 38394  	LONG $0x546f0ff3; WORD $0x1071             // movdqu    xmm2, oword [rcx + 2*rsi + 16]
 38395  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 38396  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
        	// xmm0 = (K > first vector) mask; it is the implicit selector of the first pblendvb below.
 38397  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 38398  	LONG $0xc1650f66                           // pcmpgtw    xmm0, xmm1
        	// xmm5 / xmm3 = all-ones where the first / second input vector is non-zero (equality mask XOR all-ones).
 38399  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 38400  	LONG $0xeb750f66                           // pcmpeqw    xmm5, xmm3
 38401  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1
 38402  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1
 38403  	LONG $0xda750f66                           // pcmpeqw    xmm3, xmm2
 38404  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
        	// xmm1 = (K > second vector) mask, moved into xmm0 before the second blend.
 38405  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 38406  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 38407  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 38408  	LONG $0x10380f66; BYTE $0xd5               // pblendvb    xmm2, xmm5, xmm0
 38409  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38410  	LONG $0x10380f66; BYTE $0xe3               // pblendvb    xmm4, xmm3, xmm0
 38411  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 38412  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm4
 38413  
 38414  LBB4_1428:
        	// Branch to LBB4_1655 when rdx == r11, otherwise to LBB4_1429.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38415  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 38416  	JE   LBB4_1655
 38417  	JMP  LBB4_1429
 38418  
 38419  LBB4_1434:
        	// Zero the element index rsi, then fall through to LBB4_1435.
 38420  	WORD $0xf631 // xor    esi, esi
 38421  
 38422  LBB4_1435:
        	// One step over 16 x 16-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane (signed compare): out = (K > in) ? ((in != 0) ? -1 : 0) : K, with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38423  	LONG $0x01c1f641                           // test    r9b, 1
 38424  	JE   LBB4_1437
 38425  	LONG $0x0c6f0ff3; BYTE $0x71               // movdqu    xmm1, oword [rcx + 2*rsi]
 38426  	LONG $0x546f0ff3; WORD $0x1071             // movdqu    xmm2, oword [rcx + 2*rsi + 16]
 38427  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 38428  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
        	// xmm0 = (K > first vector) mask; it is the implicit selector of the first pblendvb below.
 38429  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 38430  	LONG $0xc1650f66                           // pcmpgtw    xmm0, xmm1
        	// xmm5 / xmm3 = all-ones where the first / second input vector is non-zero (equality mask XOR all-ones).
 38431  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 38432  	LONG $0xeb750f66                           // pcmpeqw    xmm5, xmm3
 38433  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1
 38434  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1
 38435  	LONG $0xda750f66                           // pcmpeqw    xmm3, xmm2
 38436  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
        	// xmm1 = (K > second vector) mask, moved into xmm0 before the second blend.
 38437  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 38438  	LONG $0xca650f66                           // pcmpgtw    xmm1, xmm2
 38439  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 38440  	LONG $0x10380f66; BYTE $0xd5               // pblendvb    xmm2, xmm5, xmm0
 38441  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38442  	LONG $0x10380f66; BYTE $0xe3               // pblendvb    xmm4, xmm3, xmm0
 38443  	LONG $0x7f0f41f3; WORD $0x7014             // movdqu    oword [r8 + 2*rsi], xmm2
 38444  	LONG $0x7f0f41f3; WORD $0x7064; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm4
 38445  
 38446  LBB4_1437:
        	// Branch to LBB4_1655 when rdx == r11, otherwise to LBB4_1438.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38447  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 38448  	JE   LBB4_1655
 38449  	JMP  LBB4_1438
 38450  
 38451  LBB4_1443:
        	// Zero the element index rsi, then fall through to LBB4_1444.
 38452  	WORD $0xf631 // xor    esi, esi
 38453  
 38454  LBB4_1444:
        	// One step converting 16 input bytes (8 + 8 at [rcx + rsi]) into 16 x 16-bit outputs at [r8 + 2*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = (in != 0) ? (K & 0x00FF) : 0, with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- not visible here; confirm.
 38455  	LONG $0x01c1f641                           // test    r9b, 1
 38456  	JE   LBB4_1446
 38457  	LONG $0x047e0ff3; BYTE $0x31               // movq    xmm0, qword [rcx + rsi]
 38458  	LONG $0x4c7e0ff3; WORD $0x0831             // movq    xmm1, qword [rcx + rsi + 8]
 38459  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// Byte equality mask XOR all-ones gives 0xFF for non-zero bytes; pmovzxbw widens them to 0x00FF words.
 38460  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 38461  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 38462  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 38463  	LONG $0x30380f66; BYTE $0xc0               // pmovzxbw    xmm0, xmm0
 38464  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 38465  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 38466  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38467  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 38468  	LONG $0x30380f66; BYTE $0xc9               // pmovzxbw    xmm1, xmm1
 38469  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 38470  	LONG $0x7f0f41f3; WORD $0x7004             // movdqu    oword [r8 + 2*rsi], xmm0
 38471  	LONG $0x7f0f41f3; WORD $0x704c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm1
 38472  
 38473  LBB4_1446:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1447.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38474  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38475  	JE   LBB4_1655
 38476  	JMP  LBB4_1447
 38477  
 38478  LBB4_1451:
        	// Zero the element index rsi, then fall through to LBB4_1452.
 38479  	WORD $0xf631 // xor    esi, esi
 38480  
 38481  LBB4_1452:
        	// One step converting 16 input bytes (8 + 8 at [rcx + rsi]) into 16 x 16-bit outputs at [r8 + 2*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = (in != 0) ? (K & 0x00FF) : 0, with K loaded from 224[rbp] (.LCPI4_20).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- not visible here; confirm.
 38482  	LONG $0x01c1f641                           // test    r9b, 1
 38483  	JE   LBB4_1454
 38484  	LONG $0x047e0ff3; BYTE $0x31               // movq    xmm0, qword [rcx + rsi]
 38485  	LONG $0x4c7e0ff3; WORD $0x0831             // movq    xmm1, qword [rcx + rsi + 8]
 38486  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// Byte equality mask XOR all-ones gives 0xFF for non-zero bytes; pmovzxbw widens them to 0x00FF words.
 38487  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 38488  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 38489  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 38490  	LONG $0x30380f66; BYTE $0xc0               // pmovzxbw    xmm0, xmm0
 38491  	QUAD $0x000000e0a56f0f66                   // movdqa    xmm4, oword 224[rbp] /* [rip + .LCPI4_20] */
 38492  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 38493  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38494  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 38495  	LONG $0x30380f66; BYTE $0xc9               // pmovzxbw    xmm1, xmm1
 38496  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 38497  	LONG $0x7f0f41f3; WORD $0x7004             // movdqu    oword [r8 + 2*rsi], xmm0
 38498  	LONG $0x7f0f41f3; WORD $0x704c; BYTE $0x10 // movdqu    oword [r8 + 2*rsi + 16], xmm1
 38499  
 38500  LBB4_1454:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1455.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38501  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38502  	JE   LBB4_1655
 38503  	JMP  LBB4_1455
 38504  
 38505  LBB4_1459:
        	// Zero the element index rsi, then fall through to LBB4_1460.
 38506  	WORD $0xf631 // xor    esi, esi
 38507  
 38508  LBB4_1460:
        	// One step converting 4 input bytes (2 + 2 at [rcx + rsi]) into 4 x 64-bit outputs at [r8 + 8*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element (signed byte compare): out = (in > 0) ? K : ((in != 0) ? -1 : 0), with K loaded from 144[rbp] (.LCPI4_15).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38509  	LONG $0x01c1f641                           // test    r9b, 1
 38510  	JE   LBB4_1462
 38511  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 38512  	LONG $0xd06e0f66                           // movd    xmm2, eax
 38513  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 38514  	LONG $0xd86e0f66                           // movd    xmm3, eax
 38515  	LONG $0xe4570f66                           // xorpd    xmm4, xmm4
        	// xmm0 / xmm1 = (in > 0) masks, sign-extended to 64-bit lanes; they select K in the blendvpd steps below.
 38516  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38517  	LONG $0xc4640f66                           // pcmpgtb    xmm0, xmm4
 38518  	LONG $0x22380f66; BYTE $0xc0               // pmovsxbq    xmm0, xmm0
 38519  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38520  	LONG $0xcc640f66                           // pcmpgtb    xmm1, xmm4
 38521  	LONG $0x22380f66; BYTE $0xc9               // pmovsxbq    xmm1, xmm1
        	// xmm2 / xmm3 = (in != 0) masks, sign-extended so a set lane reads as -1.
 38522  	LONG $0xd4740f66                           // pcmpeqb    xmm2, xmm4
 38523  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 38524  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 38525  	LONG $0x22380f66; BYTE $0xd2               // pmovsxbq    xmm2, xmm2
 38526  	LONG $0xdc740f66                           // pcmpeqb    xmm3, xmm4
 38527  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38528  	LONG $0x22380f66; BYTE $0xdb               // pmovsxbq    xmm3, xmm3
 38529  	QUAD $0x00000090a5280f66                   // movapd    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 38530  	LONG $0x15380f66; BYTE $0xd4               // blendvpd    xmm2, xmm4, xmm0
 38531  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38532  	LONG $0x15380f66; BYTE $0xdc               // blendvpd    xmm3, xmm4, xmm0
 38533  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 38534  	LONG $0x110f4166; WORD $0xf05c; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm3
 38535  
 38536  LBB4_1462:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1463.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38537  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38538  	JE   LBB4_1655
 38539  	JMP  LBB4_1463
 38540  
 38541  LBB4_1468:
        	// Zero the element index rsi, then fall through to LBB4_1469.
 38542  	WORD $0xf631 // xor    esi, esi
 38543  
 38544  LBB4_1469:
        	// One step converting 8 input bytes (4 + 4 at [rcx + rsi]) into 8 x float32 outputs at [r8 + 4*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element (signed byte compare): out = (in > 0) ? K : ((in != 0) ? -1.0f : 0.0f), with K loaded from 208[rbp] (.LCPI4_19).
        	// NOTE(review): K is presumably 1.0f per lane (signed sign() to float); K and the role of r9 are not visible here -- confirm.
 38545  	LONG $0x01c1f641                           // test    r9b, 1
 38546  	JE   LBB4_1471
 38547  	LONG $0x146e0f66; BYTE $0x31               // movd    xmm2, dword [rcx + rsi]
 38548  	LONG $0x5c6e0f66; WORD $0x0431             // movd    xmm3, dword [rcx + rsi + 4]
 38549  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
        	// xmm0 / xmm1 = (in > 0) masks, sign-extended to 32-bit lanes; they select K in the blendvps steps below.
 38550  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38551  	LONG $0xc4640f66                           // pcmpgtb    xmm0, xmm4
 38552  	LONG $0x21380f66; BYTE $0xc0               // pmovsxbd    xmm0, xmm0
 38553  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 38554  	LONG $0xcc640f66                           // pcmpgtb    xmm1, xmm4
 38555  	LONG $0x21380f66; BYTE $0xc9               // pmovsxbd    xmm1, xmm1
        	// xmm2 / xmm3 = (in != 0) masks widened to int32 (-1 or 0), then converted to float32.
 38556  	LONG $0xd4740f66                           // pcmpeqb    xmm2, xmm4
 38557  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 38558  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 38559  	LONG $0x21380f66; BYTE $0xd2               // pmovsxbd    xmm2, xmm2
 38560  	WORD $0x5b0f; BYTE $0xd2                   // cvtdq2ps    xmm2, xmm2
 38561  	LONG $0xdc740f66                           // pcmpeqb    xmm3, xmm4
 38562  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 38563  	LONG $0x21380f66; BYTE $0xdb               // pmovsxbd    xmm3, xmm3
 38564  	WORD $0x5b0f; BYTE $0xdb                   // cvtdq2ps    xmm3, xmm3
 38565  	LONG $0xd0a5280f; WORD $0x0000; BYTE $0x00 // movaps    xmm4, oword 208[rbp] /* [rip + .LCPI4_19] */
 38566  	LONG $0x14380f66; BYTE $0xd4               // blendvps    xmm2, xmm4, xmm0
 38567  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38568  	LONG $0x14380f66; BYTE $0xdc               // blendvps    xmm3, xmm4, xmm0
 38569  	LONG $0x14110f41; BYTE $0xb0               // movups    oword [r8 + 4*rsi], xmm2
 38570  	LONG $0x5c110f41; WORD $0x10b0             // movups    oword [r8 + 4*rsi + 16], xmm3
 38571  
 38572  LBB4_1471:
        	// Branch to LBB4_1655 when rdx == rax, otherwise to LBB4_1472.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38573  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38574  	JE   LBB4_1655
 38575  	JMP  LBB4_1472
 38576  
 38577  LBB4_1490:
        	// Zero the element index rsi, then fall through to LBB4_1491.
 38578  	WORD $0xf631 // xor    esi, esi
 38579  
 38580  LBB4_1491:
        	// One step over 4 x 64-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane: out = (in != 0) ? K : 0, where K is the vector loaded from 144[rbp] (.LCPI4_15).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- neither is visible here; confirm.
 38581  	LONG $0x01c1f641                           // test    r9b, 1
 38582  	JE   LBB4_1493
 38583  	LONG $0x046f0ff3; BYTE $0xf1               // movdqu    xmm0, oword [rcx + 8*rsi]
 38584  	LONG $0x4c6f0ff3; WORD $0x10f1             // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 38585  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// xmm0/xmm1 become all-ones in lanes equal to zero; pandn then keeps K only in the other lanes.
 38586  	LONG $0x29380f66; BYTE $0xc2               // pcmpeqq    xmm0, xmm2
 38587  	QUAD $0x000000909d6f0f66                   // movdqa    xmm3, oword 144[rbp] /* [rip + .LCPI4_15] */
 38588  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3
 38589  	LONG $0x29380f66; BYTE $0xca               // pcmpeqq    xmm1, xmm2
 38590  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38591  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 38592  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 38593  
 38594  LBB4_1493:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1494.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38595  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38596  	JE   LBB4_1655
 38597  	JMP  LBB4_1494
 38598  
 38599  LBB4_1498:
        	// Zero the element index rsi, then fall through to LBB4_1499.
 38600  	WORD $0xf631 // xor    esi, esi
 38601  
 38602  LBB4_1499:
        	// One step over 4 x 64-bit lanes (two 16-byte vectors) at index rsi; skipped when bit 0 of r9 is clear.
        	// Per lane (signed compare): out = (K > in) ? ((in != 0) ? -1 : 0) : K, with K loaded from 144[rbp] (.LCPI4_15).
        	// NOTE(review): with K == 1 per lane this is a signed sign(); K and the role of r9 are not visible here -- confirm.
 38603  	LONG $0x01c1f641                           // test    r9b, 1
 38604  	JE   LBB4_1501
 38605  	LONG $0x0c6f0ff3; BYTE $0xf1               // movdqu    xmm1, oword [rcx + 8*rsi]
 38606  	LONG $0x546f0ff3; WORD $0x10f1             // movdqu    xmm2, oword [rcx + 8*rsi + 16]
 38607  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 38608  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
        	// xmm0 = (K > first vector) mask; it is the implicit selector of the first blendvpd below.
 38609  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 38610  	LONG $0x37380f66; BYTE $0xc1               // pcmpgtq    xmm0, xmm1
        	// xmm5 / xmm3 = all-ones where the first / second input vector is non-zero (equality mask XOR all-ones).
 38611  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 38612  	LONG $0x29380f66; BYTE $0xeb               // pcmpeqq    xmm5, xmm3
 38613  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1
 38614  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1
 38615  	LONG $0x29380f66; BYTE $0xda               // pcmpeqq    xmm3, xmm2
 38616  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
        	// xmm1 = (K > second vector) mask, moved into xmm0 before the second blend.
 38617  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 38618  	LONG $0x37380f66; BYTE $0xca               // pcmpgtq    xmm1, xmm2
 38619  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 38620  	LONG $0x15380f66; BYTE $0xd5               // blendvpd    xmm2, xmm5, xmm0
 38621  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38622  	LONG $0x15380f66; BYTE $0xe3               // blendvpd    xmm4, xmm3, xmm0
 38623  	LONG $0x110f4166; WORD $0xf014             // movupd    oword [r8 + 8*rsi], xmm2
 38624  	LONG $0x110f4166; WORD $0xf064; BYTE $0x10 // movupd    oword [r8 + 8*rsi + 16], xmm4
 38625  
 38626  LBB4_1501:
        	// Branch to LBB4_1655 when rdx == r11, otherwise to LBB4_1502.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38627  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 38628  	JE   LBB4_1655
 38629  	JMP  LBB4_1502
 38630  
 38631  LBB4_1507:
        	// Zero the element index rsi, then fall through to LBB4_1508.
 38632  	WORD $0xf631 // xor    esi, esi
 38633  
 38634  LBB4_1508:
        	// One step converting 4 input bytes (2 + 2 at [rcx + rsi]) into 4 x 64-bit outputs at [r8 + 8*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = (in != 0) ? (K & 0xFF) : 0, with K loaded from 144[rbp] (.LCPI4_15).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- not visible here; confirm.
 38635  	LONG $0x01c1f641                           // test    r9b, 1
 38636  	JE   LBB4_1510
 38637  	LONG $0x3104b70f                           // movzx    eax, word [rcx + rsi]
 38638  	LONG $0xc06e0f66                           // movd    xmm0, eax
 38639  	LONG $0x3144b70f; BYTE $0x02               // movzx    eax, word [rcx + rsi + 2]
 38640  	LONG $0xc86e0f66                           // movd    xmm1, eax
 38641  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// Byte equality mask XOR all-ones gives 0xFF for non-zero bytes; pmovzxbq widens the low two bytes to qwords.
 38642  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 38643  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 38644  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 38645  	LONG $0x32380f66; BYTE $0xc0               // pmovzxbq    xmm0, xmm0
 38646  	QUAD $0x00000090a56f0f66                   // movdqa    xmm4, oword 144[rbp] /* [rip + .LCPI4_15] */
 38647  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 38648  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38649  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 38650  	LONG $0x32380f66; BYTE $0xc9               // pmovzxbq    xmm1, xmm1
 38651  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 38652  	LONG $0x7f0f41f3; WORD $0xf004             // movdqu    oword [r8 + 8*rsi], xmm0
 38653  	LONG $0x7f0f41f3; WORD $0xf04c; BYTE $0x10 // movdqu    oword [r8 + 8*rsi + 16], xmm1
 38654  
 38655  LBB4_1510:
        	// Branch to LBB4_1655 when rdx == r10, otherwise to LBB4_1511.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38656  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38657  	JE   LBB4_1655
 38658  	JMP  LBB4_1511
 38659  
 38660  LBB4_1515:
        	// Zero the element index rsi, then fall through to LBB4_1516.
 38661  	WORD $0xf631 // xor    esi, esi
 38662  
 38663  LBB4_1516:
        	// One step converting 8 input bytes (4 + 4 at [rcx + rsi]) into 8 x float32 outputs at [r8 + 4*rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = float32((in != 0) ? (K & 0xFF) : 0), with integer K loaded from 80[rbp] (.LCPI4_8).
        	// NOTE(review): K is presumably 1 per lane and r9 a vector-iteration count -- not visible here; confirm.
 38664  	LONG $0x01c1f641               // test    r9b, 1
 38665  	JE   LBB4_1518
 38666  	LONG $0x046e0f66; BYTE $0x31   // movd    xmm0, dword [rcx + rsi]
 38667  	LONG $0x4c6e0f66; WORD $0x0431 // movd    xmm1, dword [rcx + rsi + 4]
 38668  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
        	// Byte equality mask XOR all-ones gives 0xFF for non-zero bytes; pmovzxbd widens them to dwords
        	// before the AND with K and the int32 -> float32 conversion.
 38669  	LONG $0xc2740f66               // pcmpeqb    xmm0, xmm2
 38670  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3
 38671  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3
 38672  	LONG $0x31380f66; BYTE $0xc0   // pmovzxbd    xmm0, xmm0
 38673  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 38674  	LONG $0xc4db0f66               // pand    xmm0, xmm4
 38675  	WORD $0x5b0f; BYTE $0xc0       // cvtdq2ps    xmm0, xmm0
 38676  	LONG $0xca740f66               // pcmpeqb    xmm1, xmm2
 38677  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 38678  	LONG $0x31380f66; BYTE $0xc9   // pmovzxbd    xmm1, xmm1
 38679  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 38680  	WORD $0x5b0f; BYTE $0xc9       // cvtdq2ps    xmm1, xmm1
 38681  	LONG $0x04110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm0
 38682  	LONG $0x4c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm1
 38683  
 38684  LBB4_1518:
        	// Branch to LBB4_1655 when rdx == rax, otherwise to LBB4_1519.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38685  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38686  	JE   LBB4_1655
 38687  	JMP  LBB4_1519
 38688  
 38689  LBB4_1535:
        	// Zero the element index rsi, then fall through to LBB4_1536.
 38690  	WORD $0xf631 // xor    esi, esi
 38691  
 38692  LBB4_1536:
        	// One step converting 8 x 32-bit inputs (two 16-byte vectors at [rcx + 4*rsi]) into 8 output bytes at [r8 + rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = (in != 0) ? (byte of K) : 0, with K loaded from 128[rbp] (.LCPI4_12).
        	// NOTE(review): K is presumably 1 per byte and r9 a vector-iteration count -- not visible here; confirm.
 38693  	LONG $0x01c1f641                           // test    r9b, 1
 38694  	JE   LBB4_1538
 38695  	LONG $0x046f0ff3; BYTE $0xb1               // movdqu    xmm0, oword [rcx + 4*rsi]
 38696  	LONG $0x4c6f0ff3; WORD $0x10b1             // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 38697  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
        	// Dword "non-zero" masks are narrowed to bytes with two signed-saturating packs, which keep -1 and 0 intact.
 38698  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2
 38699  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 38700  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 38701  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 38702  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 38703  	QUAD $0x00000080a56f0f66                   // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 38704  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 38705  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 38706  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 38707  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 38708  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 38709  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
        	// Only the low 4 bytes of each result vector are stored.
 38710  	LONG $0x7e0f4166; WORD $0x3004             // movd    dword [r8 + rsi], xmm0
 38711  	LONG $0x7e0f4166; WORD $0x304c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm1
 38712  
 38713  LBB4_1538:
        	// Branch to LBB4_1655 when rdx == rax, otherwise to LBB4_1539.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38714  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38715  	JE   LBB4_1655
 38716  	JMP  LBB4_1539
 38717  
 38718  LBB4_1543:
        	// Zero the element index rsi, then fall through to LBB4_1544.
 38719  	WORD $0xf631 // xor    esi, esi
 38720  
 38721  LBB4_1544:
        	// One step converting 4 x float64 inputs (two 16-byte vectors at [rcx + 8*rsi]) into 4 output bytes at [r8 + rsi];
        	// skipped when bit 0 of r9 is clear.
        	// Per element: out = 0 where in == 0.0; otherwise the truncated integer of ((in AND C0) OR C1), reduced to a
        	// byte by the pshufb control vector at 64[rbp]. C0 is loaded from 0[rbp] (.LCPI4_0), C1 from 16[rbp] (.LCPI4_1).
        	// NOTE(review): C0/C1 are presumably the sign-bit mask and 1.0 (i.e. copysign(1.0, in)), and r9 a
        	// vector-iteration count -- none of these are visible here; confirm.
 38722  	LONG $0x01c1f641                     // test    r9b, 1
 38723  	JE   LBB4_1546
 38724  	LONG $0x1c100f66; BYTE $0xf1         // movupd    xmm3, oword [rcx + 8*rsi]
 38725  	LONG $0x64100f66; WORD $0x10f1       // movupd    xmm4, oword [rcx + 8*rsi + 16]
 38726  	LONG $0xd2570f66                     // xorpd    xmm2, xmm2
        	// xmm0 / xmm1 = (in == 0.0) masks, narrowed from 64-bit lanes to bytes by three saturating packs.
 38727  	LONG $0xc3280f66                     // movapd    xmm0, xmm3
 38728  	LONG $0xc2c20f66; BYTE $0x00         // cmpeqpd    xmm0, xmm2
 38729  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38730  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38731  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 38732  	LONG $0xcc280f66                     // movapd    xmm1, xmm4
 38733  	LONG $0xcac20f66; BYTE $0x00         // cmpeqpd    xmm1, xmm2
 38734  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38735  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38736  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 38737  	LONG $0x6d280f66; BYTE $0x00         // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI4_0] */
 38738  	LONG $0xdd540f66                     // andpd    xmm3, xmm5
 38739  	LONG $0x75280f66; BYTE $0x10         // movapd    xmm6, oword 16[rbp] /* [rip + .LCPI4_1] */
 38740  	LONG $0xde560f66                     // orpd    xmm3, xmm6
 38741  	LONG $0xe5540f66                     // andpd    xmm4, xmm5
 38742  	LONG $0xe6560f66                     // orpd    xmm4, xmm6
 38743  	LONG $0xdbe60f66                     // cvttpd2dq    xmm3, xmm3
 38744  	LONG $0x6d6f0f66; BYTE $0x40         // movdqa    xmm5, oword 64[rbp] /* [rip + .LCPI4_7] */
 38745  	LONG $0x00380f66; BYTE $0xdd         // pshufb    xmm3, xmm5
 38746  	LONG $0xe4e60f66                     // cvttpd2dq    xmm4, xmm4
 38747  	LONG $0x00380f66; BYTE $0xe5         // pshufb    xmm4, xmm5
        	// Where the (in == 0.0) mask in xmm0 is set, replace the converted byte with zero from xmm2.
 38748  	LONG $0x10380f66; BYTE $0xda         // pblendvb    xmm3, xmm2, xmm0
 38749  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 38750  	LONG $0x10380f66; BYTE $0xe2         // pblendvb    xmm4, xmm2, xmm0
        	// Each result vector contributes its low 16 bits (two output bytes).
 38751  	QUAD $0x00301c153a0f4166             // pextrw    word [r8 + rsi], xmm3, 0
 38752  	QUAD $0x023064153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm4, 0
 38753  
 38754  LBB4_1546:
        	// Branch to LBB4_1655 when rdx == rax, otherwise to LBB4_1547.
        	// NOTE(review): presumably "vector path covered everything" vs. a scalar remainder loop -- confirm at the targets.
 38755  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38756  	JE   LBB4_1655
 38757  	JMP  LBB4_1547
 38758  
 38759  LBB4_1552:
 38760  	WORD $0xc031 // xor    eax, eax
 38761  
 38762  LBB4_1553:
 38763  	LONG $0x01c1f641                           // test    r9b, 1
 38764  	JE   LBB4_1555
 38765  	LONG $0x0c6f0ff3; BYTE $0x01               // movdqu    xmm1, oword [rcx + rax]
 38766  	LONG $0x546f0ff3; WORD $0x1001             // movdqu    xmm2, oword [rcx + rax + 16]
 38767  	LONG $0xdbef0f66                           // pxor    xmm3, xmm3
 38768  	QUAD $0x00000100a56f0f66                   // movdqa    xmm4, oword 256[rbp] /* [rip + .LCPI4_22] */
 38769  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
 38770  	LONG $0xc1640f66                           // pcmpgtb    xmm0, xmm1
 38771  	LONG $0xe96f0f66                           // movdqa    xmm5, xmm1
 38772  	LONG $0xeb740f66                           // pcmpeqb    xmm5, xmm3
 38773  	LONG $0xc9760f66                           // pcmpeqd    xmm1, xmm1
 38774  	LONG $0xe9ef0f66                           // pxor    xmm5, xmm1
 38775  	LONG $0xda740f66                           // pcmpeqb    xmm3, xmm2
 38776  	LONG $0xd9ef0f66                           // pxor    xmm3, xmm1
 38777  	LONG $0xcc6f0f66                           // movdqa    xmm1, xmm4
 38778  	LONG $0xca640f66                           // pcmpgtb    xmm1, xmm2
 38779  	LONG $0xd46f0f66                           // movdqa    xmm2, xmm4
 38780  	LONG $0x10380f66; BYTE $0xd5               // pblendvb    xmm2, xmm5, xmm0
 38781  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38782  	LONG $0x10380f66; BYTE $0xe3               // pblendvb    xmm4, xmm3, xmm0
 38783  	LONG $0x7f0f41f3; WORD $0x0014             // movdqu    oword [r8 + rax], xmm2
 38784  	LONG $0x7f0f41f3; WORD $0x0064; BYTE $0x10 // movdqu    oword [r8 + rax + 16], xmm4
 38785  
 38786  LBB4_1555:
 38787  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38788  	JE   LBB4_1655
 38789  	JMP  LBB4_1556
 38790  
 38791  LBB4_1561:
 38792  	WORD $0xf631 // xor    esi, esi
 38793  
 38794  LBB4_1562:
 38795  	LONG $0x01c1f641                     // test    r9b, 1
 38796  	JE   LBB4_1564
 38797  	LONG $0x046f0ff3; BYTE $0xf1         // movdqu    xmm0, oword [rcx + 8*rsi]
 38798  	LONG $0x4c6f0ff3; WORD $0x10f1       // movdqu    xmm1, oword [rcx + 8*rsi + 16]
 38799  	LONG $0xd2ef0f66                     // pxor    xmm2, xmm2
 38800  	LONG $0x29380f66; BYTE $0xc2         // pcmpeqq    xmm0, xmm2
 38801  	LONG $0xdb760f66                     // pcmpeqd    xmm3, xmm3
 38802  	LONG $0xc3ef0f66                     // pxor    xmm0, xmm3
 38803  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38804  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38805  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 38806  	QUAD $0x000000c0a56f0f66             // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */
 38807  	LONG $0xc4db0f66                     // pand    xmm0, xmm4
 38808  	LONG $0x29380f66; BYTE $0xca         // pcmpeqq    xmm1, xmm2
 38809  	LONG $0xcbef0f66                     // pxor    xmm1, xmm3
 38810  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38811  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38812  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 38813  	QUAD $0x003004153a0f4166             // pextrw    word [r8 + rsi], xmm0, 0
 38814  	LONG $0xccdb0f66                     // pand    xmm1, xmm4
 38815  	QUAD $0x02304c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rsi + 2], xmm1, 0
 38816  
 38817  LBB4_1564:
 38818  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38819  	JE   LBB4_1655
 38820  	JMP  LBB4_1565
 38821  
 38822  LBB4_1569:
 38823  	WORD $0xf631 // xor    esi, esi
 38824  
 38825  LBB4_1570:
 38826  	LONG $0x01c1f641               // test    r9b, 1
 38827  	JE   LBB4_1572
 38828  	LONG $0x046f0ff3; BYTE $0x71   // movdqu    xmm0, oword [rcx + 2*rsi]
 38829  	LONG $0x4c6f0ff3; WORD $0x1071 // movdqu    xmm1, oword [rcx + 2*rsi + 16]
 38830  	LONG $0xd2ef0f66               // pxor    xmm2, xmm2
 38831  	LONG $0xc2750f66               // pcmpeqw    xmm0, xmm2
 38832  	LONG $0xdb760f66               // pcmpeqd    xmm3, xmm3
 38833  	LONG $0xc3ef0f66               // pxor    xmm0, xmm3
 38834  	LONG $0xc0630f66               // packsswb    xmm0, xmm0
 38835  	QUAD $0x000000f0a56f0f66       // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */
 38836  	LONG $0xc4db0f66               // pand    xmm0, xmm4
 38837  	LONG $0xca750f66               // pcmpeqw    xmm1, xmm2
 38838  	LONG $0xcbef0f66               // pxor    xmm1, xmm3
 38839  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
 38840  	LONG $0xccdb0f66               // pand    xmm1, xmm4
 38841  	LONG $0xc16c0f66               // punpcklqdq    xmm0, xmm1
 38842  	LONG $0x7f0f41f3; WORD $0x3004 // movdqu    oword [r8 + rsi], xmm0
 38843  
 38844  LBB4_1572:
 38845  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38846  	JE   LBB4_1655
 38847  	JMP  LBB4_1573
 38848  
 38849  LBB4_1577:
 38850  	WORD $0xc031 // xor    eax, eax
 38851  
 38852  LBB4_1578:
 38853  	LONG $0x01c1f641               // test    r9b, 1
 38854  	JE   LBB4_1580
 38855  	LONG $0x146f0ff3; BYTE $0x41   // movdqu    xmm2, oword [rcx + 2*rax]
 38856  	LONG $0x5c6f0ff3; WORD $0x1041 // movdqu    xmm3, oword [rcx + 2*rax + 16]
 38857  	LONG $0xe4ef0f66               // pxor    xmm4, xmm4
 38858  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 38859  	LONG $0xc4650f66               // pcmpgtw    xmm0, xmm4
 38860  	LONG $0xc0630f66               // packsswb    xmm0, xmm0
 38861  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 38862  	LONG $0xcc650f66               // pcmpgtw    xmm1, xmm4
 38863  	LONG $0xc9630f66               // packsswb    xmm1, xmm1
 38864  	LONG $0xd4750f66               // pcmpeqw    xmm2, xmm4
 38865  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5
 38866  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5
 38867  	LONG $0xd2630f66               // packsswb    xmm2, xmm2
 38868  	LONG $0xdc750f66               // pcmpeqw    xmm3, xmm4
 38869  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 38870  	LONG $0xdb630f66               // packsswb    xmm3, xmm3
 38871  	QUAD $0x000000f0a56f0f66       // movdqa    xmm4, oword 240[rbp] /* [rip + .LCPI4_21] */
 38872  	LONG $0x10380f66; BYTE $0xd4   // pblendvb    xmm2, xmm4, xmm0
 38873  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 38874  	LONG $0x10380f66; BYTE $0xdc   // pblendvb    xmm3, xmm4, xmm0
 38875  	LONG $0xd36c0f66               // punpcklqdq    xmm2, xmm3
 38876  	LONG $0x7f0f41f3; WORD $0x0014 // movdqu    oword [r8 + rax], xmm2
 38877  
 38878  LBB4_1580:
 38879  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38880  	JE   LBB4_1655
 38881  	JMP  LBB4_1581
 38882  
 38883  LBB4_1586:
 38884  	WORD $0xc031 // xor    eax, eax
 38885  
 38886  LBB4_1587:
 38887  	LONG $0x01c1f641                     // test    r9b, 1
 38888  	JE   LBB4_1589
 38889  	LONG $0x146f0ff3; BYTE $0xc1         // movdqu    xmm2, oword [rcx + 8*rax]
 38890  	LONG $0x5c6f0ff3; WORD $0x10c1       // movdqu    xmm3, oword [rcx + 8*rax + 16]
 38891  	LONG $0xe4ef0f66                     // pxor    xmm4, xmm4
 38892  	LONG $0xc26f0f66                     // movdqa    xmm0, xmm2
 38893  	LONG $0x37380f66; BYTE $0xc4         // pcmpgtq    xmm0, xmm4
 38894  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38895  	LONG $0xc06b0f66                     // packssdw    xmm0, xmm0
 38896  	LONG $0xc0630f66                     // packsswb    xmm0, xmm0
 38897  	LONG $0xcb6f0f66                     // movdqa    xmm1, xmm3
 38898  	LONG $0x37380f66; BYTE $0xcc         // pcmpgtq    xmm1, xmm4
 38899  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38900  	LONG $0xc96b0f66                     // packssdw    xmm1, xmm1
 38901  	LONG $0xc9630f66                     // packsswb    xmm1, xmm1
 38902  	LONG $0x29380f66; BYTE $0xd4         // pcmpeqq    xmm2, xmm4
 38903  	LONG $0xed760f66                     // pcmpeqd    xmm5, xmm5
 38904  	LONG $0xd5ef0f66                     // pxor    xmm2, xmm5
 38905  	LONG $0xd26b0f66                     // packssdw    xmm2, xmm2
 38906  	LONG $0xd26b0f66                     // packssdw    xmm2, xmm2
 38907  	LONG $0xd2630f66                     // packsswb    xmm2, xmm2
 38908  	LONG $0x29380f66; BYTE $0xdc         // pcmpeqq    xmm3, xmm4
 38909  	LONG $0xddef0f66                     // pxor    xmm3, xmm5
 38910  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 38911  	LONG $0xdb6b0f66                     // packssdw    xmm3, xmm3
 38912  	LONG $0xdb630f66                     // packsswb    xmm3, xmm3
 38913  	QUAD $0x000000c0a56f0f66             // movdqa    xmm4, oword 192[rbp] /* [rip + .LCPI4_18] */
 38914  	LONG $0x10380f66; BYTE $0xd4         // pblendvb    xmm2, xmm4, xmm0
 38915  	LONG $0xc16f0f66                     // movdqa    xmm0, xmm1
 38916  	LONG $0x10380f66; BYTE $0xdc         // pblendvb    xmm3, xmm4, xmm0
 38917  	QUAD $0x000014153a0f4166             // pextrw    word [r8 + rax], xmm2, 0
 38918  	QUAD $0x02005c153a0f4166; BYTE $0x00 // pextrw    word [r8 + rax + 2], xmm3, 0
 38919  
 38920  LBB4_1589:
 38921  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 38922  	JE   LBB4_1655
 38923  	JMP  LBB4_1590
 38924  
 38925  LBB4_1595:
 38926  	WORD $0xf631 // xor    esi, esi
 38927  
 38928  LBB4_1596:
 38929  	LONG $0x01c1f641                           // test    r9b, 1
 38930  	JE   LBB4_1598
 38931  	LONG $0xb104100f                           // movups    xmm0, oword [rcx + 4*rsi]
 38932  	LONG $0xb14c100f; BYTE $0x10               // movups    xmm1, oword [rcx + 4*rsi + 16]
 38933  	WORD $0x570f; BYTE $0xe4                   // xorps    xmm4, xmm4
 38934  	WORD $0x280f; BYTE $0xd0                   // movaps    xmm2, xmm0
 38935  	LONG $0x00d4c20f                           // cmpeqps    xmm2, xmm4
 38936  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 38937  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 38938  	WORD $0x280f; BYTE $0xd9                   // movaps    xmm3, xmm1
 38939  	LONG $0x00dcc20f                           // cmpeqps    xmm3, xmm4
 38940  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 38941  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 38942  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 38943  	LONG $0xc5660f66                           // pcmpgtd    xmm0, xmm5
 38944  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 38945  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 38946  	LONG $0xcd660f66                           // pcmpgtd    xmm1, xmm5
 38947  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 38948  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 38949  	QUAD $0x00000080b56f0f66                   // movdqa    xmm6, oword 128[rbp] /* [rip + .LCPI4_12] */
 38950  	LONG $0xff760f66                           // pcmpeqd    xmm7, xmm7
 38951  	LONG $0x10380f66; BYTE $0xfe               // pblendvb    xmm7, xmm6, xmm0
 38952  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 38953  	LONG $0x10380f66; BYTE $0xee               // pblendvb    xmm5, xmm6, xmm0
 38954  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38955  	LONG $0x10380f66; BYTE $0xfc               // pblendvb    xmm7, xmm4, xmm0
 38956  	LONG $0xc36f0f66                           // movdqa    xmm0, xmm3
 38957  	LONG $0x10380f66; BYTE $0xec               // pblendvb    xmm5, xmm4, xmm0
 38958  	LONG $0x7e0f4166; WORD $0x303c             // movd    dword [r8 + rsi], xmm7
 38959  	LONG $0x7e0f4166; WORD $0x306c; BYTE $0x04 // movd    dword [r8 + rsi + 4], xmm5
 38960  
 38961  LBB4_1598:
 38962  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 38963  	JE   LBB4_1655
 38964  	JMP  LBB4_1599
 38965  
 38966  LBB4_1604:
 38967  	WORD $0xf631 // xor    esi, esi
 38968  
 38969  LBB4_1605:
 38970  	LONG $0x01c1f641                           // test    r9b, 1
 38971  	JE   LBB4_1607
 38972  	LONG $0x046f0ff3; BYTE $0x31               // movdqu    xmm0, oword [rcx + rsi]
 38973  	LONG $0x4c6f0ff3; WORD $0x1031             // movdqu    xmm1, oword [rcx + rsi + 16]
 38974  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 38975  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 38976  	QUAD $0x000001009d6f0f66                   // movdqa    xmm3, oword 256[rbp] /* [rip + .LCPI4_22] */
 38977  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3
 38978  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 38979  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 38980  	LONG $0x7f0f41f3; WORD $0x3004             // movdqu    oword [r8 + rsi], xmm0
 38981  	LONG $0x7f0f41f3; WORD $0x304c; BYTE $0x10 // movdqu    oword [r8 + rsi + 16], xmm1
 38982  
 38983  LBB4_1607:
 38984  	WORD $0x3948; BYTE $0xc2 // cmp    rdx, rax
 38985  	JE   LBB4_1655
 38986  	JMP  LBB4_1608
 38987  
 38988  LBB4_1612:
 38989  	WORD $0xc031 // xor    eax, eax
 38990  
 38991  LBB4_1613:
 38992  	LONG $0x01c1f641                           // test    r9b, 1
 38993  	JE   LBB4_1615
 38994  	LONG $0x146f0ff3; BYTE $0x81               // movdqu    xmm2, oword [rcx + 4*rax]
 38995  	LONG $0x5c6f0ff3; WORD $0x1081             // movdqu    xmm3, oword [rcx + 4*rax + 16]
 38996  	LONG $0xe4ef0f66                           // pxor    xmm4, xmm4
 38997  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
 38998  	LONG $0xc4660f66                           // pcmpgtd    xmm0, xmm4
 38999  	LONG $0xc06b0f66                           // packssdw    xmm0, xmm0
 39000  	LONG $0xc0630f66                           // packsswb    xmm0, xmm0
 39001  	LONG $0xcb6f0f66                           // movdqa    xmm1, xmm3
 39002  	LONG $0xcc660f66                           // pcmpgtd    xmm1, xmm4
 39003  	LONG $0xc96b0f66                           // packssdw    xmm1, xmm1
 39004  	LONG $0xc9630f66                           // packsswb    xmm1, xmm1
 39005  	LONG $0xd4760f66                           // pcmpeqd    xmm2, xmm4
 39006  	LONG $0xed760f66                           // pcmpeqd    xmm5, xmm5
 39007  	LONG $0xd5ef0f66                           // pxor    xmm2, xmm5
 39008  	LONG $0xd26b0f66                           // packssdw    xmm2, xmm2
 39009  	LONG $0xd2630f66                           // packsswb    xmm2, xmm2
 39010  	LONG $0xdc760f66                           // pcmpeqd    xmm3, xmm4
 39011  	LONG $0xddef0f66                           // pxor    xmm3, xmm5
 39012  	LONG $0xdb6b0f66                           // packssdw    xmm3, xmm3
 39013  	LONG $0xdb630f66                           // packsswb    xmm3, xmm3
 39014  	QUAD $0x00000080a56f0f66                   // movdqa    xmm4, oword 128[rbp] /* [rip + .LCPI4_12] */
 39015  	LONG $0x10380f66; BYTE $0xd4               // pblendvb    xmm2, xmm4, xmm0
 39016  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
 39017  	LONG $0x10380f66; BYTE $0xdc               // pblendvb    xmm3, xmm4, xmm0
 39018  	LONG $0x7e0f4166; WORD $0x0014             // movd    dword [r8 + rax], xmm2
 39019  	LONG $0x7e0f4166; WORD $0x005c; BYTE $0x04 // movd    dword [r8 + rax + 4], xmm3
 39020  
 39021  LBB4_1615:
 39022  	WORD $0x394c; BYTE $0xd6 // cmp    rsi, r10
 39023  	JE   LBB4_1655
 39024  	JMP  LBB4_1616
 39025  
 39026  LBB4_1621:
 39027  	WORD $0xf631 // xor    esi, esi
 39028  
 39029  LBB4_1622:
 39030  	LONG $0x01c1f641                           // test    r9b, 1
 39031  	JE   LBB4_1624
 39032  	LONG $0x046f0ff3; BYTE $0xb1               // movdqu    xmm0, oword [rcx + 4*rsi]
 39033  	LONG $0x4c6f0ff3; WORD $0x10b1             // movdqu    xmm1, oword [rcx + 4*rsi + 16]
 39034  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 39035  	LONG $0xc2760f66                           // pcmpeqd    xmm0, xmm2
 39036  	LONG $0x5d6f0f66; BYTE $0x50               // movdqa    xmm3, oword 80[rbp] /* [rip + .LCPI4_8] */
 39037  	LONG $0xc3df0f66                           // pandn    xmm0, xmm3
 39038  	LONG $0xca760f66                           // pcmpeqd    xmm1, xmm2
 39039  	LONG $0xcbdf0f66                           // pandn    xmm1, xmm3
 39040  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 39041  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 39042  
 39043  LBB4_1624:
 39044  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 39045  	JE   LBB4_1655
 39046  	JMP  LBB4_1625
 39047  
 39048  LBB4_1629:
 39049  	WORD $0xf631 // xor    esi, esi
 39050  
 39051  LBB4_1630:
 39052  	LONG $0x01c1f641               // test    r9b, 1
 39053  	JE   LBB4_1632
 39054  	LONG $0x146e0f66; BYTE $0x31   // movd    xmm2, dword [rcx + rsi]
 39055  	LONG $0x5c6e0f66; WORD $0x0431 // movd    xmm3, dword [rcx + rsi + 4]
 39056  	WORD $0x570f; BYTE $0xe4       // xorps    xmm4, xmm4
 39057  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
 39058  	LONG $0xc4640f66               // pcmpgtb    xmm0, xmm4
 39059  	LONG $0x21380f66; BYTE $0xc0   // pmovsxbd    xmm0, xmm0
 39060  	LONG $0xcb6f0f66               // movdqa    xmm1, xmm3
 39061  	LONG $0xcc640f66               // pcmpgtb    xmm1, xmm4
 39062  	LONG $0x21380f66; BYTE $0xc9   // pmovsxbd    xmm1, xmm1
 39063  	LONG $0xd4740f66               // pcmpeqb    xmm2, xmm4
 39064  	LONG $0xed760f66               // pcmpeqd    xmm5, xmm5
 39065  	LONG $0xd5ef0f66               // pxor    xmm2, xmm5
 39066  	LONG $0x21380f66; BYTE $0xd2   // pmovsxbd    xmm2, xmm2
 39067  	LONG $0xdc740f66               // pcmpeqb    xmm3, xmm4
 39068  	LONG $0xddef0f66               // pxor    xmm3, xmm5
 39069  	LONG $0x21380f66; BYTE $0xdb   // pmovsxbd    xmm3, xmm3
 39070  	LONG $0x5065280f               // movaps    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 39071  	LONG $0x14380f66; BYTE $0xd4   // blendvps    xmm2, xmm4, xmm0
 39072  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 39073  	LONG $0x14380f66; BYTE $0xdc   // blendvps    xmm3, xmm4, xmm0
 39074  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 39075  	LONG $0x5c110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm3
 39076  
 39077  LBB4_1632:
 39078  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 39079  	JE   LBB4_1655
 39080  	JMP  LBB4_1633
 39081  
 39082  LBB4_1638:
 39083  	WORD $0xf631 // xor    esi, esi
 39084  
 39085  LBB4_1639:
 39086  	LONG $0x01c1f641                           // test    r9b, 1
 39087  	JE   LBB4_1641
 39088  	LONG $0x046e0f66; BYTE $0x31               // movd    xmm0, dword [rcx + rsi]
 39089  	LONG $0x4c6e0f66; WORD $0x0431             // movd    xmm1, dword [rcx + rsi + 4]
 39090  	LONG $0xd2ef0f66                           // pxor    xmm2, xmm2
 39091  	LONG $0xc2740f66                           // pcmpeqb    xmm0, xmm2
 39092  	LONG $0xdb760f66                           // pcmpeqd    xmm3, xmm3
 39093  	LONG $0xc3ef0f66                           // pxor    xmm0, xmm3
 39094  	LONG $0x31380f66; BYTE $0xc0               // pmovzxbd    xmm0, xmm0
 39095  	LONG $0x656f0f66; BYTE $0x50               // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 39096  	LONG $0xc4db0f66                           // pand    xmm0, xmm4
 39097  	LONG $0xca740f66                           // pcmpeqb    xmm1, xmm2
 39098  	LONG $0xcbef0f66                           // pxor    xmm1, xmm3
 39099  	LONG $0x31380f66; BYTE $0xc9               // pmovzxbd    xmm1, xmm1
 39100  	LONG $0xccdb0f66                           // pand    xmm1, xmm4
 39101  	LONG $0x7f0f41f3; WORD $0xb004             // movdqu    oword [r8 + 4*rsi], xmm0
 39102  	LONG $0x7f0f41f3; WORD $0xb04c; BYTE $0x10 // movdqu    oword [r8 + 4*rsi + 16], xmm1
 39103  
 39104  LBB4_1641:
 39105  	WORD $0x394c; BYTE $0xd2 // cmp    rdx, r10
 39106  	JE   LBB4_1655
 39107  	JMP  LBB4_1642
 39108  
 39109  LBB4_1646:
 39110  	WORD $0xf631 // xor    esi, esi
 39111  
 39112  LBB4_1647:
 39113  	LONG $0x01c1f641               // test    r9b, 1
 39114  	JE   LBB4_1649
 39115  	LONG $0x0c6f0ff3; BYTE $0xb1   // movdqu    xmm1, oword [rcx + 4*rsi]
 39116  	LONG $0x546f0ff3; WORD $0x10b1 // movdqu    xmm2, oword [rcx + 4*rsi + 16]
 39117  	LONG $0xdbef0f66               // pxor    xmm3, xmm3
 39118  	LONG $0x656f0f66; BYTE $0x50   // movdqa    xmm4, oword 80[rbp] /* [rip + .LCPI4_8] */
 39119  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
 39120  	LONG $0xc1660f66               // pcmpgtd    xmm0, xmm1
 39121  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
 39122  	LONG $0xeb760f66               // pcmpeqd    xmm5, xmm3
 39123  	LONG $0xc9760f66               // pcmpeqd    xmm1, xmm1
 39124  	LONG $0xe9ef0f66               // pxor    xmm5, xmm1
 39125  	LONG $0xda760f66               // pcmpeqd    xmm3, xmm2
 39126  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
 39127  	LONG $0xcc6f0f66               // movdqa    xmm1, xmm4
 39128  	LONG $0xca660f66               // pcmpgtd    xmm1, xmm2
 39129  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
 39130  	LONG $0x14380f66; BYTE $0xd5   // blendvps    xmm2, xmm5, xmm0
 39131  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
 39132  	LONG $0x14380f66; BYTE $0xe3   // blendvps    xmm4, xmm3, xmm0
 39133  	LONG $0x14110f41; BYTE $0xb0   // movups    oword [r8 + 4*rsi], xmm2
 39134  	LONG $0x64110f41; WORD $0x10b0 // movups    oword [r8 + 4*rsi + 16], xmm4
 39135  
 39136  LBB4_1649:
 39137  	WORD $0x394c; BYTE $0xda // cmp    rdx, r11
 39138  	JE   LBB4_1655
 39139  	JMP  LBB4_1650