github.com/apache/arrow/go/v16@v16.1.0/arrow/compute/internal/kernels/constant_factor_avx2_amd64.s (about)

     1  //go:build go1.18 && !noasm && !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  TEXT ·_multiply_constant_int32_int32_avx2(SB), $0-32
        // Multiplies each 32-bit element of src by the low 32 bits of factor and
        // stores the low 32 bits of each product to dest (4-byte stride on both).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined (EDX); a value <= 0 when read
        // as a signed 32-bit integer returns immediately.
        // Register roles after setup: DI=src, SI=dest, CX=factor, R9=len
        // (zero-extended from 32 bits), R11=scalar element index, AX=vector
        // element index.
        // NOTE(review): presumably compiled from a C loop of the form
        // `dest[i] = src[i] * factor` -- confirm against the C source / Go stub.
     5  
     6  	MOVQ src+0(FP), DI
     7  	MOVQ dest+8(FP), SI
     8  	MOVQ len+16(FP), DX
     9  	MOVQ factor+24(FP), CX
    10  
        // Early out for len <= 0, then R9 = len. The scalar path (LBB0_2) is taken
        // when len <= 31 or when [src, src+4*len) and [dest, dest+4*len) overlap;
        // the vector path (LBB0_9) is taken only when one buffer ends at or before
        // the start of the other.
    11  	WORD $0xd285             // test    edx, edx
    12  	JLE  LBB0_16
    13  	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
    14  	WORD $0xfa83; BYTE $0x1f // cmp    edx, 31
    15  	JBE  LBB0_2
    16  	LONG $0x8f048d4a         // lea    rax, [rdi + 4*r9]
    17  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
    18  	JBE  LBB0_9
    19  	LONG $0x8e048d4a         // lea    rax, [rsi + 4*r9]
    20  	WORD $0x3948; BYTE $0xf8 // cmp    rax, rdi
    21  	JBE  LBB0_9
    22  
        // Scalar path starting at element 0.
    23  LBB0_2:
    24  	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d
    25  
        // Scalar entry, also used as the tail after the vector path.
        // R8 = len - R11 - 1; AX = len & 3 = number of single elements to peel off
        // before the 4x-unrolled loop (R11 is 0 or a multiple of 32 here).
    26  LBB0_3:
    27  	WORD $0x894d; BYTE $0xd8 // mov    r8, r11
    28  	WORD $0xf749; BYTE $0xd0 // not    r8
    29  	WORD $0x014d; BYTE $0xc8 // add    r8, r9
    30  	WORD $0x894c; BYTE $0xc8 // mov    rax, r9
    31  	LONG $0x03e08348         // and    rax, 3
    32  	JE   LBB0_5
    33  
        // Peel loop: one element per iteration while AX counts down to 0.
    34  LBB0_4:
    35  	LONG $0x9f148b42         // mov    edx, dword [rdi + 4*r11]
    36  	WORD $0xaf0f; BYTE $0xd1 // imul    edx, ecx
    37  	LONG $0x9e148942         // mov    dword [rsi + 4*r11], edx
    38  	LONG $0x01c38349         // add    r11, 1
    39  	LONG $0xffc08348         // add    rax, -1
    40  	JNE  LBB0_4
    41  
        // R8 < 3 means fewer than 4 elements remained at LBB0_3, so the peel loop
        // already handled all of them.
    42  LBB0_5:
    43  	LONG $0x03f88349 // cmp    r8, 3
    44  	JB   LBB0_16
    45  
        // Main scalar loop: 4 elements per iteration until R11 == len.
    46  LBB0_6:
    47  	LONG $0x9f048b42             // mov    eax, dword [rdi + 4*r11]
    48  	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
    49  	LONG $0x9e048942             // mov    dword [rsi + 4*r11], eax
    50  	LONG $0x9f448b42; BYTE $0x04 // mov    eax, dword [rdi + 4*r11 + 4]
    51  	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
    52  	LONG $0x9e448942; BYTE $0x04 // mov    dword [rsi + 4*r11 + 4], eax
    53  	LONG $0x9f448b42; BYTE $0x08 // mov    eax, dword [rdi + 4*r11 + 8]
    54  	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
    55  	LONG $0x9e448942; BYTE $0x08 // mov    dword [rsi + 4*r11 + 8], eax
    56  	LONG $0x9f448b42; BYTE $0x0c // mov    eax, dword [rdi + 4*r11 + 12]
    57  	WORD $0xaf0f; BYTE $0xc1     // imul    eax, ecx
    58  	LONG $0x9e448942; BYTE $0x0c // mov    dword [rsi + 4*r11 + 12], eax
    59  	LONG $0x04c38349             // add    r11, 4
    60  	WORD $0x394d; BYTE $0xd9     // cmp    r9, r11
    61  	JNE  LBB0_6
    62  	JMP  LBB0_16
    63  
        // Vector setup: R11 = len & -32 (elements covered by the vector path),
        // YMM0 = low 32 bits of factor broadcast to 8 lanes, R8 = number of
        // 32-element chunks. With exactly one chunk (AX == 0 after the LEA) the
        // unrolled loop is skipped via LBB0_10; otherwise R10 = -(R8 & -2) is
        // counted up to 0 in steps of 2.
    64  LBB0_9:
    65  	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
    66  	LONG $0xe0e38341             // and    r11d, -32
    67  	LONG $0xc16ef9c5             // vmovd    xmm0, ecx
    68  	LONG $0x587de2c4; BYTE $0xc0 // vpbroadcastd    ymm0, xmm0
    69  	LONG $0xe0438d49             // lea    rax, [r11 - 32]
    70  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
    71  	LONG $0x05e8c149             // shr    r8, 5
    72  	LONG $0x01c08349             // add    r8, 1
    73  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
    74  	JE   LBB0_10
    75  	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
    76  	LONG $0xfee28349             // and    r10, -2
    77  	WORD $0xf749; BYTE $0xda     // neg    r10
    78  	WORD $0xc031                 // xor    eax, eax
    79  
        // 2x-unrolled vector loop: two 32-element chunks (64 elements) per
        // iteration, each chunk being four 8-lane VPMULLD + unaligned stores.
    80  LBB0_12:
    81  	LONG $0x407de2c4; WORD $0x870c             // vpmulld    ymm1, ymm0, yword [rdi + 4*rax]
    82  	LONG $0x407de2c4; WORD $0x8754; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 32]
    83  	LONG $0x407de2c4; WORD $0x875c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 64]
    84  	LONG $0x407de2c4; WORD $0x8764; BYTE $0x60 // vpmulld    ymm4, ymm0, yword [rdi + 4*rax + 96]
    85  	LONG $0x0c7ffec5; BYTE $0x86               // vmovdqu    yword [rsi + 4*rax], ymm1
    86  	LONG $0x547ffec5; WORD $0x2086             // vmovdqu    yword [rsi + 4*rax + 32], ymm2
    87  	LONG $0x5c7ffec5; WORD $0x4086             // vmovdqu    yword [rsi + 4*rax + 64], ymm3
    88  	LONG $0x647ffec5; WORD $0x6086             // vmovdqu    yword [rsi + 4*rax + 96], ymm4
    89  	QUAD $0x0080878c407de2c4; WORD $0x0000     // vpmulld    ymm1, ymm0, yword [rdi + 4*rax + 128]
    90  	QUAD $0x00a08794407de2c4; WORD $0x0000     // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 160]
    91  	QUAD $0x00c0879c407de2c4; WORD $0x0000     // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 192]
    92  	QUAD $0x00e087a4407de2c4; WORD $0x0000     // vpmulld    ymm4, ymm0, yword [rdi + 4*rax + 224]
    93  	QUAD $0x000080868c7ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 128], ymm1
    94  	QUAD $0x0000a086947ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 160], ymm2
    95  	QUAD $0x0000c0869c7ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 192], ymm3
    96  	QUAD $0x0000e086a47ffec5; BYTE $0x00       // vmovdqu    yword [rsi + 4*rax + 224], ymm4
    97  	LONG $0x40c08348                           // add    rax, 64
    98  	LONG $0x02c28349                           // add    r10, 2
    99  	JNE  LBB0_12
        // An odd chunk count leaves one more 32-element chunk for LBB0_14.
   100  	LONG $0x01c0f641                           // test    r8b, 1
   101  	JE   LBB0_15
   102  
        // Final single 32-element chunk. The last multiply overwrites YMM0; the
        // broadcast factor is not read again after this block.
   103  LBB0_14:
   104  	LONG $0x407de2c4; WORD $0x870c             // vpmulld    ymm1, ymm0, yword [rdi + 4*rax]
   105  	LONG $0x407de2c4; WORD $0x8754; BYTE $0x20 // vpmulld    ymm2, ymm0, yword [rdi + 4*rax + 32]
   106  	LONG $0x407de2c4; WORD $0x875c; BYTE $0x40 // vpmulld    ymm3, ymm0, yword [rdi + 4*rax + 64]
   107  	LONG $0x407de2c4; WORD $0x8744; BYTE $0x60 // vpmulld    ymm0, ymm0, yword [rdi + 4*rax + 96]
   108  	LONG $0x0c7ffec5; BYTE $0x86               // vmovdqu    yword [rsi + 4*rax], ymm1
   109  	LONG $0x547ffec5; WORD $0x2086             // vmovdqu    yword [rsi + 4*rax + 32], ymm2
   110  	LONG $0x5c7ffec5; WORD $0x4086             // vmovdqu    yword [rsi + 4*rax + 64], ymm3
   111  	LONG $0x447ffec5; WORD $0x6086             // vmovdqu    yword [rsi + 4*rax + 96], ymm0
   112  
        // If len is not a multiple of 32, finish the remainder on the scalar path.
   113  LBB0_15:
   114  	WORD $0x394d; BYTE $0xcb // cmp    r11, r9
   115  	JNE  LBB0_3
   116  
        // Common exit; VZEROUPPER clears the upper YMM halves before returning.
   117  LBB0_16:
   118  	VZEROUPPER
   119  	RET
   120  
        // Exactly one 32-element chunk: AX = 0 and R8 is 1 on this path, so the
        // JNE to LBB0_14 is taken.
   121  LBB0_10:
   122  	WORD $0xc031     // xor    eax, eax
   123  	LONG $0x01c0f641 // test    r8b, 1
   124  	JNE  LBB0_14
   125  	JMP  LBB0_15
   126  
   127  TEXT ·_divide_constant_int32_int32_avx2(SB), $0-32
        // Divides each sign-extended 32-bit element of src by the 64-bit factor and
        // stores the low 32 bits of each quotient to dest (4-byte stride on both).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined; a value <= 0 when read as a
        // signed 32-bit integer returns immediately.
        // No vector instructions appear in this routine despite the _avx2 suffix.
        // Each division picks one of two forms: when (x | factor) has no bits set
        // above bit 31, both operands are non-negative 32-bit values and a 32-bit
        // unsigned DIV yields the same quotient; otherwise CQO + 64-bit signed
        // IDIV is used.
        // Register roles: DI=src, SI=dest, CX=factor, R9=len, R8=element index,
        // R10=len & -2 (end of the two-elements-per-iteration loop).
        // NOTE(review): no guard against factor == 0 is visible here and DIV/IDIV
        // fault on a zero divisor; presumably callers guarantee non-zero -- confirm.
   128  
   129  	MOVQ src+0(FP), DI
   130  	MOVQ dest+8(FP), SI
   131  	MOVQ len+16(FP), DX
   132  	MOVQ factor+24(FP), CX
   133  
        // len <= 0: return. len == 1: go straight to the odd-element handler with
        // R8 = 0. Otherwise set up the pair loop at LBB1_9.
   134  	WORD $0xd285             // test    edx, edx
   135  	JLE  LBB1_8
   136  	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
   137  	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
   138  	JNE  LBB1_9
   139  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   140  
        // Odd tail: when len is odd, divide the one remaining element at index R8;
        // when len is even, return.
   141  LBB1_3:
   142  	LONG $0x01c1f641         // test    r9b, 1
   143  	JE   LBB1_8
   144  	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
   145  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   146  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   147  	LONG $0x20eac148         // shr    rdx, 32
   148  	JE   LBB1_5
   149  	WORD $0x9948             // cqo
   150  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   151  	JMP  LBB1_7
   152  
        // len >= 2: R10 = len rounded down to an even count, R8 = 0.
   153  LBB1_9:
   154  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   155  	LONG $0xfee28341         // and    r10d, -2
   156  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   157  	JMP  LBB1_10
   158  
        // 64-bit signed divide for the second element of the pair.
   159  LBB1_15:
   160  	WORD $0x9948             // cqo
   161  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   162  
        // Store the second quotient and advance by 2; fall through to the next pair
        // or branch to the odd-tail handler once R8 reaches R10.
   163  LBB1_16:
   164  	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
   165  	LONG $0x02c08349             // add    r8, 2
   166  	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
   167  	JE   LBB1_3
   168  
        // First element of the pair: pick 32-bit DIV (LBB1_11) or 64-bit IDIV.
   169  LBB1_10:
   170  	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
   171  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   172  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   173  	LONG $0x20eac148         // shr    rdx, 32
   174  	JE   LBB1_11
   175  	WORD $0x9948             // cqo
   176  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   177  	JMP  LBB1_13
   178  
        // 32-bit unsigned divide for the first element (EDX cleared first).
   179  LBB1_11:
   180  	WORD $0xd231 // xor    edx, edx
   181  	WORD $0xf1f7 // div    ecx
   182  
        // Store the first quotient, then load and classify the second element.
   183  LBB1_13:
   184  	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
   185  	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
   186  	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
   187  	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
   188  	LONG $0x20eac148             // shr    rdx, 32
   189  	JNE  LBB1_15
   190  	WORD $0xd231                 // xor    edx, edx
   191  	WORD $0xf1f7                 // div    ecx
   192  	JMP  LBB1_16
   193  
        // 32-bit unsigned divide for the odd tail element.
   194  LBB1_5:
   195  	WORD $0xd231 // xor    edx, edx
   196  	WORD $0xf1f7 // div    ecx
   197  
        // Store the tail quotient.
   198  LBB1_7:
   199  	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax
   200  
   201  LBB1_8:
   202  	RET
   203  
   204  TEXT ·_multiply_constant_int32_int64_avx2(SB), $0-32
        // Sign-extends each 32-bit element of src to 64 bits, multiplies it by the
        // 64-bit factor, and stores the low 64 bits of each product to dest
        // (4-byte source stride, 8-byte destination stride).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined; a value <= 0 when read as a
        // signed 32-bit integer returns immediately.
        // Register roles: DI=src, SI=dest, CX=factor, R8=len; DX holds len & -16
        // during the vector loop and is the element index in the scalar tail;
        // AX is the vector-loop element index.
   205  
   206  	MOVQ src+0(FP), DI
   207  	MOVQ dest+8(FP), SI
   208  	MOVQ len+16(FP), DX
   209  	MOVQ factor+24(FP), CX
   210  
        // len <= 0: return. len <= 15: run everything in the scalar loop from
        // index 0. Otherwise use the vector loop first.
   211  	WORD $0xd285             // test    edx, edx
   212  	JLE  LBB2_7
   213  	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
   214  	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
   215  	JA   LBB2_3
   216  	WORD $0xd231             // xor    edx, edx
   217  	JMP  LBB2_6
   218  
        // Vector setup: DX = len & -16, YMM0 = factor broadcast to 4 qword lanes,
        // YMM1 = YMM0 >> 32 (high dword of factor in the low half of each lane).
   219  LBB2_3:
   220  	WORD $0x8944; BYTE $0xc2     // mov    edx, r8d
   221  	WORD $0xe283; BYTE $0xf0     // and    edx, -16
   222  	LONG $0x6ef9e1c4; BYTE $0xc1 // vmovq    xmm0, rcx
   223  	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
   224  	WORD $0xc031                 // xor    eax, eax
   225  	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
   226  
        // Vector loop, 16 elements per iteration (four YMM registers of 4 qwords).
        // Each 64-bit product is assembled from 32x32->64 VPMULUDQ partial products:
        //   lo(f)*lo(x) + ((hi(f)*lo(x) + lo(f)*hi(x)) << 32)
        // where x is the sign-extended element and f is the factor.
   227  LBB2_4:
   228  	LONG $0x257de2c4; WORD $0x8714             // vpmovsxdq    ymm2, oword [rdi + 4*rax]
   229  	LONG $0x257de2c4; WORD $0x875c; BYTE $0x10 // vpmovsxdq    ymm3, oword [rdi + 4*rax + 16]
   230  	LONG $0x257de2c4; WORD $0x8764; BYTE $0x20 // vpmovsxdq    ymm4, oword [rdi + 4*rax + 32]
   231  	LONG $0x257de2c4; WORD $0x876c; BYTE $0x30 // vpmovsxdq    ymm5, oword [rdi + 4*rax + 48]
   232  	LONG $0xf2f4f5c5                           // vpmuludq    ymm6, ymm1, ymm2
   233  	LONG $0xd273c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm2, 32
   234  	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
   235  	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
   236  	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
   237  	LONG $0xd2f4fdc5                           // vpmuludq    ymm2, ymm0, ymm2
   238  	LONG $0xd6d4edc5                           // vpaddq    ymm2, ymm2, ymm6
   239  	LONG $0xf3f4f5c5                           // vpmuludq    ymm6, ymm1, ymm3
   240  	LONG $0xd373c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm3, 32
   241  	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
   242  	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
   243  	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
   244  	LONG $0xdbf4fdc5                           // vpmuludq    ymm3, ymm0, ymm3
   245  	LONG $0xded4e5c5                           // vpaddq    ymm3, ymm3, ymm6
   246  	LONG $0xf4f4f5c5                           // vpmuludq    ymm6, ymm1, ymm4
   247  	LONG $0xd473c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm4, 32
   248  	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
   249  	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
   250  	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
   251  	LONG $0xe4f4fdc5                           // vpmuludq    ymm4, ymm0, ymm4
   252  	LONG $0xe6d4ddc5                           // vpaddq    ymm4, ymm4, ymm6
   253  	LONG $0xf5f4f5c5                           // vpmuludq    ymm6, ymm1, ymm5
   254  	LONG $0xd573c5c5; BYTE $0x20               // vpsrlq    ymm7, ymm5, 32
   255  	LONG $0xfff4fdc5                           // vpmuludq    ymm7, ymm0, ymm7
   256  	LONG $0xf6d4c5c5                           // vpaddq    ymm6, ymm7, ymm6
   257  	LONG $0xf673cdc5; BYTE $0x20               // vpsllq    ymm6, ymm6, 32
   258  	LONG $0xedf4fdc5                           // vpmuludq    ymm5, ymm0, ymm5
   259  	LONG $0xeed4d5c5                           // vpaddq    ymm5, ymm5, ymm6
   260  	LONG $0x147ffec5; BYTE $0xc6               // vmovdqu    yword [rsi + 8*rax], ymm2
   261  	LONG $0x5c7ffec5; WORD $0x20c6             // vmovdqu    yword [rsi + 8*rax + 32], ymm3
   262  	LONG $0x647ffec5; WORD $0x40c6             // vmovdqu    yword [rsi + 8*rax + 64], ymm4
   263  	LONG $0x6c7ffec5; WORD $0x60c6             // vmovdqu    yword [rsi + 8*rax + 96], ymm5
   264  	LONG $0x10c08348                           // add    rax, 16
   265  	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
   266  	JNE  LBB2_4
        // Done when len is a multiple of 16; otherwise DX is the first tail index.
   267  	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
   268  	JE   LBB2_7
   269  
        // Scalar loop: one element per iteration from index DX up to len.
   270  LBB2_6:
   271  	LONG $0x97046348         // movsxd    rax, dword [rdi + 4*rdx]
   272  	LONG $0xc1af0f48         // imul    rax, rcx
   273  	LONG $0xd6048948         // mov    qword [rsi + 8*rdx], rax
   274  	LONG $0x01c28348         // add    rdx, 1
   275  	WORD $0x3949; BYTE $0xd0 // cmp    r8, rdx
   276  	JNE  LBB2_6
   277  
        // Common exit; VZEROUPPER clears the upper YMM halves before returning.
   278  LBB2_7:
   279  	VZEROUPPER
   280  	RET
   281  
   282  TEXT ·_divide_constant_int32_int64_avx2(SB), $0-32
        // Divides each sign-extended 32-bit element of src by the 64-bit factor and
        // stores the 64-bit quotient to dest (4-byte source stride, 8-byte
        // destination stride).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined; a value <= 0 when read as a
        // signed 32-bit integer returns immediately.
        // No vector instructions appear in this routine despite the _avx2 suffix.
        // Each division picks one of two forms: when (x | factor) has no bits set
        // above bit 31, both operands are non-negative 32-bit values and a 32-bit
        // unsigned DIV yields the same quotient (the 32-bit result write leaves
        // the upper half of RAX zero, so the qword store is still correct);
        // otherwise CQO + 64-bit signed IDIV is used.
        // Register roles: DI=src, SI=dest, CX=factor, R9=len, R8=element index,
        // R10=len & -2 (end of the two-elements-per-iteration loop).
        // NOTE(review): no guard against factor == 0 is visible here and DIV/IDIV
        // fault on a zero divisor; presumably callers guarantee non-zero -- confirm.
   283  
   284  	MOVQ src+0(FP), DI
   285  	MOVQ dest+8(FP), SI
   286  	MOVQ len+16(FP), DX
   287  	MOVQ factor+24(FP), CX
   288  
        // len <= 0: return. len == 1: go straight to the odd-element handler with
        // R8 = 0. Otherwise set up the pair loop at LBB3_9.
   289  	WORD $0xd285             // test    edx, edx
   290  	JLE  LBB3_8
   291  	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
   292  	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
   293  	JNE  LBB3_9
   294  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   295  
        // Odd tail: when len is odd, divide the one remaining element at index R8;
        // when len is even, return.
   296  LBB3_3:
   297  	LONG $0x01c1f641         // test    r9b, 1
   298  	JE   LBB3_8
   299  	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
   300  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   301  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   302  	LONG $0x20eac148         // shr    rdx, 32
   303  	JE   LBB3_5
   304  	WORD $0x9948             // cqo
   305  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   306  	JMP  LBB3_7
   307  
        // len >= 2: R10 = len rounded down to an even count, R8 = 0.
   308  LBB3_9:
   309  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   310  	LONG $0xfee28341         // and    r10d, -2
   311  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   312  	JMP  LBB3_10
   313  
        // 64-bit signed divide for the second element of the pair.
   314  LBB3_15:
   315  	WORD $0x9948             // cqo
   316  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   317  
        // Store the second quotient and advance by 2; fall through to the next pair
        // or branch to the odd-tail handler once R8 reaches R10.
   318  LBB3_16:
   319  	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
   320  	LONG $0x02c08349             // add    r8, 2
   321  	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
   322  	JE   LBB3_3
   323  
        // First element of the pair: pick 32-bit DIV (LBB3_11) or 64-bit IDIV.
   324  LBB3_10:
   325  	LONG $0x8704634a         // movsxd    rax, dword [rdi + 4*r8]
   326  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   327  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   328  	LONG $0x20eac148         // shr    rdx, 32
   329  	JE   LBB3_11
   330  	WORD $0x9948             // cqo
   331  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   332  	JMP  LBB3_13
   333  
        // 32-bit unsigned divide for the first element (EDX cleared first).
   334  LBB3_11:
   335  	WORD $0xd231 // xor    edx, edx
   336  	WORD $0xf1f7 // div    ecx
   337  
        // Store the first quotient, then load and classify the second element.
   338  LBB3_13:
   339  	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
   340  	LONG $0x8744634a; BYTE $0x04 // movsxd    rax, dword [rdi + 4*r8 + 4]
   341  	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
   342  	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
   343  	LONG $0x20eac148             // shr    rdx, 32
   344  	JNE  LBB3_15
   345  	WORD $0xd231                 // xor    edx, edx
   346  	WORD $0xf1f7                 // div    ecx
   347  	JMP  LBB3_16
   348  
        // 32-bit unsigned divide for the odd tail element.
   349  LBB3_5:
   350  	WORD $0xd231 // xor    edx, edx
   351  	WORD $0xf1f7 // div    ecx
   352  
        // Store the tail quotient.
   353  LBB3_7:
   354  	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax
   355  
   356  LBB3_8:
   357  	RET
   358  
   359  TEXT ·_multiply_constant_int64_int32_avx2(SB), $0-32
        // Multiplies the low 32 bits of each 64-bit element of src by the low 32
        // bits of factor and stores the low 32 bits of each product to dest
        // (8-byte source stride, 4-byte destination stride).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined; a value <= 0 when read as a
        // signed 32-bit integer returns immediately.
        // Register roles: DI=src, SI=dest, CX=factor, R8=len; DX holds len & -16
        // during the vector loop and is the element index in the scalar tail;
        // AX is the vector-loop element index.
   360  
   361  	MOVQ src+0(FP), DI
   362  	MOVQ dest+8(FP), SI
   363  	MOVQ len+16(FP), DX
   364  	MOVQ factor+24(FP), CX
   365  
        // len <= 0: return. len <= 15: run everything in the scalar loop from
        // index 0. Otherwise use the vector loop first.
   366  	WORD $0xd285             // test    edx, edx
   367  	JLE  LBB4_7
   368  	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
   369  	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
   370  	JA   LBB4_3
   371  	WORD $0xd231             // xor    edx, edx
   372  	JMP  LBB4_6
   373  
        // Vector setup: DX = len & -16, YMM0 = factor broadcast to 4 qword lanes,
        // XMM1 = upper 128 bits of YMM0.
   374  LBB4_3:
   375  	WORD $0x8944; BYTE $0xc2       // mov    edx, r8d
   376  	WORD $0xe283; BYTE $0xf0       // and    edx, -16
   377  	LONG $0x6ef9e1c4; BYTE $0xc1   // vmovq    xmm0, rcx
   378  	LONG $0x597de2c4; BYTE $0xc0   // vpbroadcastq    ymm0, xmm0
   379  	WORD $0xc031                   // xor    eax, eax
   380  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   381  
        // Vector loop, 16 elements per iteration. Each VSHUFPS with immediate 136
        // (0x88) keeps dwords 0 and 2 of both operands, i.e. the low halves of
        // four consecutive 64-bit source elements (or, for XMM6, the low half of
        // the broadcast factor in all four lanes). VPMULLD then multiplies the four
        // 32-bit lanes and the result is stored as four packed dwords.
   382  LBB4_4:
   383  	LONG $0x1410f8c5; BYTE $0xc7               // vmovups    xmm2, oword [rdi + 8*rax]
   384  	LONG $0x5c10f8c5; WORD $0x20c7             // vmovups    xmm3, oword [rdi + 8*rax + 32]
   385  	LONG $0x6410f8c5; WORD $0x40c7             // vmovups    xmm4, oword [rdi + 8*rax + 64]
   386  	LONG $0x6c10f8c5; WORD $0x60c7             // vmovups    xmm5, oword [rdi + 8*rax + 96]
   387  	LONG $0x54c6e8c5; WORD $0x10c7; BYTE $0x88 // vshufps    xmm2, xmm2, oword [rdi + 8*rax + 16], 136
   388  	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
   389  	LONG $0x4069e2c4; BYTE $0xd6               // vpmulld    xmm2, xmm2, xmm6
   390  	LONG $0x5cc6e0c5; WORD $0x30c7; BYTE $0x88 // vshufps    xmm3, xmm3, oword [rdi + 8*rax + 48], 136
   391  	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
   392  	LONG $0x4061e2c4; BYTE $0xde               // vpmulld    xmm3, xmm3, xmm6
   393  	LONG $0x64c6d8c5; WORD $0x50c7; BYTE $0x88 // vshufps    xmm4, xmm4, oword [rdi + 8*rax + 80], 136
   394  	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
   395  	LONG $0x4059e2c4; BYTE $0xe6               // vpmulld    xmm4, xmm4, xmm6
   396  	LONG $0x6cc6d0c5; WORD $0x70c7; BYTE $0x88 // vshufps    xmm5, xmm5, oword [rdi + 8*rax + 112], 136
   397  	LONG $0xf1c6f8c5; BYTE $0x88               // vshufps    xmm6, xmm0, xmm1, 136
   398  	LONG $0x4051e2c4; BYTE $0xee               // vpmulld    xmm5, xmm5, xmm6
   399  	LONG $0x147ffac5; BYTE $0x86               // vmovdqu    oword [rsi + 4*rax], xmm2
   400  	LONG $0x5c7ffac5; WORD $0x1086             // vmovdqu    oword [rsi + 4*rax + 16], xmm3
   401  	LONG $0x647ffac5; WORD $0x2086             // vmovdqu    oword [rsi + 4*rax + 32], xmm4
   402  	LONG $0x6c7ffac5; WORD $0x3086             // vmovdqu    oword [rsi + 4*rax + 48], xmm5
   403  	LONG $0x10c08348                           // add    rax, 16
   404  	WORD $0x3948; BYTE $0xc2                   // cmp    rdx, rax
   405  	JNE  LBB4_4
        // Done when len is a multiple of 16; otherwise DX is the first tail index.
   406  	WORD $0x394c; BYTE $0xc2                   // cmp    rdx, r8
   407  	JE   LBB4_7
   408  
        // Scalar loop: one element per iteration from index DX up to len; only the
        // low dword of each 64-bit source element is loaded.
   409  LBB4_6:
   410  	WORD $0x048b; BYTE $0xd7 // mov    eax, dword [rdi + 8*rdx]
   411  	WORD $0xaf0f; BYTE $0xc1 // imul    eax, ecx
   412  	WORD $0x0489; BYTE $0x96 // mov    dword [rsi + 4*rdx], eax
   413  	LONG $0x01c28348         // add    rdx, 1
   414  	WORD $0x3949; BYTE $0xd0 // cmp    r8, rdx
   415  	JNE  LBB4_6
   416  
        // Common exit; VZEROUPPER clears the upper YMM halves before returning.
   417  LBB4_7:
   418  	VZEROUPPER
   419  	RET
   420  
   421  TEXT ·_divide_constant_int64_int32_avx2(SB), $0-32
        // Divides each 64-bit element of src by the 64-bit factor and stores the
        // low 32 bits of each quotient to dest (8-byte source stride, 4-byte
        // destination stride).
        // Stack arguments (frame $0-32): src+0, dest+8, len+16, factor+24.
        // Only the low 32 bits of len are examined; a value <= 0 when read as a
        // signed 32-bit integer returns immediately.
        // No vector instructions appear in this routine despite the _avx2 suffix.
        // Each division picks one of two forms: when (x | factor) has no bits set
        // above bit 31, both operands are non-negative 32-bit values and a 32-bit
        // unsigned DIV yields the same quotient; otherwise CQO + 64-bit signed
        // IDIV is used.
        // Register roles: DI=src, SI=dest, CX=factor, R9=len, R8=element index,
        // R10=len & -2 (end of the two-elements-per-iteration loop).
        // NOTE(review): no guard against factor == 0 is visible here and DIV/IDIV
        // fault on a zero divisor; presumably callers guarantee non-zero -- confirm.
   422  
   423  	MOVQ src+0(FP), DI
   424  	MOVQ dest+8(FP), SI
   425  	MOVQ len+16(FP), DX
   426  	MOVQ factor+24(FP), CX
   427  
        // len <= 0: return. len == 1: go straight to the odd-element handler with
        // R8 = 0. Otherwise set up the pair loop at LBB5_9.
   428  	WORD $0xd285             // test    edx, edx
   429  	JLE  LBB5_8
   430  	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
   431  	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
   432  	JNE  LBB5_9
   433  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   434  
        // Odd tail: when len is odd, divide the one remaining element at index R8;
        // when len is even, return.
   435  LBB5_3:
   436  	LONG $0x01c1f641         // test    r9b, 1
   437  	JE   LBB5_8
   438  	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
   439  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   440  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   441  	LONG $0x20eac148         // shr    rdx, 32
   442  	JE   LBB5_5
   443  	WORD $0x9948             // cqo
   444  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   445  	JMP  LBB5_7
   446  
        // len >= 2: R10 = len rounded down to an even count, R8 = 0.
   447  LBB5_9:
   448  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   449  	LONG $0xfee28341         // and    r10d, -2
   450  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   451  	JMP  LBB5_10
   452  
        // 64-bit signed divide for the second element of the pair.
   453  LBB5_15:
   454  	WORD $0x9948             // cqo
   455  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   456  
        // Store the second quotient and advance by 2; fall through to the next pair
        // or branch to the odd-tail handler once R8 reaches R10.
   457  LBB5_16:
   458  	LONG $0x86448942; BYTE $0x04 // mov    dword [rsi + 4*r8 + 4], eax
   459  	LONG $0x02c08349             // add    r8, 2
   460  	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
   461  	JE   LBB5_3
   462  
        // First element of the pair: pick 32-bit DIV (LBB5_11) or 64-bit IDIV.
   463  LBB5_10:
   464  	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
   465  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   466  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   467  	LONG $0x20eac148         // shr    rdx, 32
   468  	JE   LBB5_11
   469  	WORD $0x9948             // cqo
   470  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   471  	JMP  LBB5_13
   472  
        // 32-bit unsigned divide for the first element (EDX cleared first).
   473  LBB5_11:
   474  	WORD $0xd231 // xor    edx, edx
   475  	WORD $0xf1f7 // div    ecx
   476  
        // Store the first quotient, then load and classify the second element.
   477  LBB5_13:
   478  	LONG $0x86048942             // mov    dword [rsi + 4*r8], eax
   479  	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
   480  	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
   481  	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
   482  	LONG $0x20eac148             // shr    rdx, 32
   483  	JNE  LBB5_15
   484  	WORD $0xd231                 // xor    edx, edx
   485  	WORD $0xf1f7                 // div    ecx
   486  	JMP  LBB5_16
   487  
        // 32-bit unsigned divide for the odd tail element.
   488  LBB5_5:
   489  	WORD $0xd231 // xor    edx, edx
   490  	WORD $0xf1f7 // div    ecx
   491  
        // Store the tail quotient.
   492  LBB5_7:
   493  	LONG $0x86048942 // mov    dword [rsi + 4*r8], eax
   494  
   495  LBB5_8:
   496  	RET
   497  
   498  TEXT ·_multiply_constant_int64_int64_avx2(SB), $0-32
   499  
   500  	MOVQ src+0(FP), DI
   501  	MOVQ dest+8(FP), SI
   502  	MOVQ len+16(FP), DX
   503  	MOVQ factor+24(FP), CX
   504  
   505  	WORD $0xd285             // test    edx, edx
   506  	JLE  LBB6_16
   507  	WORD $0x8941; BYTE $0xd0 // mov    r8d, edx
   508  	WORD $0xfa83; BYTE $0x0f // cmp    edx, 15
   509  	JBE  LBB6_2
   510  	LONG $0xc7048d4a         // lea    rax, [rdi + 8*r8]
   511  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   512  	JBE  LBB6_9
   513  	LONG $0xc6048d4a         // lea    rax, [rsi + 8*r8]
   514  	WORD $0x3948; BYTE $0xf8 // cmp    rax, rdi
   515  	JBE  LBB6_9
   516  
   517  LBB6_2:
   518  	WORD $0x3145; BYTE $0xdb // xor    r11d, r11d
   519  
   520  LBB6_3:
   521  	WORD $0x894d; BYTE $0xd9 // mov    r9, r11
   522  	WORD $0xf749; BYTE $0xd1 // not    r9
   523  	WORD $0x014d; BYTE $0xc1 // add    r9, r8
   524  	WORD $0x894c; BYTE $0xc0 // mov    rax, r8
   525  	LONG $0x03e08348         // and    rax, 3
   526  	JE   LBB6_5
   527  
   528  LBB6_4:
   529  	LONG $0xdf148b4a // mov    rdx, qword [rdi + 8*r11]
   530  	LONG $0xd1af0f48 // imul    rdx, rcx
   531  	LONG $0xde14894a // mov    qword [rsi + 8*r11], rdx
   532  	LONG $0x01c38349 // add    r11, 1
   533  	LONG $0xffc08348 // add    rax, -1
   534  	JNE  LBB6_4
   535  
   536  LBB6_5:
   537  	LONG $0x03f98349 // cmp    r9, 3
   538  	JB   LBB6_16
   539  
   540  LBB6_6:
   541  	LONG $0xdf048b4a             // mov    rax, qword [rdi + 8*r11]
   542  	LONG $0xc1af0f48             // imul    rax, rcx
   543  	LONG $0xde04894a             // mov    qword [rsi + 8*r11], rax
   544  	LONG $0xdf448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r11 + 8]
   545  	LONG $0xc1af0f48             // imul    rax, rcx
   546  	LONG $0xde44894a; BYTE $0x08 // mov    qword [rsi + 8*r11 + 8], rax
   547  	LONG $0xdf448b4a; BYTE $0x10 // mov    rax, qword [rdi + 8*r11 + 16]
   548  	LONG $0xc1af0f48             // imul    rax, rcx
   549  	LONG $0xde44894a; BYTE $0x10 // mov    qword [rsi + 8*r11 + 16], rax
   550  	LONG $0xdf448b4a; BYTE $0x18 // mov    rax, qword [rdi + 8*r11 + 24]
   551  	LONG $0xc1af0f48             // imul    rax, rcx
   552  	LONG $0xde44894a; BYTE $0x18 // mov    qword [rsi + 8*r11 + 24], rax
   553  	LONG $0x04c38349             // add    r11, 4
   554  	WORD $0x394d; BYTE $0xd8     // cmp    r8, r11
   555  	JNE  LBB6_6
   556  	JMP  LBB6_16
   557  
   558  LBB6_9:
   559  	WORD $0x8945; BYTE $0xc3     // mov    r11d, r8d
   560  	LONG $0xf0e38341             // and    r11d, -16
   561  	LONG $0x6ef9e1c4; BYTE $0xc1 // vmovq    xmm0, rcx
   562  	LONG $0x597de2c4; BYTE $0xc0 // vpbroadcastq    ymm0, xmm0
   563  	LONG $0xf0438d49             // lea    rax, [r11 - 16]
   564  	WORD $0x8949; BYTE $0xc1     // mov    r9, rax
   565  	LONG $0x04e9c149             // shr    r9, 4
   566  	LONG $0x01c18349             // add    r9, 1
   567  	LONG $0xd073f5c5; BYTE $0x20 // vpsrlq    ymm1, ymm0, 32
   568  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
   569  	JE   LBB6_10
   570  	WORD $0x894d; BYTE $0xca     // mov    r10, r9
   571  	LONG $0xfee28349             // and    r10, -2
   572  	WORD $0xf749; BYTE $0xda     // neg    r10
   573  	WORD $0xc031                 // xor    eax, eax
   574  
   575  LBB6_12:
   576  	LONG $0x146ffec5; BYTE $0xc7         // vmovdqu    ymm2, yword [rdi + 8*rax]
   577  	LONG $0x5c6ffec5; WORD $0x20c7       // vmovdqu    ymm3, yword [rdi + 8*rax + 32]
   578  	LONG $0x646ffec5; WORD $0x40c7       // vmovdqu    ymm4, yword [rdi + 8*rax + 64]
   579  	LONG $0x6c6ffec5; WORD $0x60c7       // vmovdqu    ymm5, yword [rdi + 8*rax + 96]
   580  	LONG $0xf1f4edc5                     // vpmuludq    ymm6, ymm2, ymm1
   581  	LONG $0xd273c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm2, 32
   582  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   583  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   584  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   585  	LONG $0xd0f4edc5                     // vpmuludq    ymm2, ymm2, ymm0
   586  	LONG $0xd6d4edc5                     // vpaddq    ymm2, ymm2, ymm6
   587  	LONG $0xf1f4e5c5                     // vpmuludq    ymm6, ymm3, ymm1
   588  	LONG $0xd373c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm3, 32
   589  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   590  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   591  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   592  	LONG $0xd8f4e5c5                     // vpmuludq    ymm3, ymm3, ymm0
   593  	LONG $0xded4e5c5                     // vpaddq    ymm3, ymm3, ymm6
   594  	LONG $0xf1f4ddc5                     // vpmuludq    ymm6, ymm4, ymm1
   595  	LONG $0xd473c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm4, 32
   596  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   597  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   598  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   599  	LONG $0xe0f4ddc5                     // vpmuludq    ymm4, ymm4, ymm0
   600  	LONG $0xe6d4ddc5                     // vpaddq    ymm4, ymm4, ymm6
   601  	LONG $0xf1f4d5c5                     // vpmuludq    ymm6, ymm5, ymm1
   602  	LONG $0xd573c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm5, 32
   603  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   604  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   605  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   606  	LONG $0xe8f4d5c5                     // vpmuludq    ymm5, ymm5, ymm0
   607  	LONG $0xeed4d5c5                     // vpaddq    ymm5, ymm5, ymm6
   608  	LONG $0x147ffec5; BYTE $0xc6         // vmovdqu    yword [rsi + 8*rax], ymm2
   609  	LONG $0x5c7ffec5; WORD $0x20c6       // vmovdqu    yword [rsi + 8*rax + 32], ymm3
   610  	LONG $0x647ffec5; WORD $0x40c6       // vmovdqu    yword [rsi + 8*rax + 64], ymm4
   611  	LONG $0x6c7ffec5; WORD $0x60c6       // vmovdqu    yword [rsi + 8*rax + 96], ymm5
   612  	QUAD $0x000080c7946ffec5; BYTE $0x00 // vmovdqu    ymm2, yword [rdi + 8*rax + 128]
   613  	QUAD $0x0000a0c79c6ffec5; BYTE $0x00 // vmovdqu    ymm3, yword [rdi + 8*rax + 160]
   614  	QUAD $0x0000c0c7a46ffec5; BYTE $0x00 // vmovdqu    ymm4, yword [rdi + 8*rax + 192]
   615  	QUAD $0x0000e0c7ac6ffec5; BYTE $0x00 // vmovdqu    ymm5, yword [rdi + 8*rax + 224]
   616  	LONG $0xf1f4edc5                     // vpmuludq    ymm6, ymm2, ymm1
   617  	LONG $0xd273c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm2, 32
   618  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   619  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   620  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   621  	LONG $0xd0f4edc5                     // vpmuludq    ymm2, ymm2, ymm0
   622  	LONG $0xd6d4edc5                     // vpaddq    ymm2, ymm2, ymm6
   623  	LONG $0xf1f4e5c5                     // vpmuludq    ymm6, ymm3, ymm1
   624  	LONG $0xd373c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm3, 32
   625  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   626  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   627  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   628  	LONG $0xd8f4e5c5                     // vpmuludq    ymm3, ymm3, ymm0
   629  	LONG $0xded4e5c5                     // vpaddq    ymm3, ymm3, ymm6
   630  	LONG $0xf1f4ddc5                     // vpmuludq    ymm6, ymm4, ymm1
   631  	LONG $0xd473c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm4, 32
   632  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   633  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   634  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   635  	LONG $0xe0f4ddc5                     // vpmuludq    ymm4, ymm4, ymm0
   636  	LONG $0xe6d4ddc5                     // vpaddq    ymm4, ymm4, ymm6
   637  	LONG $0xf1f4d5c5                     // vpmuludq    ymm6, ymm5, ymm1
   638  	LONG $0xd573c5c5; BYTE $0x20         // vpsrlq    ymm7, ymm5, 32
   639  	LONG $0xf8f4c5c5                     // vpmuludq    ymm7, ymm7, ymm0
   640  	LONG $0xf7d4cdc5                     // vpaddq    ymm6, ymm6, ymm7
   641  	LONG $0xf673cdc5; BYTE $0x20         // vpsllq    ymm6, ymm6, 32
   642  	LONG $0xe8f4d5c5                     // vpmuludq    ymm5, ymm5, ymm0
   643  	LONG $0xeed4d5c5                     // vpaddq    ymm5, ymm5, ymm6
   644  	QUAD $0x000080c6947ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 128], ymm2
   645  	QUAD $0x0000a0c69c7ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 160], ymm3
   646  	QUAD $0x0000c0c6a47ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 192], ymm4
   647  	QUAD $0x0000e0c6ac7ffec5; BYTE $0x00 // vmovdqu    yword [rsi + 8*rax + 224], ymm5
   648  	LONG $0x20c08348                     // add    rax, 32
   649  	LONG $0x02c28349                     // add    r10, 2
   650  	JNE  LBB6_12
   651  	LONG $0x01c1f641                     // test    r9b, 1
   652  	JE   LBB6_15
   653  
   654  LBB6_14:
   655  	LONG $0x146ffec5; BYTE $0xc7   // vmovdqu    ymm2, yword [rdi + 8*rax]
   656  	LONG $0x5c6ffec5; WORD $0x20c7 // vmovdqu    ymm3, yword [rdi + 8*rax + 32]
   657  	LONG $0x646ffec5; WORD $0x40c7 // vmovdqu    ymm4, yword [rdi + 8*rax + 64]
   658  	LONG $0x6c6ffec5; WORD $0x60c7 // vmovdqu    ymm5, yword [rdi + 8*rax + 96]
   659  	LONG $0xf1f4edc5               // vpmuludq    ymm6, ymm2, ymm1
   660  	LONG $0xd273c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm2, 32
   661  	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
   662  	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
   663  	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
   664  	LONG $0xd0f4edc5               // vpmuludq    ymm2, ymm2, ymm0
   665  	LONG $0xd6d4edc5               // vpaddq    ymm2, ymm2, ymm6
   666  	LONG $0xf1f4e5c5               // vpmuludq    ymm6, ymm3, ymm1
   667  	LONG $0xd373c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm3, 32
   668  	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
   669  	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
   670  	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
   671  	LONG $0xd8f4e5c5               // vpmuludq    ymm3, ymm3, ymm0
   672  	LONG $0xded4e5c5               // vpaddq    ymm3, ymm3, ymm6
   673  	LONG $0xf1f4ddc5               // vpmuludq    ymm6, ymm4, ymm1
   674  	LONG $0xd473c5c5; BYTE $0x20   // vpsrlq    ymm7, ymm4, 32
   675  	LONG $0xf8f4c5c5               // vpmuludq    ymm7, ymm7, ymm0
   676  	LONG $0xf7d4cdc5               // vpaddq    ymm6, ymm6, ymm7
   677  	LONG $0xf673cdc5; BYTE $0x20   // vpsllq    ymm6, ymm6, 32
   678  	LONG $0xe0f4ddc5               // vpmuludq    ymm4, ymm4, ymm0
   679  	LONG $0xe6d4ddc5               // vpaddq    ymm4, ymm4, ymm6
   680  	LONG $0xc9f4d5c5               // vpmuludq    ymm1, ymm5, ymm1
   681  	LONG $0xd573cdc5; BYTE $0x20   // vpsrlq    ymm6, ymm5, 32
   682  	LONG $0xf0f4cdc5               // vpmuludq    ymm6, ymm6, ymm0
   683  	LONG $0xced4f5c5               // vpaddq    ymm1, ymm1, ymm6
   684  	LONG $0xf173f5c5; BYTE $0x20   // vpsllq    ymm1, ymm1, 32
   685  	LONG $0xc0f4d5c5               // vpmuludq    ymm0, ymm5, ymm0
   686  	LONG $0xc1d4fdc5               // vpaddq    ymm0, ymm0, ymm1
   687  	LONG $0x147ffec5; BYTE $0xc6   // vmovdqu    yword [rsi + 8*rax], ymm2
   688  	LONG $0x5c7ffec5; WORD $0x20c6 // vmovdqu    yword [rsi + 8*rax + 32], ymm3
   689  	LONG $0x647ffec5; WORD $0x40c6 // vmovdqu    yword [rsi + 8*rax + 64], ymm4
   690  	LONG $0x447ffec5; WORD $0x60c6 // vmovdqu    yword [rsi + 8*rax + 96], ymm0
   691  
   692  LBB6_15:
   693  	WORD $0x394d; BYTE $0xc3 // cmp    r11, r8
   694  	JNE  LBB6_3
   695  
   696  LBB6_16:
   697  	VZEROUPPER
   698  	RET
   699  
   700  LBB6_10:
   701  	WORD $0xc031     // xor    eax, eax
   702  	LONG $0x01c1f641 // test    r9b, 1
   703  	JNE  LBB6_14
   704  	JMP  LBB6_15
   705  
   706  TEXT ·_divide_constant_int64_int64_avx2(SB), $0-32
        // _divide_constant_int64_int64_avx2(src, dest, len, factor)
        //
        // For every index i in [0, len) this routine loads the 8-byte value
        // src[i], divides it by factor, and stores the 8-byte quotient to dest[i].
        //
        // Frame: $0-32 => no local stack space, 32 bytes of arguments read from
        // src+0(FP), dest+8(FP), len+16(FP) and factor+24(FP).
        //
        // Register roles once the arguments are loaded:
        //   rdi (DI) = src base address       rsi (SI) = dest base address
        //   rcx (CX) = factor (the divisor)   r9       = len, zero-extended from 32 bits
        //   r8       = current element index  r10      = len with bit 0 cleared
        //   rax/rdx  = dividend / quotient / remainder scratch for div and idiv
        //
        // Only the low 32 bits of len are examined (all tests and copies use edx),
        // and a value that is <= 0 as a signed 32-bit integer returns immediately.
        //
        // Per-element strategy: (value | factor) >> 32 == 0 means both operands are
        // non-negative and below 2^32, so a 32-bit unsigned `div ecx` yields the
        // same quotient as the signed 64-bit division and is used instead;
        // writing eax also clears the upper half of rax, so the qword store is
        // correct. Otherwise the code sign-extends with `cqo` and uses `idiv rcx`.
        //
        // Despite the _avx2 suffix, this function contains only scalar integer
        // instructions; no vector registers are touched.
        //
        // NOTE(review): there is no visible guard against factor == 0, and
        // div/idiv raise a divide error on a zero divisor -- presumably the
        // caller guarantees a non-zero factor; confirm.
        // NOTE(review): `idiv` also faults when the quotient does not fit in
        // 64 bits (most-negative value divided by -1); no guard is visible here
        // -- confirm the caller's expectations.
   707  
   708  	MOVQ src+0(FP), DI
   709  	MOVQ dest+8(FP), SI
   710  	MOVQ len+16(FP), DX
   711  	MOVQ factor+24(FP), CX
   712  
        // Entry checks: nothing to do for len <= 0. For len == 1 skip the
        // two-at-a-time loop and fall into the single-element tail with r8 = 0;
        // any other length goes to LBB7_9 to set up the paired loop.
   713  	WORD $0xd285             // test    edx, edx
   714  	JLE  LBB7_8
   715  	WORD $0x8941; BYTE $0xd1 // mov    r9d, edx
   716  	WORD $0xfa83; BYTE $0x01 // cmp    edx, 1
   717  	JNE  LBB7_9
   718  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   719  
        // Tail: if len is odd, exactly one element at index r8 is still
        // unprocessed; if len is even, return. The quotient is stored at LBB7_7.
   720  LBB7_3:
   721  	LONG $0x01c1f641         // test    r9b, 1
   722  	JE   LBB7_8
   723  	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
        // rdx = (value | factor) >> 32; zero selects the 32-bit path at LBB7_5.
   724  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   725  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   726  	LONG $0x20eac148         // shr    rdx, 32
   727  	JE   LBB7_5
        // 64-bit signed path: cqo sign-extends rax into rdx:rax before idiv.
   728  	WORD $0x9948             // cqo
   729  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   730  	JMP  LBB7_7
   731  
        // Paired-loop setup: r10 = len & ~1 is the index at which the loop
        // stops; r8 (the element index) starts at 0.
   732  LBB7_9:
   733  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   734  	LONG $0xfee28341         // and    r10d, -2
   735  	WORD $0x3145; BYTE $0xc0 // xor    r8d, r8d
   736  	JMP  LBB7_10
   737  
        // 64-bit signed path for the second element of the current pair
        // (reached from the check at the end of LBB7_13).
   738  LBB7_15:
   739  	WORD $0x9948             // cqo
   740  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   741  
        // Store the second quotient to dest[r8+1], advance the index by two, and
        // leave the loop for the tail once r8 reaches r10; otherwise fall through
        // into the next iteration.
   742  LBB7_16:
   743  	LONG $0xc644894a; BYTE $0x08 // mov    qword [rsi + 8*r8 + 8], rax
   744  	LONG $0x02c08349             // add    r8, 2
   745  	WORD $0x394d; BYTE $0xc2     // cmp    r10, r8
   746  	JE   LBB7_3
   747  
        // Loop body, first element of the pair: load src[r8] and pick the
        // 32-bit unsigned path (LBB7_11) or the 64-bit signed path (cqo/idiv).
   748  LBB7_10:
   749  	LONG $0xc7048b4a         // mov    rax, qword [rdi + 8*r8]
   750  	WORD $0x8948; BYTE $0xc2 // mov    rdx, rax
   751  	WORD $0x0948; BYTE $0xca // or    rdx, rcx
   752  	LONG $0x20eac148         // shr    rdx, 32
   753  	JE   LBB7_11
   754  	WORD $0x9948             // cqo
   755  	WORD $0xf748; BYTE $0xf9 // idiv    rcx
   756  	JMP  LBB7_13
   757  
        // 32-bit unsigned path for the first element: edx is cleared so the
        // dividend edx:eax is just eax.
   758  LBB7_11:
   759  	WORD $0xd231 // xor    edx, edx
   760  	WORD $0xf1f7 // div    ecx
   761  
        // Store the first quotient to dest[r8], then load src[r8+1] and repeat
        // the width check for the second element of the pair. The wide case
        // jumps back to LBB7_15; the narrow case divides here and joins LBB7_16.
   762  LBB7_13:
   763  	LONG $0xc604894a             // mov    qword [rsi + 8*r8], rax
   764  	LONG $0xc7448b4a; BYTE $0x08 // mov    rax, qword [rdi + 8*r8 + 8]
   765  	WORD $0x8948; BYTE $0xc2     // mov    rdx, rax
   766  	WORD $0x0948; BYTE $0xca     // or    rdx, rcx
   767  	LONG $0x20eac148             // shr    rdx, 32
   768  	JNE  LBB7_15
   769  	WORD $0xd231                 // xor    edx, edx
   770  	WORD $0xf1f7                 // div    ecx
   771  	JMP  LBB7_16
   772  
        // 32-bit unsigned path for the tail element.
   773  LBB7_5:
   774  	WORD $0xd231 // xor    edx, edx
   775  	WORD $0xf1f7 // div    ecx
   776  
        // Store the tail element's quotient to dest[r8].
   777  LBB7_7:
   778  	LONG $0xc604894a // mov    qword [rsi + 8*r8], rax
   779  
        // Common exit.
   780  LBB7_8:
   781  	RET