github.com/apache/arrow/go/v14@v14.0.1/internal/utils/min_max_avx2_amd64.s (about)

     1  //+build !noasm !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x8080808080808080 // bytes 0-31: 0x80 (-128 as int8) in every byte; seed for the running-max vectors, loaded from 0[rbp]
     5  DATA LCDATA1<>+0x008(SB)/8, $0x8080808080808080
     6  DATA LCDATA1<>+0x010(SB)/8, $0x8080808080808080
     7  DATA LCDATA1<>+0x018(SB)/8, $0x8080808080808080
     8  DATA LCDATA1<>+0x020(SB)/8, $0x7f7f7f7f7f7f7f7f // bytes 32-63: 0x7f (127) in every byte; seed for the running-min vectors, loaded from 32[rbp]
     9  DATA LCDATA1<>+0x028(SB)/8, $0x7f7f7f7f7f7f7f7f
    10  DATA LCDATA1<>+0x030(SB)/8, $0x7f7f7f7f7f7f7f7f
    11  DATA LCDATA1<>+0x038(SB)/8, $0x7f7f7f7f7f7f7f7f
    12  DATA LCDATA1<>+0x040(SB)/8, $0x7f7f7f7f7f7f7f7f // bytes 64-79: 0x7f XOR mask applied to the max candidates (64[rbp]) before the unsigned horizontal-min search
    13  DATA LCDATA1<>+0x048(SB)/8, $0x7f7f7f7f7f7f7f7f
    14  DATA LCDATA1<>+0x050(SB)/8, $0x8080808080808080 // bytes 80-95: 0x80 XOR mask applied to the min candidates (80[rbp]) before the unsigned horizontal-min search
    15  DATA LCDATA1<>+0x058(SB)/8, $0x8080808080808080
    16  GLOBL LCDATA1<>(SB), 8, $96 // file-local 96-byte table used by ·_int8_max_min_avx2; flag 8 is RODATA in Go's textflag.h
    17  
    18  TEXT ·_int8_max_min_avx2(SB), $8-32
        // _int8_max_min_avx2(values, length, minout, maxout)
        //
        // Scans `length` signed 8-bit elements starting at address `values`, then
        // stores the smallest element as one byte through `minout` and the largest
        // as one byte through `maxout`.
        //
        // Arguments (32 bytes, read through the FP pseudo-register):
        //   values+0   address of the first element
        //   length+8   element count; only the low 32 bits are examined, as a
        //              signed value (test esi, esi / mov r9d, esi)
        //   minout+16  address that receives the minimum
        //   maxout+24  address that receives the maximum
        //
        // When the count is <= 0 the routine stores 127 as the minimum and -128 as
        // the maximum. Counts of 64 or more go through an AVX2 loop that consumes
        // 64-element chunks; whatever is left is handled by a scalar loop.
        //
        // Register roles: DI = values, R9 = count, R10 = elements covered by the
        // vector code and then the scalar index, SIL = running min, R8B = running
        // max, DX/CX = output addresses, BP = base of LCDATA1.
        //
        // The frame size is 8 rather than 0 on purpose: this routine overwrites BP
        // with the address of LCDATA1, and a non-zero frame makes the Go assembler
        // emit the prologue/epilogue that saves and restores the caller's frame
        // pointer. With a zero-size frame BP would be returned clobbered.
    19  
    20  	MOVQ values+0(FP), DI
    21  	MOVQ length+8(FP), SI
    22  	MOVQ minout+16(FP), DX
    23  	MOVQ maxout+24(FP), CX
    24  	LEAQ LCDATA1<>(SB), BP
    25  
        // Dispatch on the count: <= 0 -> LBB0_1, >= 64 -> LBB0_4 (vector path),
        // otherwise seed max/min with -128/127 and run the scalar loop from index 0.
    26  	WORD $0xf685             // test    esi, esi
    27  	JLE  LBB0_1
    28  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
    29  	WORD $0xfe83; BYTE $0x3f // cmp    esi, 63
    30  	JA   LBB0_4
    31  	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
    32  	WORD $0xb640; BYTE $0x7f // mov    sil, 127
    33  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
    34  	JMP  LBB0_11
    35  
        // Empty input: report min = 127, max = -128.
    36  LBB0_1:
    37  	WORD $0xb640; BYTE $0x7f // mov    sil, 127
    38  	WORD $0xb041; BYTE $0x80 // mov    r8b, -128
    39  	JMP  LBB0_12
    40  
        // Vector setup: r10 = count rounded down to a multiple of 64,
        // r8 = number of 64-element chunks. A single chunk is set up at LBB0_5.
        // Otherwise rsi = -(r8 & -2) counts chunk pairs up to zero, ymm0/ymm2 hold
        // running minima (seeded with 0x7f) and ymm1/ymm3 running maxima (seeded
        // with 0x80).
    41  LBB0_4:
    42  	WORD $0x8945; BYTE $0xca     // mov    r10d, r9d
    43  	LONG $0xc0e28341             // and    r10d, -64
    44  	LONG $0xc0428d49             // lea    rax, [r10 - 64]
    45  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
    46  	LONG $0x06e8c149             // shr    r8, 6
    47  	LONG $0x01c08349             // add    r8, 1
    48  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
    49  	JE   LBB0_5
    50  	WORD $0x894c; BYTE $0xc6     // mov    rsi, r8
    51  	LONG $0xfee68348             // and    rsi, -2
    52  	WORD $0xf748; BYTE $0xde     // neg    rsi
    53  	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
    54  	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
    55  	WORD $0xc031                 // xor    eax, eax
    56  	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
    57  	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
    58  
        // Main loop: two chunks (128 bytes) per iteration; rax is the byte offset.
    59  LBB0_7:
    60  	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
    61  	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
    62  	LONG $0x746ffec5; WORD $0x4007 // vmovdqu    ymm6, yword [rdi + rax + 64]
    63  	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu    ymm7, yword [rdi + rax + 96]
    64  	LONG $0x387de2c4; BYTE $0xc4   // vpminsb    ymm0, ymm0, ymm4
    65  	LONG $0x386de2c4; BYTE $0xd5   // vpminsb    ymm2, ymm2, ymm5
    66  	LONG $0x3c75e2c4; BYTE $0xcc   // vpmaxsb    ymm1, ymm1, ymm4
    67  	LONG $0x3c65e2c4; BYTE $0xdd   // vpmaxsb    ymm3, ymm3, ymm5
    68  	LONG $0x387de2c4; BYTE $0xc6   // vpminsb    ymm0, ymm0, ymm6
    69  	LONG $0x386de2c4; BYTE $0xd7   // vpminsb    ymm2, ymm2, ymm7
    70  	LONG $0x3c75e2c4; BYTE $0xce   // vpmaxsb    ymm1, ymm1, ymm6
    71  	LONG $0x3c65e2c4; BYTE $0xdf   // vpmaxsb    ymm3, ymm3, ymm7
    72  	LONG $0x80e88348               // sub    rax, -128
    73  	LONG $0x02c68348               // add    rsi, 2
    74  	JNE  LBB0_7
    75  	LONG $0x01c0f641               // test    r8b, 1
    76  	JE   LBB0_10
    77  
        // Odd chunk count: fold in the one remaining 64-byte chunk.
    78  LBB0_9:
    79  	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
    80  	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
    81  	LONG $0x3c65e2c4; BYTE $0xdd   // vpmaxsb    ymm3, ymm3, ymm5
    82  	LONG $0x3c75e2c4; BYTE $0xcc   // vpmaxsb    ymm1, ymm1, ymm4
    83  	LONG $0x386de2c4; BYTE $0xd5   // vpminsb    ymm2, ymm2, ymm5
    84  	LONG $0x387de2c4; BYTE $0xc4   // vpminsb    ymm0, ymm0, ymm4
    85  
        // Horizontal reduction. vphminposuw only finds an unsigned 16-bit minimum,
        // so the max candidates are XORed with 0x7f and the min candidates with
        // 0x80 to turn both searches into an unsigned minimum. vpsrlw + vpminub
        // folds each byte pair into the low byte of its word. The scalar XOR after
        // each vmovd undoes the mask. Results: r8b = max, sil = min.
    86  LBB0_10:
    87  	LONG $0x3c75e2c4; BYTE $0xcb   // vpmaxsb    ymm1, ymm1, ymm3
    88  	LONG $0x397de3c4; WORD $0x01cb // vextracti128    xmm3, ymm1, 1
    89  	LONG $0x3c71e2c4; BYTE $0xcb   // vpmaxsb    xmm1, xmm1, xmm3
    90  	LONG $0x4deff1c5; BYTE $0x40   // vpxor    xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI0_2] */
    91  	LONG $0x387de2c4; BYTE $0xc2   // vpminsb    ymm0, ymm0, ymm2
    92  	LONG $0xd171e9c5; BYTE $0x08   // vpsrlw    xmm2, xmm1, 8
    93  	LONG $0xcadaf1c5               // vpminub    xmm1, xmm1, xmm2
    94  	LONG $0x4179e2c4; BYTE $0xc9   // vphminposuw    xmm1, xmm1
    95  	LONG $0x7e79c1c4; BYTE $0xc8   // vmovd    r8d, xmm1
    96  	LONG $0x7ff08041               // xor    r8b, 127
    97  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
    98  	LONG $0x3879e2c4; BYTE $0xc1   // vpminsb    xmm0, xmm0, xmm1
    99  	LONG $0x45eff9c5; BYTE $0x50   // vpxor    xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI0_3] */
   100  	LONG $0xd071f1c5; BYTE $0x08   // vpsrlw    xmm1, xmm0, 8
   101  	LONG $0xc1daf9c5               // vpminub    xmm0, xmm0, xmm1
   102  	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
   103  	LONG $0xc67ef9c5               // vmovd    esi, xmm0
   104  	LONG $0x80f68040               // xor    sil, -128
   105  	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
   106  	JE   LBB0_12
   107  
        // Scalar tail: one element per iteration from index r10 up to r9, using
        // signed compares (cmovg lowers the min, cmovl raises the max).
   108  LBB0_11:
   109  	LONG $0x04b60f42; BYTE $0x17 // movzx    eax, byte [rdi + r10]
   110  	WORD $0x3840; BYTE $0xc6     // cmp    sil, al
   111  	LONG $0xf6b60f40             // movzx    esi, sil
   112  	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
   113  	WORD $0x3841; BYTE $0xc0     // cmp    r8b, al
   114  	LONG $0xc0b60f45             // movzx    r8d, r8b
   115  	LONG $0xc04c0f44             // cmovl    r8d, eax
   116  	LONG $0x01c28349             // add    r10, 1
   117  	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
   118  	JNE  LBB0_11
   119  
        // Store the results: max through rcx (maxout), min through rdx (minout).
   120  LBB0_12:
   121  	WORD $0x8844; BYTE $0x01 // mov    byte [rcx], r8b
   122  	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
   123  	VZEROUPPER
   124  	RET
   125  
        // Exactly one 64-element chunk: seed the accumulators, then r8 == 1 sends
        // control to LBB0_9 for the single chunk.
   126  LBB0_5:
   127  	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI0_0] */
   128  	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI0_1] */
   129  	WORD $0xc031                 // xor    eax, eax
   130  	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
   131  	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
   132  	LONG $0x01c0f641             // test    r8b, 1
   133  	JNE  LBB0_9
   134  	JMP  LBB0_10
   135  
   136  TEXT ·_uint8_max_min_avx2(SB), $0-32
        // _uint8_max_min_avx2(values, length, minout, maxout)
        //
        // Scans `length` unsigned 8-bit elements starting at address `values`, then
        // stores the smallest element as one byte through `minout` and the largest
        // as one byte through `maxout`.
        //
        // Arguments (32 bytes, read through the FP pseudo-register):
        //   values+0   address of the first element
        //   length+8   element count; only the low 32 bits are examined, as a
        //              signed value (test esi, esi / mov r9d, esi)
        //   minout+16  address that receives the minimum
        //   maxout+24  address that receives the maximum
        //
        // When the count is <= 0 the routine stores 0xff as the minimum and 0 as
        // the maximum. Counts of 64 or more go through an AVX2 loop that consumes
        // 64-element chunks; whatever is left is handled by a scalar loop.
        //
        // Register roles: DI = values, R9 = count, R10 = elements covered by the
        // vector code and then the scalar index, SIL = running min, AL = running
        // max, DX/CX = output addresses. No constant table is needed; the
        // accumulators are seeded with vpxor (zero) and vpcmpeqd (all ones).
   137  
   138  	MOVQ values+0(FP), DI
   139  	MOVQ length+8(FP), SI
   140  	MOVQ minout+16(FP), DX
   141  	MOVQ maxout+24(FP), CX
   142  
        // Dispatch on the count: <= 0 -> LBB1_1, >= 64 -> LBB1_4 (vector path),
        // otherwise seed min/max with 0xff/0 and run the scalar loop from index 0.
   143  	WORD $0xf685             // test    esi, esi
   144  	JLE  LBB1_1
   145  	WORD $0x8941; BYTE $0xf1 // mov    r9d, esi
   146  	WORD $0xfe83; BYTE $0x3f // cmp    esi, 63
   147  	JA   LBB1_4
   148  	WORD $0xb640; BYTE $0xff // mov    sil, -1
   149  	WORD $0x3145; BYTE $0xd2 // xor    r10d, r10d
   150  	WORD $0xc031             // xor    eax, eax
   151  	JMP  LBB1_11
   152  
        // Empty input: report min = 0xff, max = 0.
   153  LBB1_1:
   154  	WORD $0xb640; BYTE $0xff // mov    sil, -1
   155  	WORD $0xc031             // xor    eax, eax
   156  	JMP  LBB1_12
   157  
        // Vector setup: r10 = count rounded down to a multiple of 64,
        // r8 = number of 64-element chunks. A single chunk is set up at LBB1_5.
        // Otherwise rsi = -(r8 & -2) counts chunk pairs up to zero, ymm1/ymm2 hold
        // running minima (all ones) and ymm0/ymm3 running maxima (zero; the
        // VEX-encoded xmm vpxor also clears the upper ymm lanes).
   158  LBB1_4:
   159  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   160  	LONG $0xc0e28341         // and    r10d, -64
   161  	LONG $0xc0428d49         // lea    rax, [r10 - 64]
   162  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
   163  	LONG $0x06e8c149         // shr    r8, 6
   164  	LONG $0x01c08349         // add    r8, 1
   165  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
   166  	JE   LBB1_5
   167  	WORD $0x894c; BYTE $0xc6 // mov    rsi, r8
   168  	LONG $0xfee68348         // and    rsi, -2
   169  	WORD $0xf748; BYTE $0xde // neg    rsi
   170  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
   171  	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
   172  	WORD $0xc031             // xor    eax, eax
   173  	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
   174  	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3
   175  
        // Main loop: two chunks (128 bytes) per iteration; rax is the byte offset.
   176  LBB1_7:
   177  	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
   178  	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
   179  	LONG $0x746ffec5; WORD $0x4007 // vmovdqu    ymm6, yword [rdi + rax + 64]
   180  	LONG $0x7c6ffec5; WORD $0x6007 // vmovdqu    ymm7, yword [rdi + rax + 96]
   181  	LONG $0xccdaf5c5               // vpminub    ymm1, ymm1, ymm4
   182  	LONG $0xd5daedc5               // vpminub    ymm2, ymm2, ymm5
   183  	LONG $0xc4defdc5               // vpmaxub    ymm0, ymm0, ymm4
   184  	LONG $0xdddee5c5               // vpmaxub    ymm3, ymm3, ymm5
   185  	LONG $0xcedaf5c5               // vpminub    ymm1, ymm1, ymm6
   186  	LONG $0xd7daedc5               // vpminub    ymm2, ymm2, ymm7
   187  	LONG $0xc6defdc5               // vpmaxub    ymm0, ymm0, ymm6
   188  	LONG $0xdfdee5c5               // vpmaxub    ymm3, ymm3, ymm7
   189  	LONG $0x80e88348               // sub    rax, -128
   190  	LONG $0x02c68348               // add    rsi, 2
   191  	JNE  LBB1_7
   192  	LONG $0x01c0f641               // test    r8b, 1
   193  	JE   LBB1_10
   194  
        // Odd chunk count: fold in the one remaining 64-byte chunk.
   195  LBB1_9:
   196  	LONG $0x246ffec5; BYTE $0x07   // vmovdqu    ymm4, yword [rdi + rax]
   197  	LONG $0x6c6ffec5; WORD $0x2007 // vmovdqu    ymm5, yword [rdi + rax + 32]
   198  	LONG $0xdddee5c5               // vpmaxub    ymm3, ymm3, ymm5
   199  	LONG $0xc4defdc5               // vpmaxub    ymm0, ymm0, ymm4
   200  	LONG $0xd5daedc5               // vpminub    ymm2, ymm2, ymm5
   201  	LONG $0xccdaf5c5               // vpminub    ymm1, ymm1, ymm4
   202  
        // Horizontal reduction. The max candidates are inverted (XOR with all
        // ones) so the maximum can be found as an unsigned minimum, and `not al`
        // undoes the inversion. vpsrlw + vpminub folds each byte pair into the low
        // byte of its word before vphminposuw picks the smallest word.
        // Results: al = max, sil = min.
   203  LBB1_10:
   204  	LONG $0xcadaf5c5               // vpminub    ymm1, ymm1, ymm2
   205  	LONG $0xc3defdc5               // vpmaxub    ymm0, ymm0, ymm3
   206  	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
   207  	LONG $0xc2def9c5               // vpmaxub    xmm0, xmm0, xmm2
   208  	LONG $0xd276e9c5               // vpcmpeqd    xmm2, xmm2, xmm2
   209  	LONG $0xc2eff9c5               // vpxor    xmm0, xmm0, xmm2
   210  	LONG $0xd071e9c5; BYTE $0x08   // vpsrlw    xmm2, xmm0, 8
   211  	LONG $0xc2daf9c5               // vpminub    xmm0, xmm0, xmm2
   212  	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
   213  	LONG $0xc07ef9c5               // vmovd    eax, xmm0
   214  	WORD $0xd0f6                   // not    al
   215  	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
   216  	LONG $0xc0daf1c5               // vpminub    xmm0, xmm1, xmm0
   217  	LONG $0xd071f1c5; BYTE $0x08   // vpsrlw    xmm1, xmm0, 8
   218  	LONG $0xc1daf9c5               // vpminub    xmm0, xmm0, xmm1
   219  	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
   220  	LONG $0xc67ef9c5               // vmovd    esi, xmm0
   221  	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
   222  	JE   LBB1_12
   223  
        // Scalar tail: one element per iteration from index r10 up to r9, using
        // unsigned compares (cmovae lowers the min, cmovbe raises the max).
   224  LBB1_11:
   225  	LONG $0x04b60f46; BYTE $0x17 // movzx    r8d, byte [rdi + r10]
   226  	WORD $0x3844; BYTE $0xc6     // cmp    sil, r8b
   227  	LONG $0xf6b60f40             // movzx    esi, sil
   228  	LONG $0xf0430f41             // cmovae    esi, r8d
   229  	WORD $0x3844; BYTE $0xc0     // cmp    al, r8b
   230  	WORD $0xb60f; BYTE $0xc0     // movzx    eax, al
   231  	LONG $0xc0460f41             // cmovbe    eax, r8d
   232  	LONG $0x01c28349             // add    r10, 1
   233  	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
   234  	JNE  LBB1_11
   235  
        // Store the results: max through rcx (maxout), min through rdx (minout).
   236  LBB1_12:
   237  	WORD $0x0188             // mov    byte [rcx], al
   238  	WORD $0x8840; BYTE $0x32 // mov    byte [rdx], sil
   239  	VZEROUPPER
   240  	RET
   241  
        // Exactly one 64-element chunk: seed the accumulators, then r8 == 1 sends
        // control to LBB1_9 for the single chunk.
   242  LBB1_5:
   243  	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
   244  	LONG $0xc976f5c5 // vpcmpeqd    ymm1, ymm1, ymm1
   245  	WORD $0xc031     // xor    eax, eax
   246  	LONG $0xd276edc5 // vpcmpeqd    ymm2, ymm2, ymm2
   247  	LONG $0xdbefe1c5 // vpxor    xmm3, xmm3, xmm3
   248  	LONG $0x01c0f641 // test    r8b, 1
   249  	JNE  LBB1_9
   250  	JMP  LBB1_10
   251  
   252  DATA LCDATA2<>+0x000(SB)/8, $0x8000800080008000 // bytes 0-31: 0x8000 (-32768 as int16) in every word; seed for the running-max vectors, loaded from 0[rbp]
   253  DATA LCDATA2<>+0x008(SB)/8, $0x8000800080008000
   254  DATA LCDATA2<>+0x010(SB)/8, $0x8000800080008000
   255  DATA LCDATA2<>+0x018(SB)/8, $0x8000800080008000
   256  DATA LCDATA2<>+0x020(SB)/8, $0x7fff7fff7fff7fff // bytes 32-63: 0x7fff (32767) in every word; seed for the running-min vectors, loaded from 32[rbp]
   257  DATA LCDATA2<>+0x028(SB)/8, $0x7fff7fff7fff7fff
   258  DATA LCDATA2<>+0x030(SB)/8, $0x7fff7fff7fff7fff
   259  DATA LCDATA2<>+0x038(SB)/8, $0x7fff7fff7fff7fff
   260  DATA LCDATA2<>+0x040(SB)/8, $0x7fff7fff7fff7fff // bytes 64-79: 0x7fff XOR mask applied to the max candidates (64[rbp]) before vphminposuw
   261  DATA LCDATA2<>+0x048(SB)/8, $0x7fff7fff7fff7fff
   262  DATA LCDATA2<>+0x050(SB)/8, $0x8000800080008000 // bytes 80-95: 0x8000 XOR mask applied to the min candidates (80[rbp]) before vphminposuw
   263  DATA LCDATA2<>+0x058(SB)/8, $0x8000800080008000
   264  GLOBL LCDATA2<>(SB), 8, $96 // file-local 96-byte table used by ·_int16_max_min_avx2; flag 8 is RODATA in Go's textflag.h
   265  
   266  TEXT ·_int16_max_min_avx2(SB), $8-32
        // _int16_max_min_avx2(values, length, minout, maxout)
        //
        // Scans `length` signed 16-bit elements starting at address `values`, then
        // stores the smallest element as one word through `minout` and the largest
        // as one word through `maxout`.
        //
        // Arguments (32 bytes, read through the FP pseudo-register):
        //   values+0   address of the first element
        //   length+8   element count; only the low 32 bits are examined, as a
        //              signed value (test esi, esi / mov r9d, esi)
        //   minout+16  address that receives the minimum
        //   maxout+24  address that receives the maximum
        //
        // When the count is <= 0 the routine stores 32767 as the minimum and
        // -32768 as the maximum. Counts of 32 or more go through an AVX2 loop that
        // consumes 32-element (64-byte) chunks; whatever is left is handled by a
        // scalar loop.
        //
        // Register roles: DI = values, R9 = count, R10 = elements covered by the
        // vector code and then the scalar index, SI = running min, R8W = running
        // max, DX/CX = output addresses, BP = base of LCDATA2.
        //
        // The frame size is 8 rather than 0 on purpose: this routine overwrites BP
        // with the address of LCDATA2, and a non-zero frame makes the Go assembler
        // emit the prologue/epilogue that saves and restores the caller's frame
        // pointer. With a zero-size frame BP would be returned clobbered.
   267  
   268  	MOVQ values+0(FP), DI
   269  	MOVQ length+8(FP), SI
   270  	MOVQ minout+16(FP), DX
   271  	MOVQ maxout+24(FP), CX
   272  	LEAQ LCDATA2<>(SB), BP
   273  
        // Dispatch on the count: <= 0 -> LBB2_1, >= 32 -> LBB2_4 (vector path),
        // otherwise seed max/min with -32768/32767 and run the scalar loop from
        // index 0.
   274  	WORD $0xf685                 // test    esi, esi
   275  	JLE  LBB2_1
   276  	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
   277  	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
   278  	JA   LBB2_4
   279  	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
   280  	LONG $0x7fffbe66             // mov    si, 32767
   281  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
   282  	JMP  LBB2_11
   283  
        // Empty input: report min = 32767, max = -32768.
   284  LBB2_1:
   285  	LONG $0x7fffbe66             // mov    si, 32767
   286  	LONG $0x00b84166; BYTE $0x80 // mov    r8w, -32768
   287  	JMP  LBB2_12
   288  
        // Vector setup: r10 = count rounded down to a multiple of 32,
        // r8 = number of 32-element chunks. A single chunk is set up at LBB2_5.
        // Otherwise rsi = -(r8 & -2) counts chunk pairs up to zero, ymm0/ymm2 hold
        // running minima (seeded with 0x7fff) and ymm1/ymm3 running maxima (seeded
        // with 0x8000).
   289  LBB2_4:
   290  	WORD $0x8945; BYTE $0xca     // mov    r10d, r9d
   291  	LONG $0xe0e28341             // and    r10d, -32
   292  	LONG $0xe0428d49             // lea    rax, [r10 - 32]
   293  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
   294  	LONG $0x05e8c149             // shr    r8, 5
   295  	LONG $0x01c08349             // add    r8, 1
   296  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
   297  	JE   LBB2_5
   298  	WORD $0x894c; BYTE $0xc6     // mov    rsi, r8
   299  	LONG $0xfee68348             // and    rsi, -2
   300  	WORD $0xf748; BYTE $0xde     // neg    rsi
   301  	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
   302  	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
   303  	WORD $0xc031                 // xor    eax, eax
   304  	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
   305  	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
   306  
        // Main loop: two chunks (64 elements, 128 bytes) per iteration; rax is the
        // element index and is scaled by 2 in the addressing.
   307  LBB2_7:
   308  	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
   309  	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
   310  	LONG $0x746ffec5; WORD $0x4047 // vmovdqu    ymm6, yword [rdi + 2*rax + 64]
   311  	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu    ymm7, yword [rdi + 2*rax + 96]
   312  	LONG $0xc4eafdc5               // vpminsw    ymm0, ymm0, ymm4
   313  	LONG $0xd5eaedc5               // vpminsw    ymm2, ymm2, ymm5
   314  	LONG $0xcceef5c5               // vpmaxsw    ymm1, ymm1, ymm4
   315  	LONG $0xddeee5c5               // vpmaxsw    ymm3, ymm3, ymm5
   316  	LONG $0xc6eafdc5               // vpminsw    ymm0, ymm0, ymm6
   317  	LONG $0xd7eaedc5               // vpminsw    ymm2, ymm2, ymm7
   318  	LONG $0xceeef5c5               // vpmaxsw    ymm1, ymm1, ymm6
   319  	LONG $0xdfeee5c5               // vpmaxsw    ymm3, ymm3, ymm7
   320  	LONG $0x40c08348               // add    rax, 64
   321  	LONG $0x02c68348               // add    rsi, 2
   322  	JNE  LBB2_7
   323  	LONG $0x01c0f641               // test    r8b, 1
   324  	JE   LBB2_10
   325  
        // Odd chunk count: fold in the one remaining 32-element chunk.
   326  LBB2_9:
   327  	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
   328  	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
   329  	LONG $0xddeee5c5               // vpmaxsw    ymm3, ymm3, ymm5
   330  	LONG $0xcceef5c5               // vpmaxsw    ymm1, ymm1, ymm4
   331  	LONG $0xd5eaedc5               // vpminsw    ymm2, ymm2, ymm5
   332  	LONG $0xc4eafdc5               // vpminsw    ymm0, ymm0, ymm4
   333  
        // Horizontal reduction. vphminposuw finds an unsigned 16-bit minimum, so
        // the max candidates are XORed with 0x7fff and the min candidates with
        // 0x8000 to turn both searches into an unsigned minimum. The scalar XOR
        // after each vmovd undoes the mask. Only the low 16 bits of r8d (max) and
        // esi (min) are stored later.
   334  LBB2_10:
   335  	LONG $0xcbeef5c5                           // vpmaxsw    ymm1, ymm1, ymm3
   336  	LONG $0x397de3c4; WORD $0x01cb             // vextracti128    xmm3, ymm1, 1
   337  	LONG $0xcbeef1c5                           // vpmaxsw    xmm1, xmm1, xmm3
   338  	LONG $0x4deff1c5; BYTE $0x40               // vpxor    xmm1, xmm1, oword 64[rbp] /* [rip + .LCPI2_2] */
   339  	LONG $0xc2eafdc5                           // vpminsw    ymm0, ymm0, ymm2
   340  	LONG $0x4179e2c4; BYTE $0xc9               // vphminposuw    xmm1, xmm1
   341  	LONG $0x7e79c1c4; BYTE $0xc8               // vmovd    r8d, xmm1
   342  	LONG $0xfff08141; WORD $0x007f; BYTE $0x00 // xor    r8d, 32767
   343  	LONG $0x397de3c4; WORD $0x01c1             // vextracti128    xmm1, ymm0, 1
   344  	LONG $0xc1eaf9c5                           // vpminsw    xmm0, xmm0, xmm1
   345  	LONG $0x45eff9c5; BYTE $0x50               // vpxor    xmm0, xmm0, oword 80[rbp] /* [rip + .LCPI2_3] */
   346  	LONG $0x4179e2c4; BYTE $0xc0               // vphminposuw    xmm0, xmm0
   347  	LONG $0xc67ef9c5                           // vmovd    esi, xmm0
   348  	LONG $0x8000f681; WORD $0x0000             // xor    esi, 32768
   349  	WORD $0x394d; BYTE $0xca                   // cmp    r10, r9
   350  	JE   LBB2_12
   351  
        // Scalar tail: one element per iteration from index r10 up to r9, using
        // signed compares (cmovg lowers the min, cmovl raises the max).
   352  LBB2_11:
   353  	LONG $0x04b70f42; BYTE $0x57 // movzx    eax, word [rdi + 2*r10]
   354  	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
   355  	WORD $0x4f0f; BYTE $0xf0     // cmovg    esi, eax
   356  	LONG $0xc0394166             // cmp    r8w, ax
   357  	LONG $0xc04c0f44             // cmovl    r8d, eax
   358  	LONG $0x01c28349             // add    r10, 1
   359  	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
   360  	JNE  LBB2_11
   361  
        // Store the results: max through rcx (maxout), min through rdx (minout).
   362  LBB2_12:
   363  	LONG $0x01894466         // mov    word [rcx], r8w
   364  	WORD $0x8966; BYTE $0x32 // mov    word [rdx], si
   365  	VZEROUPPER
   366  	RET
   367  
        // Exactly one 32-element chunk: seed the accumulators, then r8 == 1 sends
        // control to LBB2_9 for the single chunk.
   368  LBB2_5:
   369  	LONG $0x4d6ffdc5; BYTE $0x00 // vmovdqa    ymm1, yword 0[rbp] /* [rip + .LCPI2_0] */
   370  	LONG $0x456ffdc5; BYTE $0x20 // vmovdqa    ymm0, yword 32[rbp] /* [rip + .LCPI2_1] */
   371  	WORD $0xc031                 // xor    eax, eax
   372  	LONG $0xd06ffdc5             // vmovdqa    ymm2, ymm0
   373  	LONG $0xd96ffdc5             // vmovdqa    ymm3, ymm1
   374  	LONG $0x01c0f641             // test    r8b, 1
   375  	JNE  LBB2_9
   376  	JMP  LBB2_10
   377  
   378  TEXT ·_uint16_max_min_avx2(SB), $0-32
        // _uint16_max_min_avx2(values, length, minout, maxout)
        //
        // Scans `length` unsigned 16-bit elements starting at address `values`,
        // then stores the smallest element as one word through `minout` and the
        // largest as one word through `maxout`.
        //
        // Arguments (32 bytes, read through the FP pseudo-register):
        //   values+0   address of the first element
        //   length+8   element count; only the low 32 bits are examined, as a
        //              signed value (test esi, esi / mov r9d, esi)
        //   minout+16  address that receives the minimum
        //   maxout+24  address that receives the maximum
        //
        // When the count is <= 0 the routine stores 0xffff as the minimum and 0 as
        // the maximum. Counts of 32 or more go through an AVX2 loop that consumes
        // 32-element (64-byte) chunks; whatever is left is handled by a scalar
        // loop.
        //
        // Register roles: DI = values, R9 = count, R10 = elements covered by the
        // vector code and then the scalar index, R8W = running min, SI = running
        // max, DX/CX = output addresses. No constant table is needed; the
        // accumulators are seeded with vpxor (zero) and vpcmpeqd (all ones).
   379  
   380  	MOVQ values+0(FP), DI
   381  	MOVQ length+8(FP), SI
   382  	MOVQ minout+16(FP), DX
   383  	MOVQ maxout+24(FP), CX
   384  
        // Dispatch on the count: <= 0 -> LBB3_1, >= 32 -> LBB3_4 (vector path),
        // otherwise seed min/max with 0xffff/0 and run the scalar loop from
        // index 0.
   385  	WORD $0xf685                 // test    esi, esi
   386  	JLE  LBB3_1
   387  	WORD $0x8941; BYTE $0xf1     // mov    r9d, esi
   388  	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
   389  	JA   LBB3_4
   390  	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
   391  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
   392  	WORD $0xf631                 // xor    esi, esi
   393  	JMP  LBB3_11
   394  
        // Empty input: report min = 0xffff, max = 0.
   395  LBB3_1:
   396  	LONG $0xffb84166; BYTE $0xff // mov    r8w, -1
   397  	WORD $0xf631                 // xor    esi, esi
   398  	JMP  LBB3_12
   399  
        // Vector setup: r10 = count rounded down to a multiple of 32,
        // r8 = number of 32-element chunks. A single chunk is set up at LBB3_5.
        // Otherwise rsi = -(r8 & -2) counts chunk pairs up to zero, ymm1/ymm2 hold
        // running minima (all ones) and ymm0/ymm3 running maxima (zero; the
        // VEX-encoded xmm vpxor also clears the upper ymm lanes).
   400  LBB3_4:
   401  	WORD $0x8945; BYTE $0xca // mov    r10d, r9d
   402  	LONG $0xe0e28341         // and    r10d, -32
   403  	LONG $0xe0428d49         // lea    rax, [r10 - 32]
   404  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
   405  	LONG $0x05e8c149         // shr    r8, 5
   406  	LONG $0x01c08349         // add    r8, 1
   407  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
   408  	JE   LBB3_5
   409  	WORD $0x894c; BYTE $0xc6 // mov    rsi, r8
   410  	LONG $0xfee68348         // and    rsi, -2
   411  	WORD $0xf748; BYTE $0xde // neg    rsi
   412  	LONG $0xc0eff9c5         // vpxor    xmm0, xmm0, xmm0
   413  	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
   414  	WORD $0xc031             // xor    eax, eax
   415  	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
   416  	LONG $0xdbefe1c5         // vpxor    xmm3, xmm3, xmm3
   417  
        // Main loop: two chunks (64 elements, 128 bytes) per iteration; rax is the
        // element index and is scaled by 2 in the addressing.
   418  LBB3_7:
   419  	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
   420  	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
   421  	LONG $0x746ffec5; WORD $0x4047 // vmovdqu    ymm6, yword [rdi + 2*rax + 64]
   422  	LONG $0x7c6ffec5; WORD $0x6047 // vmovdqu    ymm7, yword [rdi + 2*rax + 96]
   423  	LONG $0x3a75e2c4; BYTE $0xcc   // vpminuw    ymm1, ymm1, ymm4
   424  	LONG $0x3a6de2c4; BYTE $0xd5   // vpminuw    ymm2, ymm2, ymm5
   425  	LONG $0x3e7de2c4; BYTE $0xc4   // vpmaxuw    ymm0, ymm0, ymm4
   426  	LONG $0x3e65e2c4; BYTE $0xdd   // vpmaxuw    ymm3, ymm3, ymm5
   427  	LONG $0x3a75e2c4; BYTE $0xce   // vpminuw    ymm1, ymm1, ymm6
   428  	LONG $0x3a6de2c4; BYTE $0xd7   // vpminuw    ymm2, ymm2, ymm7
   429  	LONG $0x3e7de2c4; BYTE $0xc6   // vpmaxuw    ymm0, ymm0, ymm6
   430  	LONG $0x3e65e2c4; BYTE $0xdf   // vpmaxuw    ymm3, ymm3, ymm7
   431  	LONG $0x40c08348               // add    rax, 64
   432  	LONG $0x02c68348               // add    rsi, 2
   433  	JNE  LBB3_7
   434  	LONG $0x01c0f641               // test    r8b, 1
   435  	JE   LBB3_10
   436  
        // Odd chunk count: fold in the one remaining 32-element chunk.
   437  LBB3_9:
   438  	LONG $0x246ffec5; BYTE $0x47   // vmovdqu    ymm4, yword [rdi + 2*rax]
   439  	LONG $0x6c6ffec5; WORD $0x2047 // vmovdqu    ymm5, yword [rdi + 2*rax + 32]
   440  	LONG $0x3e65e2c4; BYTE $0xdd   // vpmaxuw    ymm3, ymm3, ymm5
   441  	LONG $0x3e7de2c4; BYTE $0xc4   // vpmaxuw    ymm0, ymm0, ymm4
   442  	LONG $0x3a6de2c4; BYTE $0xd5   // vpminuw    ymm2, ymm2, ymm5
   443  	LONG $0x3a75e2c4; BYTE $0xcc   // vpminuw    ymm1, ymm1, ymm4
   444  
        // Horizontal reduction. The max candidates are inverted (XOR with all
        // ones) so vphminposuw can find the maximum as an unsigned minimum, and
        // `not esi` undoes the inversion. The min candidates go to vphminposuw
        // directly. Only the low 16 bits of esi (max) and r8d (min) are stored
        // later.
   445  LBB3_10:
   446  	LONG $0x3a75e2c4; BYTE $0xca   // vpminuw    ymm1, ymm1, ymm2
   447  	LONG $0x3e7de2c4; BYTE $0xc3   // vpmaxuw    ymm0, ymm0, ymm3
   448  	LONG $0x397de3c4; WORD $0x01c2 // vextracti128    xmm2, ymm0, 1
   449  	LONG $0x3e79e2c4; BYTE $0xc2   // vpmaxuw    xmm0, xmm0, xmm2
   450  	LONG $0xd276e9c5               // vpcmpeqd    xmm2, xmm2, xmm2
   451  	LONG $0xc2eff9c5               // vpxor    xmm0, xmm0, xmm2
   452  	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
   453  	LONG $0xc67ef9c5               // vmovd    esi, xmm0
   454  	WORD $0xd6f7                   // not    esi
   455  	LONG $0x397de3c4; WORD $0x01c8 // vextracti128    xmm0, ymm1, 1
   456  	LONG $0x3a71e2c4; BYTE $0xc0   // vpminuw    xmm0, xmm1, xmm0
   457  	LONG $0x4179e2c4; BYTE $0xc0   // vphminposuw    xmm0, xmm0
   458  	LONG $0x7e79c1c4; BYTE $0xc0   // vmovd    r8d, xmm0
   459  	WORD $0x394d; BYTE $0xca       // cmp    r10, r9
   460  	JE   LBB3_12
   461  
        // Scalar tail: one element per iteration from index r10 up to r9, using
        // unsigned compares (cmovae lowers the min, cmovbe raises the max).
   462  LBB3_11:
   463  	LONG $0x04b70f42; BYTE $0x57 // movzx    eax, word [rdi + 2*r10]
   464  	LONG $0xc0394166             // cmp    r8w, ax
   465  	LONG $0xc0430f44             // cmovae    r8d, eax
   466  	WORD $0x3966; BYTE $0xc6     // cmp    si, ax
   467  	WORD $0x460f; BYTE $0xf0     // cmovbe    esi, eax
   468  	LONG $0x01c28349             // add    r10, 1
   469  	WORD $0x394d; BYTE $0xd1     // cmp    r9, r10
   470  	JNE  LBB3_11
   471  
        // Store the results: max through rcx (maxout), min through rdx (minout).
   472  LBB3_12:
   473  	WORD $0x8966; BYTE $0x31 // mov    word [rcx], si
   474  	LONG $0x02894466         // mov    word [rdx], r8w
   475  	VZEROUPPER
   476  	RET
   477  
        // Exactly one 32-element chunk: seed the accumulators, then r8 == 1 sends
        // control to LBB3_9 for the single chunk.
   478  LBB3_5:
   479  	LONG $0xc0eff9c5 // vpxor    xmm0, xmm0, xmm0
   480  	LONG $0xc976f5c5 // vpcmpeqd    ymm1, ymm1, ymm1
   481  	WORD $0xc031     // xor    eax, eax
   482  	LONG $0xd276edc5 // vpcmpeqd    ymm2, ymm2, ymm2
   483  	LONG $0xdbefe1c5 // vpxor    xmm3, xmm3, xmm3
   484  	LONG $0x01c0f641 // test    r8b, 1
   485  	JNE  LBB3_9
   486  	JMP  LBB3_10
   487  
   488  DATA LCDATA3<>+0x000(SB)/8, $0x7fffffff80000000 // little-endian: dword at +0 is 0x80000000 (int32 minimum, broadcast from 0[rbp] as the running-max seed); dword at +4 is 0x7fffffff (int32 maximum, broadcast from 4[rbp] as the running-min seed)
   489  GLOBL LCDATA3<>(SB), 8, $8 // file-local 8-byte table used by ·_int32_max_min_avx2; flag 8 is RODATA in Go's textflag.h
   490  
   491  TEXT ·_int32_max_min_avx2(SB), $8-32
        // _int32_max_min_avx2(values, length, minout, maxout)
        //
        // Scans `length` signed 32-bit elements starting at address `values`, then
        // stores the smallest element as one dword through `minout` and the
        // largest as one dword through `maxout`.
        //
        // Arguments (32 bytes, read through the FP pseudo-register):
        //   values+0   address of the first element
        //   length+8   element count; only the low 32 bits are examined, as a
        //              signed value (test esi, esi / mov r8d, esi)
        //   minout+16  address that receives the minimum
        //   maxout+24  address that receives the maximum
        //
        // When the count is <= 0 the routine stores 2147483647 as the minimum and
        // -2147483648 as the maximum. Counts of 32 or more go through an AVX2 loop
        // that consumes 32 elements (128 bytes) per iteration; whatever is left is
        // handled by a scalar loop.
        //
        // Register roles: DI = values, R8 = count, R9 = elements covered by the
        // vector code and then the scalar index, EAX = running min, R10D/ESI =
        // running max, DX/CX = output addresses, BP = base of LCDATA3.
        //
        // The frame size is 8 rather than 0 on purpose: this routine overwrites BP
        // with the address of LCDATA3, and a non-zero frame makes the Go assembler
        // emit the prologue/epilogue that saves and restores the caller's frame
        // pointer. With a zero-size frame BP would be returned clobbered.
   492  
   493  	MOVQ values+0(FP), DI
   494  	MOVQ length+8(FP), SI
   495  	MOVQ minout+16(FP), DX
   496  	MOVQ maxout+24(FP), CX
   497  	LEAQ LCDATA3<>(SB), BP
   498  
        // Dispatch on the count: <= 0 -> LBB4_1, >= 32 -> LBB4_4 (vector path),
        // otherwise seed max/min with the int32 extremes and run the scalar loop
        // from index 0.
   499  	WORD $0xf685                   // test    esi, esi
   500  	JLE  LBB4_1
   501  	WORD $0x8941; BYTE $0xf0       // mov    r8d, esi
   502  	WORD $0xfe83; BYTE $0x1f       // cmp    esi, 31
   503  	JA   LBB4_4
   504  	LONG $0x0000ba41; WORD $0x8000 // mov    r10d, -2147483648
   505  	LONG $0xffffffb8; BYTE $0x7f   // mov    eax, 2147483647
   506  	WORD $0x3145; BYTE $0xc9       // xor    r9d, r9d
   507  	JMP  LBB4_7
   508  
        // Empty input: report min = 2147483647, max = -2147483648.
   509  LBB4_1:
   510  	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647
   511  	LONG $0x000000be; BYTE $0x80 // mov    esi, -2147483648
   512  	JMP  LBB4_8
   513  
        // Vector setup: r9 = count rounded down to a multiple of 32. ymm0-ymm3 hold
        // running minima (broadcast of the dword at 4[rbp]) and ymm4-ymm7 running
        // maxima (broadcast of the dword at 0[rbp]).
   514  LBB4_4:
   515  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
   516  	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd    ymm4, dword 0[rbp] /* [rip + .LCPI4_0] */
   517  	LONG $0xe0e18341               // and    r9d, -32
   518  	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd    ymm0, dword 4[rbp] /* [rip + .LCPI4_1] */
   519  	WORD $0xc031                   // xor    eax, eax
   520  	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
   521  	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
   522  	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
   523  	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
   524  	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
   525  	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
   526  
        // Main loop: 32 elements (four ymm loads) per iteration; rax is the element
        // index and runs until it reaches r9.
   527  LBB4_5:
   528  	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
   529  	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
   530  	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
   531  	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
   532  	LONG $0x397dc2c4; BYTE $0xc0   // vpminsd    ymm0, ymm0, ymm8
   533  	LONG $0x3975c2c4; BYTE $0xc9   // vpminsd    ymm1, ymm1, ymm9
   534  	LONG $0x396dc2c4; BYTE $0xd2   // vpminsd    ymm2, ymm2, ymm10
   535  	LONG $0x3965c2c4; BYTE $0xdb   // vpminsd    ymm3, ymm3, ymm11
   536  	LONG $0x3d5dc2c4; BYTE $0xe0   // vpmaxsd    ymm4, ymm4, ymm8
   537  	LONG $0x3d55c2c4; BYTE $0xe9   // vpmaxsd    ymm5, ymm5, ymm9
   538  	LONG $0x3d4dc2c4; BYTE $0xf2   // vpmaxsd    ymm6, ymm6, ymm10
   539  	LONG $0x3d45c2c4; BYTE $0xfb   // vpmaxsd    ymm7, ymm7, ymm11
   540  	LONG $0x20c08348               // add    rax, 32
   541  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   542  	JNE  LBB4_5
        // Horizontal reduction: merge the four accumulators, fold the upper
        // 128-bit lane onto the lower one, then use two vpshufd steps (78 swaps
        // the 64-bit halves, 229 brings element 1 down to element 0) so the result
        // lands in element 0. Results: r10d/esi = max, eax = min.
   543  	LONG $0x3d5de2c4; BYTE $0xe5   // vpmaxsd    ymm4, ymm4, ymm5
   544  	LONG $0x3d5de2c4; BYTE $0xe6   // vpmaxsd    ymm4, ymm4, ymm6
   545  	LONG $0x3d5de2c4; BYTE $0xe7   // vpmaxsd    ymm4, ymm4, ymm7
   546  	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
   547  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
   548  	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
   549  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
   550  	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
   551  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
   552  	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
   553  	LONG $0x397de2c4; BYTE $0xc1   // vpminsd    ymm0, ymm0, ymm1
   554  	LONG $0x397de2c4; BYTE $0xc2   // vpminsd    ymm0, ymm0, ymm2
   555  	LONG $0x397de2c4; BYTE $0xc3   // vpminsd    ymm0, ymm0, ymm3
   556  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   557  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
   558  	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
   559  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
   560  	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
   561  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
   562  	LONG $0xc07ef9c5               // vmovd    eax, xmm0
   563  	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
   564  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   565  	JE   LBB4_8
   566  
        // Scalar tail: one element per iteration from index r9 up to r8, using
        // signed compares. esi is loaded with the element, replaced by the larger
        // of it and r10d, and copied back to r10d as the running max.
   567  LBB4_7:
   568  	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
   569  	WORD $0xf039             // cmp    eax, esi
   570  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
   571  	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
   572  	LONG $0xf24d0f41         // cmovge    esi, r10d
   573  	LONG $0x01c18349         // add    r9, 1
   574  	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
   575  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   576  	JNE  LBB4_7
   577  
        // Store the results: max through rcx (maxout), min through rdx (minout).
   578  LBB4_8:
   579  	WORD $0x3189 // mov    dword [rcx], esi
   580  	WORD $0x0289 // mov    dword [rdx], eax
   581  	VZEROUPPER
   582  	RET
   583  
   584  TEXT ·_uint32_max_min_avx2(SB), $0-32
   585  
   586  	MOVQ values+0(FP), DI
   587  	MOVQ length+8(FP), SI
   588  	MOVQ minout+16(FP), DX
   589  	MOVQ maxout+24(FP), CX
   590  
   591  	WORD $0xf685                 // test    esi, esi
   592  	JLE  LBB5_1
   593  	WORD $0x8941; BYTE $0xf0     // mov    r8d, esi
   594  	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
   595  	JA   LBB5_4
   596  	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
   597  	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
   598  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
   599  	JMP  LBB5_7
   600  
   601  LBB5_1:
   602  	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
   603  	WORD $0xf631                 // xor    esi, esi
   604  	JMP  LBB5_8
   605  
   606  LBB5_4:
   607  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   608  	LONG $0xe0e18341         // and    r9d, -32
   609  	LONG $0xe4efd9c5         // vpxor    xmm4, xmm4, xmm4
   610  	LONG $0xc076fdc5         // vpcmpeqd    ymm0, ymm0, ymm0
   611  	WORD $0xc031             // xor    eax, eax
   612  	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
   613  	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
   614  	LONG $0xdb76e5c5         // vpcmpeqd    ymm3, ymm3, ymm3
   615  	LONG $0xedefd1c5         // vpxor    xmm5, xmm5, xmm5
   616  	LONG $0xf6efc9c5         // vpxor    xmm6, xmm6, xmm6
   617  	LONG $0xffefc1c5         // vpxor    xmm7, xmm7, xmm7
   618  
   619  LBB5_5:
   620  	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
   621  	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
   622  	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
   623  	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
   624  	LONG $0x3b7dc2c4; BYTE $0xc0   // vpminud    ymm0, ymm0, ymm8
   625  	LONG $0x3b75c2c4; BYTE $0xc9   // vpminud    ymm1, ymm1, ymm9
   626  	LONG $0x3b6dc2c4; BYTE $0xd2   // vpminud    ymm2, ymm2, ymm10
   627  	LONG $0x3b65c2c4; BYTE $0xdb   // vpminud    ymm3, ymm3, ymm11
   628  	LONG $0x3f5dc2c4; BYTE $0xe0   // vpmaxud    ymm4, ymm4, ymm8
   629  	LONG $0x3f55c2c4; BYTE $0xe9   // vpmaxud    ymm5, ymm5, ymm9
   630  	LONG $0x3f4dc2c4; BYTE $0xf2   // vpmaxud    ymm6, ymm6, ymm10
   631  	LONG $0x3f45c2c4; BYTE $0xfb   // vpmaxud    ymm7, ymm7, ymm11
   632  	LONG $0x20c08348               // add    rax, 32
   633  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   634  	JNE  LBB5_5
   635  	LONG $0x3f5de2c4; BYTE $0xe5   // vpmaxud    ymm4, ymm4, ymm5
   636  	LONG $0x3f5de2c4; BYTE $0xe6   // vpmaxud    ymm4, ymm4, ymm6
   637  	LONG $0x3f5de2c4; BYTE $0xe7   // vpmaxud    ymm4, ymm4, ymm7
   638  	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
   639  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   640  	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
   641  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   642  	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
   643  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   644  	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
   645  	LONG $0x3b7de2c4; BYTE $0xc1   // vpminud    ymm0, ymm0, ymm1
   646  	LONG $0x3b7de2c4; BYTE $0xc2   // vpminud    ymm0, ymm0, ymm2
   647  	LONG $0x3b7de2c4; BYTE $0xc3   // vpminud    ymm0, ymm0, ymm3
   648  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   649  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   650  	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
   651  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   652  	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
   653  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   654  	LONG $0xc07ef9c5               // vmovd    eax, xmm0
   655  	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
   656  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   657  	JE   LBB5_8
   658  
   659  LBB5_7:
   660  	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
   661  	WORD $0xf039             // cmp    eax, esi
   662  	WORD $0x430f; BYTE $0xc6 // cmovae    eax, esi
   663  	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
   664  	LONG $0xf2470f41         // cmova    esi, r10d
   665  	LONG $0x01c18349         // add    r9, 1
   666  	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
   667  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   668  	JNE  LBB5_7
   669  
   670  LBB5_8:
   671  	WORD $0x3189 // mov    dword [rcx], esi
   672  	WORD $0x0289 // mov    dword [rdx], eax
   673  	VZEROUPPER
   674  	RET
   675  
   676  DATA LCDATA4<>+0x000(SB)/8, $0x8000000000000000 // offset 0: bit pattern of the smallest int64; _int64_max_min_avx2 broadcasts it into its running-max registers
   677  DATA LCDATA4<>+0x008(SB)/8, $0x7fffffffffffffff // offset 8: bit pattern of the largest int64; _int64_max_min_avx2 broadcasts it into its running-min registers
   678  GLOBL LCDATA4<>(SB), 8, $16 // file-local 16-byte symbol; 8 is the numeric value of RODATA in Go's textflag.h
   679  
   680  TEXT ·_int64_max_min_avx2(SB), $8-32 // min and max of a signed int64 array. Frame is 8 bytes (was $0): with a non-zero frame the Go assembler saves the caller's BP on entry and restores it at RET, which is required because the body overwrites BP
   681  // Args: values = pointer to the int64 elements, length = element count (only its low 32 bits are read), minout/maxout = pointers that receive the results. length <= 0 stores MaxInt64 to *minout and MinInt64 to *maxout.
   682  	MOVQ values+0(FP), DI // rdi = values
   683  	MOVQ length+8(FP), SI // rsi = length
   684  	MOVQ minout+16(FP), DX // rdx = minout
   685  	MOVQ maxout+24(FP), CX // rcx = maxout
   686  	LEAQ LCDATA4<>(SB), BP // rbp = address of the constant pair; this clobbers BP, hence the non-zero frame size on the TEXT line
   687  
   688  	QUAD $0xffffffffffffb848; WORD $0x7fff // mov    rax, 9223372036854775807 (running-min seed)
   689  	WORD $0xf685                           // test    esi, esi
   690  	JLE  LBB6_1 // length <= 0 as a signed 32-bit value: only the seeds are stored
   691  	WORD $0x8941; BYTE $0xf0               // mov    r8d, esi
   692  	WORD $0xfe83; BYTE $0x0f               // cmp    esi, 15
   693  	JA   LBB6_4 // more than 15 elements: take the vector path
   694  	LONG $0x01508d4c                       // lea    r10, [rax + 1] (wraps to MinInt64, the running-max seed)
   695  	WORD $0x3145; BYTE $0xc9               // xor    r9d, r9d
   696  	JMP  LBB6_7 // short input: scalar loop starting at index 0
   697  
   698  LBB6_1: // empty-input exit: rax already holds MaxInt64
   699  	LONG $0x01708d48 // lea    rsi, [rax + 1] (MinInt64)
   700  	JMP  LBB6_8
   701  
   702  LBB6_4: // vector setup: r9 = length rounded down to a multiple of 16, rax = element index
   703  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
   704  	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq    ymm4, qword 0[rbp] /* [rip + .LCPI6_0] */
   705  	LONG $0xf0e18341               // and    r9d, -16
   706  	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI6_1] */
   707  	WORD $0xc031                   // xor    eax, eax
   708  	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
   709  	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
   710  	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
   711  	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
   712  	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
   713  	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
   714  
   715  LBB6_5: // main loop: 16 elements (four 256-bit loads) per iteration; ymm0-ymm3 hold running minima, ymm4-ymm7 running maxima. Each vpcmpgtq/vblendvpd pair is a per-lane signed select
   716  	LONG $0x046f7ec5; BYTE $0xc7   // vmovdqu    ymm8, yword [rdi + 8*rax]
   717  	LONG $0x373d62c4; BYTE $0xc8   // vpcmpgtq    ymm9, ymm8, ymm0
   718  	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd    ymm0, ymm8, ymm0, ymm9
   719  	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
   720  	LONG $0x373562c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm9, ymm3
   721  	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd    ymm3, ymm9, ymm3, ymm10
   722  	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu    ymm10, yword [rdi + 8*rax + 64]
   723  	LONG $0x372d62c4; BYTE $0xda   // vpcmpgtq    ymm11, ymm10, ymm2
   724  	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd    ymm2, ymm10, ymm2, ymm11
   725  	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 96]
   726  	LONG $0x372562c4; BYTE $0xe1   // vpcmpgtq    ymm12, ymm11, ymm1
   727  	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd    ymm1, ymm11, ymm1, ymm12
   728  	LONG $0x375d42c4; BYTE $0xe0   // vpcmpgtq    ymm12, ymm4, ymm8
   729  	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd    ymm4, ymm8, ymm4, ymm12
   730  	LONG $0x374542c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm7, ymm9
   731  	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd    ymm7, ymm9, ymm7, ymm8
   732  	LONG $0x374d42c4; BYTE $0xc2   // vpcmpgtq    ymm8, ymm6, ymm10
   733  	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd    ymm6, ymm10, ymm6, ymm8
   734  	LONG $0x375542c4; BYTE $0xc3   // vpcmpgtq    ymm8, ymm5, ymm11
   735  	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm11, ymm5, ymm8
   736  	LONG $0x10c08348               // add    rax, 16
   737  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   738  	JNE  LBB6_5 // loop until rax == r9; the code below folds ymm4-ymm7 into one max (moved to r10), then ymm0-ymm3 into one min (moved to rax)
   739  	LONG $0x375d62c4; BYTE $0xc7   // vpcmpgtq    ymm8, ymm4, ymm7
   740  	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd    ymm4, ymm7, ymm4, ymm8
   741  	LONG $0x375de2c4; BYTE $0xfe   // vpcmpgtq    ymm7, ymm4, ymm6
   742  	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd    ymm4, ymm6, ymm4, ymm7
   743  	LONG $0x375de2c4; BYTE $0xf5   // vpcmpgtq    ymm6, ymm4, ymm5
   744  	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd    ymm4, ymm5, ymm4, ymm6
   745  	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
   746  	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
   747  	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
   748  	LONG $0x0479e3c4; WORD $0x4eec // vpermilps    xmm5, xmm4, 78
   749  	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
   750  	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
   751  	LONG $0x7ef9c1c4; BYTE $0xe2   // vmovq    r10, xmm4
   752  	LONG $0x3765e2c4; BYTE $0xe0   // vpcmpgtq    ymm4, ymm3, ymm0
   753  	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd    ymm0, ymm3, ymm0, ymm4
   754  	LONG $0x376de2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm2, ymm0
   755  	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd    ymm0, ymm2, ymm0, ymm3
   756  	LONG $0x3775e2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm1, ymm0
   757  	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd    ymm0, ymm1, ymm0, ymm2
   758  	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
   759  	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
   760  	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
   761  	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps    xmm1, xmm0, 78
   762  	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
   763  	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
   764  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
   765  	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
   766  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   767  	JE   LBB6_8 // length was a multiple of 16: nothing left for the scalar loop
   768  
   769  LBB6_7: // scalar loop over elements [r9, r8): rax = running min, r10 (copied from rsi each pass) = running max
   770  	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
   771  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   772  	LONG $0xc64f0f48         // cmovg    rax, rsi
   773  	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
   774  	LONG $0xf24d0f49         // cmovge    rsi, r10
   775  	LONG $0x01c18349         // add    r9, 1
   776  	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
   777  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   778  	JNE  LBB6_7
   779  
   780  LBB6_8: // common exit: *maxout = rsi, *minout = rax
   781  	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
   782  	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
   783  	VZEROUPPER
   784  	RET // with the non-zero frame, the assembler's epilogue reloads the caller's BP before returning
   785  
   786  DATA LCDATA5<>+0x000(SB)/8, $0x8000000000000000 // only the top bit set; _uint64_max_min_avx2 XORs operands with this before its signed vpcmpgtq compares
   787  GLOBL LCDATA5<>(SB), 8, $8 // file-local 8-byte symbol; 8 is the numeric value of RODATA in Go's textflag.h
   788  
   789  TEXT ·_uint64_max_min_avx2(SB), $8-32 // min and max of an unsigned uint64 array. Frame is 8 bytes (was $0): with a non-zero frame the Go assembler saves the caller's BP on entry and restores it at RET, which is required because the body overwrites BP
   790  // Args: values = pointer to the uint64 elements, length = element count (only its low 32 bits are read), minout/maxout = pointers that receive the results. length <= 0 stores all-ones to *minout and 0 to *maxout.
   791  	MOVQ values+0(FP), DI // rdi = values
   792  	MOVQ length+8(FP), SI // rsi = length
   793  	MOVQ minout+16(FP), DX // rdx = minout
   794  	MOVQ maxout+24(FP), CX // rcx = maxout
   795  	LEAQ LCDATA5<>(SB), BP // rbp = address of the top-bit constant; this clobbers BP, hence the non-zero frame size on the TEXT line
   796  
   797  	WORD $0xf685                               // test    esi, esi
   798  	JLE  LBB7_1 // length <= 0 as a signed 32-bit value: only the seeds are stored
   799  	WORD $0x8941; BYTE $0xf0                   // mov    r8d, esi
   800  	WORD $0xfe83; BYTE $0x0f                   // cmp    esi, 15
   801  	JA   LBB7_4 // more than 15 elements: take the vector path
   802  	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1 (running-min seed: all ones)
   803  	WORD $0x3145; BYTE $0xc9                   // xor    r9d, r9d
   804  	WORD $0x3145; BYTE $0xd2                   // xor    r10d, r10d (running-max seed: 0)
   805  	JMP  LBB7_7 // short input: scalar loop starting at index 0
   806  
   807  LBB7_1: // empty-input exit: min = all ones, max = 0
   808  	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
   809  	WORD $0xf631                               // xor    esi, esi
   810  	JMP  LBB7_8
   811  
   812  LBB7_4: // vector setup: r9 = length rounded down to a multiple of 16, rax = element index, ymm0 = broadcast top-bit mask; ymm1-ymm4 start as all ones (minima), ymm5-ymm8 as zero (maxima)
   813  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
   814  	LONG $0xf0e18341               // and    r9d, -16
   815  	LONG $0xedefd1c5               // vpxor    xmm5, xmm5, xmm5
   816  	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
   817  	WORD $0xc031                   // xor    eax, eax
   818  	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI7_0] */
   819  	LONG $0xe476ddc5               // vpcmpeqd    ymm4, ymm4, ymm4
   820  	LONG $0xdb76e5c5               // vpcmpeqd    ymm3, ymm3, ymm3
   821  	LONG $0xd276edc5               // vpcmpeqd    ymm2, ymm2, ymm2
   822  	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
   823  	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
   824  	LONG $0xf6efc9c5               // vpxor    xmm6, xmm6, xmm6
   825  
   826  LBB7_5: // main loop: 16 elements (four 256-bit loads) per iteration. Both compare operands are XORed with ymm0 first, so the signed vpcmpgtq orders them as unsigned values; vblendvpd then selects per lane
   827  	LONG $0x0c6f7ec5; BYTE $0xc7   // vmovdqu    ymm9, yword [rdi + 8*rax]
   828  	LONG $0xd0ef75c5               // vpxor    ymm10, ymm1, ymm0
   829  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   830  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   831  	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd    ymm1, ymm9, ymm1, ymm10
   832  	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
   833  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   834  	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd    ymm5, ymm9, ymm5, ymm10
   835  	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
   836  	LONG $0xd0ef5dc5               // vpxor    ymm10, ymm4, ymm0
   837  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   838  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   839  	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd    ymm4, ymm9, ymm4, ymm10
   840  	LONG $0xd0ef3dc5               // vpxor    ymm10, ymm8, ymm0
   841  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   842  	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 64]
   843  	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd    ymm8, ymm9, ymm8, ymm10
   844  	LONG $0xc8ef65c5               // vpxor    ymm9, ymm3, ymm0
   845  	LONG $0xd0ef25c5               // vpxor    ymm10, ymm11, ymm0
   846  	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
   847  	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd    ymm3, ymm11, ymm3, ymm9
   848  	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
   849  	LONG $0x373542c4; BYTE $0xca   // vpcmpgtq    ymm9, ymm9, ymm10
   850  	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd    ymm7, ymm11, ymm7, ymm9
   851  	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 96]
   852  	LONG $0xd0ef6dc5               // vpxor    ymm10, ymm2, ymm0
   853  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   854  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   855  	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd    ymm2, ymm9, ymm2, ymm10
   856  	LONG $0xd0ef4dc5               // vpxor    ymm10, ymm6, ymm0
   857  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   858  	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd    ymm6, ymm9, ymm6, ymm10
   859  	LONG $0x10c08348               // add    rax, 16
   860  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   861  	JNE  LBB7_5 // loop until rax == r9; the code below folds ymm5-ymm8 into one max (moved to r10) and ymm1-ymm4 into one min (moved to rax)
   862  	LONG $0xc8ef3dc5               // vpxor    ymm9, ymm8, ymm0
   863  	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
   864  	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
   865  	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd    ymm5, ymm8, ymm5, ymm9
   866  	LONG $0xc05755c5               // vxorpd    ymm8, ymm5, ymm0
   867  	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
   868  	LONG $0x373d42c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm8, ymm9
   869  	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm7, ymm5, ymm8
   870  	LONG $0xf857d5c5               // vxorpd    ymm7, ymm5, ymm0
   871  	LONG $0xc0ef4dc5               // vpxor    ymm8, ymm6, ymm0
   872  	LONG $0x3745c2c4; BYTE $0xf8   // vpcmpgtq    ymm7, ymm7, ymm8
   873  	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd    ymm5, ymm6, ymm5, ymm7
   874  	LONG $0x197de3c4; WORD $0x01ee // vextractf128    xmm6, ymm5, 1
   875  	LONG $0xc05749c5               // vxorpd    xmm8, xmm6, xmm0
   876  	LONG $0xf857d1c5               // vxorpd    xmm7, xmm5, xmm0
   877  	LONG $0x3741c2c4; BYTE $0xf8   // vpcmpgtq    xmm7, xmm7, xmm8
   878  	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
   879  	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps    xmm6, xmm5, 78
   880  	LONG $0xc05751c5               // vxorpd    xmm8, xmm5, xmm0
   881  	LONG $0xf857c9c5               // vxorpd    xmm7, xmm6, xmm0
   882  	LONG $0x3739e2c4; BYTE $0xff   // vpcmpgtq    xmm7, xmm8, xmm7
   883  	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
   884  	LONG $0xf0eff5c5               // vpxor    ymm6, ymm1, ymm0
   885  	LONG $0xf8efddc5               // vpxor    ymm7, ymm4, ymm0
   886  	LONG $0x3745e2c4; BYTE $0xf6   // vpcmpgtq    ymm6, ymm7, ymm6
   887  	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd    ymm1, ymm4, ymm1, ymm6
   888  	LONG $0xe057f5c5               // vxorpd    ymm4, ymm1, ymm0
   889  	LONG $0xf0efe5c5               // vpxor    ymm6, ymm3, ymm0
   890  	LONG $0x374de2c4; BYTE $0xe4   // vpcmpgtq    ymm4, ymm6, ymm4
   891  	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd    ymm1, ymm3, ymm1, ymm4
   892  	LONG $0x7ef9c1c4; BYTE $0xea   // vmovq    r10, xmm5
   893  	LONG $0xd857f5c5               // vxorpd    ymm3, ymm1, ymm0
   894  	LONG $0xe0efedc5               // vpxor    ymm4, ymm2, ymm0
   895  	LONG $0x375de2c4; BYTE $0xdb   // vpcmpgtq    ymm3, ymm4, ymm3
   896  	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd    ymm1, ymm2, ymm1, ymm3
   897  	LONG $0x197de3c4; WORD $0x01ca // vextractf128    xmm2, ymm1, 1
   898  	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
   899  	LONG $0xe057e9c5               // vxorpd    xmm4, xmm2, xmm0
   900  	LONG $0x3759e2c4; BYTE $0xdb   // vpcmpgtq    xmm3, xmm4, xmm3
   901  	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
   902  	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps    xmm2, xmm1, 78
   903  	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
   904  	LONG $0xc057e9c5               // vxorpd    xmm0, xmm2, xmm0
   905  	LONG $0x3779e2c4; BYTE $0xc3   // vpcmpgtq    xmm0, xmm0, xmm3
   906  	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd    xmm0, xmm2, xmm1, xmm0
   907  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
   908  	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
   909  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   910  	JE   LBB7_8 // length was a multiple of 16: nothing left for the scalar loop
   911  
   912  LBB7_7: // scalar loop over elements [r9, r8) using unsigned conditions: rax = running min, r10 (copied from rsi each pass) = running max
   913  	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
   914  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   915  	LONG $0xc6430f48         // cmovae    rax, rsi
   916  	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
   917  	LONG $0xf2470f49         // cmova    rsi, r10
   918  	LONG $0x01c18349         // add    r9, 1
   919  	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
   920  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   921  	JNE  LBB7_7
   922  
   923  LBB7_8: // common exit: *maxout = rsi, *minout = rax
   924  	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
   925  	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
   926  	VZEROUPPER
   927  	RET // with the non-zero frame, the assembler's epilogue reloads the caller's BP before returning