github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/min_max_avx2_amd64.s (about)

     1  //+build !noasm !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x7fffffff80000000 // little-endian: bytes 0-3 = 0x80000000 (INT32_MIN, max seed), bytes 4-7 = 0x7fffffff (INT32_MAX, min seed)
     5  GLOBL LCDATA1<>(SB), 8, $8 // flag 8 is RODATA in Go's textflag.h; symbol size is 8 bytes
     6  
     7  TEXT ·_int32_max_min_avx2(SB), $8-32
        	// _int32_max_min_avx2(values, length, minout, maxout)
        	//
        	// Scans `length` signed 32-bit values starting at `values` and stores the
        	// smallest one to *minout and the largest one to *maxout (4 bytes each).
        	// Arguments are four 8-byte words at 0/8/16/24(FP); nothing is returned.
        	//
        	// Behaviour visible in the code below:
        	//   * Only the low 32 bits of length are inspected (test esi / mov r8d, esi).
        	//     If that value is <= 0 (signed), INT32_MAX is stored as the minimum and
        	//     INT32_MIN as the maximum, and values is never dereferenced.
        	//   * Counts of 32 or more run the AVX2 loop over count&^31 elements
        	//     (32 per iteration), then a scalar loop handles the remainder.
        	//   * Counts of 1..31 use the scalar loop only.
        	//
        	// Register roles: DI=values, R8=element count, R9=vector end / scalar index,
        	// AX=vector index, later the running minimum, R10=running maximum,
        	// SI=current element / maximum to store, DX=minout, CX=maxout,
        	// BP=address of LCDATA1.
        	//
        	// The frame size is deliberately non-zero: BP is overwritten below to address
        	// LCDATA1, and with a zero-size frame the Go assembler would not save and
        	// restore the caller's frame pointer around this function.
     8  
     9  	MOVQ values+0(FP), DI
    10  	MOVQ length+8(FP), SI
    11  	MOVQ minout+16(FP), DX
    12  	MOVQ maxout+24(FP), CX
    13  	LEAQ LCDATA1<>(SB), BP
    14  
    15  	WORD $0xf685                   // test    esi, esi
    16  	JLE  LBB0_1
    17  	WORD $0x8941; BYTE $0xf0       // mov    r8d, esi
    18  	WORD $0xfe83; BYTE $0x1f       // cmp    esi, 31
    19  	JA   LBB0_4
        	// 1..31 elements: seed running max/min and go straight to the scalar loop.
    20  	LONG $0x0000ba41; WORD $0x8000 // mov    r10d, -2147483648
    21  	LONG $0xffffffb8; BYTE $0x7f   // mov    eax, 2147483647
    22  	WORD $0x3145; BYTE $0xc9       // xor    r9d, r9d
    23  	JMP  LBB0_7
    24  
        // Empty input: store the seeds (min = INT32_MAX, max = INT32_MIN).
    25  LBB0_1:
    26  	LONG $0xffffffb8; BYTE $0x7f // mov    eax, 2147483647
    27  	LONG $0x000000be; BYTE $0x80 // mov    esi, -2147483648
    28  	JMP  LBB0_8
    29  
        // Vector setup: r9 = count rounded down to a multiple of 32,
        // ymm0-ymm3 = min accumulators (INT32_MAX), ymm4-ymm7 = max accumulators (INT32_MIN).
    30  LBB0_4:
    31  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
    32  	LONG $0x587de2c4; WORD $0x0065 // vpbroadcastd    ymm4, dword 0[rbp] /* [rip + .LCPI0_0] */
    33  	LONG $0xe0e18341               // and    r9d, -32
    34  	LONG $0x587de2c4; WORD $0x0445 // vpbroadcastd    ymm0, dword 4[rbp] /* [rip + .LCPI0_1] */
    35  	WORD $0xc031                   // xor    eax, eax
    36  	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
    37  	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
    38  	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
    39  	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
    40  	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
    41  	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
    42  
        // Main loop: four unaligned 8-lane loads per iteration, each folded into its
        // own min and max accumulator; rax advances by 32 elements until it equals r9.
    43  LBB0_5:
    44  	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
    45  	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
    46  	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
    47  	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
    48  	LONG $0x397dc2c4; BYTE $0xc0   // vpminsd    ymm0, ymm0, ymm8
    49  	LONG $0x3975c2c4; BYTE $0xc9   // vpminsd    ymm1, ymm1, ymm9
    50  	LONG $0x396dc2c4; BYTE $0xd2   // vpminsd    ymm2, ymm2, ymm10
    51  	LONG $0x3965c2c4; BYTE $0xdb   // vpminsd    ymm3, ymm3, ymm11
    52  	LONG $0x3d5dc2c4; BYTE $0xe0   // vpmaxsd    ymm4, ymm4, ymm8
    53  	LONG $0x3d55c2c4; BYTE $0xe9   // vpmaxsd    ymm5, ymm5, ymm9
    54  	LONG $0x3d4dc2c4; BYTE $0xf2   // vpmaxsd    ymm6, ymm6, ymm10
    55  	LONG $0x3d45c2c4; BYTE $0xfb   // vpmaxsd    ymm7, ymm7, ymm11
    56  	LONG $0x20c08348               // add    rax, 32
    57  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
    58  	JNE  LBB0_5
        	// Fold the four max accumulators into ymm4, then reduce across lanes into r10d.
    59  	LONG $0x3d5de2c4; BYTE $0xe5   // vpmaxsd    ymm4, ymm4, ymm5
    60  	LONG $0x3d5de2c4; BYTE $0xe6   // vpmaxsd    ymm4, ymm4, ymm6
    61  	LONG $0x3d5de2c4; BYTE $0xe7   // vpmaxsd    ymm4, ymm4, ymm7
    62  	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
    63  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
    64  	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
    65  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
    66  	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
    67  	LONG $0x3d59e2c4; BYTE $0xe5   // vpmaxsd    xmm4, xmm4, xmm5
    68  	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
        	// Same reduction for the min accumulators, ending in eax.
    69  	LONG $0x397de2c4; BYTE $0xc1   // vpminsd    ymm0, ymm0, ymm1
    70  	LONG $0x397de2c4; BYTE $0xc2   // vpminsd    ymm0, ymm0, ymm2
    71  	LONG $0x397de2c4; BYTE $0xc3   // vpminsd    ymm0, ymm0, ymm3
    72  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
    73  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
    74  	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
    75  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
    76  	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
    77  	LONG $0x3979e2c4; BYTE $0xc1   // vpminsd    xmm0, xmm0, xmm1
    78  	LONG $0xc07ef9c5               // vmovd    eax, xmm0
        	// esi carries the max to the store; skip the tail when count was a multiple of 32.
    79  	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
    80  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
    81  	JE   LBB0_8
    82  
        // Scalar loop over elements r9..r8-1: eax = running min, r10d/esi = running max.
    83  LBB0_7:
    84  	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
    85  	WORD $0xf039             // cmp    eax, esi
    86  	WORD $0x4f0f; BYTE $0xc6 // cmovg    eax, esi
    87  	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
    88  	LONG $0xf24d0f41         // cmovge    esi, r10d
    89  	LONG $0x01c18349         // add    r9, 1
    90  	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
    91  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
    92  	JNE  LBB0_7
    93  
        // Store results: *maxout = esi, *minout = eax.
    94  LBB0_8:
    95  	WORD $0x3189 // mov    dword [rcx], esi
    96  	WORD $0x0289 // mov    dword [rdx], eax
    97  	VZEROUPPER
    98  	RET
    99  
   100  TEXT ·_uint32_max_min_avx2(SB), $0-32
        	// _uint32_max_min_avx2(values, length, minout, maxout)
        	//
        	// Scans `length` unsigned 32-bit values starting at `values` and stores the
        	// smallest one to *minout and the largest one to *maxout (4 bytes each).
        	// Arguments are four 8-byte words at 0/8/16/24(FP); nothing is returned.
        	//
        	// Behaviour visible in the code below:
        	//   * Only the low 32 bits of length are inspected (test esi / mov r8d, esi).
        	//     If that value is <= 0 (signed), 0xFFFFFFFF is stored as the minimum and
        	//     0 as the maximum, and values is never dereferenced.
        	//   * Counts of 32 or more run the AVX2 loop over count&^31 elements
        	//     (32 per iteration), then a scalar loop handles the remainder.
        	//   * Counts of 1..31 use the scalar loop only.
        	//
        	// Register roles: DI=values, R8=element count, R9=vector end / scalar index,
        	// AX=vector index, later the running minimum, R10=running maximum,
        	// SI=current element / maximum to store, DX=minout, CX=maxout.
   101  
   102  	MOVQ values+0(FP), DI
   103  	MOVQ length+8(FP), SI
   104  	MOVQ minout+16(FP), DX
   105  	MOVQ maxout+24(FP), CX
   106  
   107  	WORD $0xf685                 // test    esi, esi
   108  	JLE  LBB1_1
   109  	WORD $0x8941; BYTE $0xf0     // mov    r8d, esi
   110  	WORD $0xfe83; BYTE $0x1f     // cmp    esi, 31
   111  	JA   LBB1_4
        	// 1..31 elements: index = 0, running min = all ones, running max = 0.
   112  	WORD $0x3145; BYTE $0xc9     // xor    r9d, r9d
   113  	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
   114  	WORD $0x3145; BYTE $0xd2     // xor    r10d, r10d
   115  	JMP  LBB1_7
   116  
        // Empty input: store the seeds (min = 0xFFFFFFFF, max = 0).
   117  LBB1_1:
   118  	LONG $0xffffffb8; BYTE $0xff // mov    eax, -1
   119  	WORD $0xf631                 // xor    esi, esi
   120  	JMP  LBB1_8
   121  
        // Vector setup: r9 = count rounded down to a multiple of 32,
        // ymm0-ymm3 = min accumulators (all ones via vpcmpeqd),
        // ymm4-ymm7 = max accumulators (zero via vpxor).
   122  LBB1_4:
   123  	WORD $0x8945; BYTE $0xc1 // mov    r9d, r8d
   124  	LONG $0xe0e18341         // and    r9d, -32
   125  	LONG $0xe4efd9c5         // vpxor    xmm4, xmm4, xmm4
   126  	LONG $0xc076fdc5         // vpcmpeqd    ymm0, ymm0, ymm0
   127  	WORD $0xc031             // xor    eax, eax
   128  	LONG $0xc976f5c5         // vpcmpeqd    ymm1, ymm1, ymm1
   129  	LONG $0xd276edc5         // vpcmpeqd    ymm2, ymm2, ymm2
   130  	LONG $0xdb76e5c5         // vpcmpeqd    ymm3, ymm3, ymm3
   131  	LONG $0xedefd1c5         // vpxor    xmm5, xmm5, xmm5
   132  	LONG $0xf6efc9c5         // vpxor    xmm6, xmm6, xmm6
   133  	LONG $0xffefc1c5         // vpxor    xmm7, xmm7, xmm7
   134  
        // Main loop: four unaligned 8-lane loads per iteration, each folded into its
        // own min and max accumulator; rax advances by 32 elements until it equals r9.
   135  LBB1_5:
   136  	LONG $0x046f7ec5; BYTE $0x87   // vmovdqu    ymm8, yword [rdi + 4*rax]
   137  	LONG $0x4c6f7ec5; WORD $0x2087 // vmovdqu    ymm9, yword [rdi + 4*rax + 32]
   138  	LONG $0x546f7ec5; WORD $0x4087 // vmovdqu    ymm10, yword [rdi + 4*rax + 64]
   139  	LONG $0x5c6f7ec5; WORD $0x6087 // vmovdqu    ymm11, yword [rdi + 4*rax + 96]
   140  	LONG $0x3b7dc2c4; BYTE $0xc0   // vpminud    ymm0, ymm0, ymm8
   141  	LONG $0x3b75c2c4; BYTE $0xc9   // vpminud    ymm1, ymm1, ymm9
   142  	LONG $0x3b6dc2c4; BYTE $0xd2   // vpminud    ymm2, ymm2, ymm10
   143  	LONG $0x3b65c2c4; BYTE $0xdb   // vpminud    ymm3, ymm3, ymm11
   144  	LONG $0x3f5dc2c4; BYTE $0xe0   // vpmaxud    ymm4, ymm4, ymm8
   145  	LONG $0x3f55c2c4; BYTE $0xe9   // vpmaxud    ymm5, ymm5, ymm9
   146  	LONG $0x3f4dc2c4; BYTE $0xf2   // vpmaxud    ymm6, ymm6, ymm10
   147  	LONG $0x3f45c2c4; BYTE $0xfb   // vpmaxud    ymm7, ymm7, ymm11
   148  	LONG $0x20c08348               // add    rax, 32
   149  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   150  	JNE  LBB1_5
        	// Fold the four max accumulators into ymm4, then reduce across lanes into r10d.
   151  	LONG $0x3f5de2c4; BYTE $0xe5   // vpmaxud    ymm4, ymm4, ymm5
   152  	LONG $0x3f5de2c4; BYTE $0xe6   // vpmaxud    ymm4, ymm4, ymm6
   153  	LONG $0x3f5de2c4; BYTE $0xe7   // vpmaxud    ymm4, ymm4, ymm7
   154  	LONG $0x397de3c4; WORD $0x01e5 // vextracti128    xmm5, ymm4, 1
   155  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   156  	LONG $0xec70f9c5; BYTE $0x4e   // vpshufd    xmm5, xmm4, 78
   157  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   158  	LONG $0xec70f9c5; BYTE $0xe5   // vpshufd    xmm5, xmm4, 229
   159  	LONG $0x3f59e2c4; BYTE $0xe5   // vpmaxud    xmm4, xmm4, xmm5
   160  	LONG $0x7e79c1c4; BYTE $0xe2   // vmovd    r10d, xmm4
        	// Same reduction for the min accumulators, ending in eax.
   161  	LONG $0x3b7de2c4; BYTE $0xc1   // vpminud    ymm0, ymm0, ymm1
   162  	LONG $0x3b7de2c4; BYTE $0xc2   // vpminud    ymm0, ymm0, ymm2
   163  	LONG $0x3b7de2c4; BYTE $0xc3   // vpminud    ymm0, ymm0, ymm3
   164  	LONG $0x397de3c4; WORD $0x01c1 // vextracti128    xmm1, ymm0, 1
   165  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   166  	LONG $0xc870f9c5; BYTE $0x4e   // vpshufd    xmm1, xmm0, 78
   167  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   168  	LONG $0xc870f9c5; BYTE $0xe5   // vpshufd    xmm1, xmm0, 229
   169  	LONG $0x3b79e2c4; BYTE $0xc1   // vpminud    xmm0, xmm0, xmm1
   170  	LONG $0xc07ef9c5               // vmovd    eax, xmm0
        	// esi carries the max to the store; skip the tail when count was a multiple of 32.
   171  	WORD $0x8944; BYTE $0xd6       // mov    esi, r10d
   172  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   173  	JE   LBB1_8
   174  
        // Scalar loop over elements r9..r8-1 using unsigned conditions (cmovae / cmova):
        // eax = running min, r10d/esi = running max.
   175  LBB1_7:
   176  	LONG $0x8f348b42         // mov    esi, dword [rdi + 4*r9]
   177  	WORD $0xf039             // cmp    eax, esi
   178  	WORD $0x430f; BYTE $0xc6 // cmovae    eax, esi
   179  	WORD $0x3941; BYTE $0xf2 // cmp    r10d, esi
   180  	LONG $0xf2470f41         // cmova    esi, r10d
   181  	LONG $0x01c18349         // add    r9, 1
   182  	WORD $0x8941; BYTE $0xf2 // mov    r10d, esi
   183  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   184  	JNE  LBB1_7
   185  
        // Store results: *maxout = esi, *minout = eax.
   186  LBB1_8:
   187  	WORD $0x3189 // mov    dword [rcx], esi
   188  	WORD $0x0289 // mov    dword [rdx], eax
   189  	VZEROUPPER
   190  	RET
   191  
   192  DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000 // offset 0: INT64_MIN, seed for the max accumulators
   193  DATA LCDATA2<>+0x008(SB)/8, $0x7fffffffffffffff // offset 8: INT64_MAX, seed for the min accumulators
   194  GLOBL LCDATA2<>(SB), 8, $16 // flag 8 is RODATA in Go's textflag.h; symbol size is 16 bytes
   195  
   196  TEXT ·_int64_max_min_avx2(SB), $8-32
        	// _int64_max_min_avx2(values, length, minout, maxout)
        	//
        	// Scans `length` signed 64-bit values starting at `values` and stores the
        	// smallest one to *minout and the largest one to *maxout (8 bytes each).
        	// Arguments are four 8-byte words at 0/8/16/24(FP); nothing is returned.
        	//
        	// Behaviour visible in the code below:
        	//   * Only the low 32 bits of length are inspected (test esi / mov r8d, esi).
        	//     If that value is <= 0 (signed), INT64_MAX is stored as the minimum and
        	//     INT64_MIN as the maximum, and values is never dereferenced.
        	//   * Counts of 16 or more run the AVX2 loop over count&^15 elements
        	//     (16 per iteration), then a scalar loop handles the remainder.
        	//   * Counts of 1..15 use the scalar loop only.
        	//   * The vector code selects min/max with vpcmpgtq + vblendvpd pairs.
        	//
        	// Register roles: DI=values, R8=element count, R9=vector end / scalar index,
        	// AX=INT64_MAX seed, then vector index, later the running minimum,
        	// R10=running maximum, SI=current element / maximum to store,
        	// DX=minout, CX=maxout, BP=address of LCDATA2.
        	//
        	// The frame size is deliberately non-zero: BP is overwritten below to address
        	// LCDATA2, and with a zero-size frame the Go assembler would not save and
        	// restore the caller's frame pointer around this function.
   197  
   198  	MOVQ values+0(FP), DI
   199  	MOVQ length+8(FP), SI
   200  	MOVQ minout+16(FP), DX
   201  	MOVQ maxout+24(FP), CX
   202  	LEAQ LCDATA2<>(SB), BP
   203  
        	// rax = INT64_MAX is loaded before the length test so that both the empty
        	// path and the short path can form INT64_MIN as rax+1 with a single lea.
   204  	QUAD $0xffffffffffffb848; WORD $0x7fff // mov    rax, 9223372036854775807
   205  	WORD $0xf685                           // test    esi, esi
   206  	JLE  LBB2_1
   207  	WORD $0x8941; BYTE $0xf0               // mov    r8d, esi
   208  	WORD $0xfe83; BYTE $0x0f               // cmp    esi, 15
   209  	JA   LBB2_4
        	// 1..15 elements: running max = INT64_MIN, index = 0, scalar loop only.
   210  	LONG $0x01508d4c                       // lea    r10, [rax + 1]
   211  	WORD $0x3145; BYTE $0xc9               // xor    r9d, r9d
   212  	JMP  LBB2_7
   213  
        // Empty input: min = rax (INT64_MAX), max = rax+1 (wraps to INT64_MIN).
   214  LBB2_1:
   215  	LONG $0x01708d48 // lea    rsi, [rax + 1]
   216  	JMP  LBB2_8
   217  
        // Vector setup: r9 = count rounded down to a multiple of 16,
        // ymm0-ymm3 = min accumulators (INT64_MAX), ymm4-ymm7 = max accumulators (INT64_MIN).
   218  LBB2_4:
   219  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
   220  	LONG $0x597de2c4; WORD $0x0065 // vpbroadcastq    ymm4, qword 0[rbp] /* [rip + .LCPI2_0] */
   221  	LONG $0xf0e18341               // and    r9d, -16
   222  	LONG $0x597de2c4; WORD $0x0845 // vpbroadcastq    ymm0, qword 8[rbp] /* [rip + .LCPI2_1] */
   223  	WORD $0xc031                   // xor    eax, eax
   224  	LONG $0xd86ffdc5               // vmovdqa    ymm3, ymm0
   225  	LONG $0xd06ffdc5               // vmovdqa    ymm2, ymm0
   226  	LONG $0xc86ffdc5               // vmovdqa    ymm1, ymm0
   227  	LONG $0xfc6ffdc5               // vmovdqa    ymm7, ymm4
   228  	LONG $0xf46ffdc5               // vmovdqa    ymm6, ymm4
   229  	LONG $0xec6ffdc5               // vmovdqa    ymm5, ymm4
   230  
        // Main loop: four unaligned 4-lane loads (ymm8-ymm11). Each compare produces a
        // lane mask that the following vblendvpd uses to pick either the accumulator or
        // the freshly loaded value. Mins go to ymm0/3/2/1, maxes to ymm4/7/6/5.
   231  LBB2_5:
   232  	LONG $0x046f7ec5; BYTE $0xc7   // vmovdqu    ymm8, yword [rdi + 8*rax]
   233  	LONG $0x373d62c4; BYTE $0xc8   // vpcmpgtq    ymm9, ymm8, ymm0
   234  	LONG $0x4b3de3c4; WORD $0x90c0 // vblendvpd    ymm0, ymm8, ymm0, ymm9
   235  	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
   236  	LONG $0x373562c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm9, ymm3
   237  	LONG $0x4b35e3c4; WORD $0xa0db // vblendvpd    ymm3, ymm9, ymm3, ymm10
   238  	LONG $0x546f7ec5; WORD $0x40c7 // vmovdqu    ymm10, yword [rdi + 8*rax + 64]
   239  	LONG $0x372d62c4; BYTE $0xda   // vpcmpgtq    ymm11, ymm10, ymm2
   240  	LONG $0x4b2de3c4; WORD $0xb0d2 // vblendvpd    ymm2, ymm10, ymm2, ymm11
   241  	LONG $0x5c6f7ec5; WORD $0x60c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 96]
   242  	LONG $0x372562c4; BYTE $0xe1   // vpcmpgtq    ymm12, ymm11, ymm1
   243  	LONG $0x4b25e3c4; WORD $0xc0c9 // vblendvpd    ymm1, ymm11, ymm1, ymm12
   244  	LONG $0x375d42c4; BYTE $0xe0   // vpcmpgtq    ymm12, ymm4, ymm8
   245  	LONG $0x4b3de3c4; WORD $0xc0e4 // vblendvpd    ymm4, ymm8, ymm4, ymm12
   246  	LONG $0x374542c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm7, ymm9
   247  	LONG $0x4b35e3c4; WORD $0x80ff // vblendvpd    ymm7, ymm9, ymm7, ymm8
   248  	LONG $0x374d42c4; BYTE $0xc2   // vpcmpgtq    ymm8, ymm6, ymm10
   249  	LONG $0x4b2de3c4; WORD $0x80f6 // vblendvpd    ymm6, ymm10, ymm6, ymm8
   250  	LONG $0x375542c4; BYTE $0xc3   // vpcmpgtq    ymm8, ymm5, ymm11
   251  	LONG $0x4b25e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm11, ymm5, ymm8
   252  	LONG $0x10c08348               // add    rax, 16
   253  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   254  	JNE  LBB2_5
        	// Fold the four max accumulators into ymm4, then reduce across lanes into r10.
   255  	LONG $0x375d62c4; BYTE $0xc7   // vpcmpgtq    ymm8, ymm4, ymm7
   256  	LONG $0x4b45e3c4; WORD $0x80e4 // vblendvpd    ymm4, ymm7, ymm4, ymm8
   257  	LONG $0x375de2c4; BYTE $0xfe   // vpcmpgtq    ymm7, ymm4, ymm6
   258  	LONG $0x4b4de3c4; WORD $0x70e4 // vblendvpd    ymm4, ymm6, ymm4, ymm7
   259  	LONG $0x375de2c4; BYTE $0xf5   // vpcmpgtq    ymm6, ymm4, ymm5
   260  	LONG $0x4b55e3c4; WORD $0x60e4 // vblendvpd    ymm4, ymm5, ymm4, ymm6
   261  	LONG $0x197de3c4; WORD $0x01e5 // vextractf128    xmm5, ymm4, 1
   262  	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
   263  	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
   264  	LONG $0x0479e3c4; WORD $0x4eec // vpermilps    xmm5, xmm4, 78
   265  	LONG $0x3759e2c4; BYTE $0xf5   // vpcmpgtq    xmm6, xmm4, xmm5
   266  	LONG $0x4b51e3c4; WORD $0x60e4 // vblendvpd    xmm4, xmm5, xmm4, xmm6
   267  	LONG $0x7ef9c1c4; BYTE $0xe2   // vmovq    r10, xmm4
        	// Same reduction for the min accumulators, ending in rax.
   268  	LONG $0x3765e2c4; BYTE $0xe0   // vpcmpgtq    ymm4, ymm3, ymm0
   269  	LONG $0x4b65e3c4; WORD $0x40c0 // vblendvpd    ymm0, ymm3, ymm0, ymm4
   270  	LONG $0x376de2c4; BYTE $0xd8   // vpcmpgtq    ymm3, ymm2, ymm0
   271  	LONG $0x4b6de3c4; WORD $0x30c0 // vblendvpd    ymm0, ymm2, ymm0, ymm3
   272  	LONG $0x3775e2c4; BYTE $0xd0   // vpcmpgtq    ymm2, ymm1, ymm0
   273  	LONG $0x4b75e3c4; WORD $0x20c0 // vblendvpd    ymm0, ymm1, ymm0, ymm2
   274  	LONG $0x197de3c4; WORD $0x01c1 // vextractf128    xmm1, ymm0, 1
   275  	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
   276  	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
   277  	LONG $0x0479e3c4; WORD $0x4ec8 // vpermilps    xmm1, xmm0, 78
   278  	LONG $0x3771e2c4; BYTE $0xd0   // vpcmpgtq    xmm2, xmm1, xmm0
   279  	LONG $0x4b71e3c4; WORD $0x20c0 // vblendvpd    xmm0, xmm1, xmm0, xmm2
   280  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
        	// rsi carries the max to the store; skip the tail when count was a multiple of 16.
   281  	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
   282  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   283  	JE   LBB2_8
   284  
        // Scalar loop over elements r9..r8-1: rax = running min, r10/rsi = running max.
   285  LBB2_7:
   286  	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
   287  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   288  	LONG $0xc64f0f48         // cmovg    rax, rsi
   289  	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
   290  	LONG $0xf24d0f49         // cmovge    rsi, r10
   291  	LONG $0x01c18349         // add    r9, 1
   292  	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
   293  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   294  	JNE  LBB2_7
   295  
        // Store results: *maxout = rsi, *minout = rax.
   296  LBB2_8:
   297  	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
   298  	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
   299  	VZEROUPPER
   300  	RET
   301  
   302  DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000 // sign-bit mask XORed into operands before each signed 64-bit compare
   303  GLOBL LCDATA3<>(SB), 8, $8 // flag 8 is RODATA in Go's textflag.h; symbol size is 8 bytes
   304  
   305  TEXT ·_uint64_max_min_avx2(SB), $8-32
        	// _uint64_max_min_avx2(values, length, minout, maxout)
        	//
        	// Scans `length` unsigned 64-bit values starting at `values` and stores the
        	// smallest one to *minout and the largest one to *maxout (8 bytes each).
        	// Arguments are four 8-byte words at 0/8/16/24(FP); nothing is returned.
        	//
        	// Behaviour visible in the code below:
        	//   * Only the low 32 bits of length are inspected (test esi / mov r8d, esi).
        	//     If that value is <= 0 (signed), all-ones is stored as the minimum and
        	//     0 as the maximum, and values is never dereferenced.
        	//   * Counts of 16 or more run the AVX2 loop over count&^15 elements
        	//     (16 per iteration), then a scalar loop handles the remainder.
        	//   * Counts of 1..15 use the scalar loop only.
        	//   * vpcmpgtq compares signed quadwords, so the vector code XORs both
        	//     operands with the sign-bit mask in ymm0 before every compare to obtain
        	//     unsigned ordering; the blends then select from the unmodified values.
        	//
        	// Register roles: DI=values, R8=element count, R9=vector end / scalar index,
        	// AX=vector index, later the running minimum, R10=running maximum,
        	// SI=current element / maximum to store, DX=minout, CX=maxout,
        	// BP=address of LCDATA3.
        	//
        	// The frame size is deliberately non-zero: BP is overwritten below to address
        	// LCDATA3, and with a zero-size frame the Go assembler would not save and
        	// restore the caller's frame pointer around this function.
   306  
   307  	MOVQ values+0(FP), DI
   308  	MOVQ length+8(FP), SI
   309  	MOVQ minout+16(FP), DX
   310  	MOVQ maxout+24(FP), CX
   311  	LEAQ LCDATA3<>(SB), BP
   312  
   313  	WORD $0xf685                               // test    esi, esi
   314  	JLE  LBB3_1
   315  	WORD $0x8941; BYTE $0xf0                   // mov    r8d, esi
   316  	WORD $0xfe83; BYTE $0x0f                   // cmp    esi, 15
   317  	JA   LBB3_4
        	// 1..15 elements: running min = all ones, index = 0, running max = 0.
   318  	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
   319  	WORD $0x3145; BYTE $0xc9                   // xor    r9d, r9d
   320  	WORD $0x3145; BYTE $0xd2                   // xor    r10d, r10d
   321  	JMP  LBB3_7
   322  
        // Empty input: store the seeds (min = all ones, max = 0).
   323  LBB3_1:
   324  	LONG $0xffc0c748; WORD $0xffff; BYTE $0xff // mov    rax, -1
   325  	WORD $0xf631                               // xor    esi, esi
   326  	JMP  LBB3_8
   327  
        // Vector setup: r9 = count rounded down to a multiple of 16,
        // ymm0 = sign-bit mask in every lane,
        // ymm1/ymm4/ymm3/ymm2 = min accumulators (all ones via vpcmpeqd),
        // ymm5/ymm8/ymm7/ymm6 = max accumulators (zero via vpxor).
   328  LBB3_4:
   329  	WORD $0x8945; BYTE $0xc1       // mov    r9d, r8d
   330  	LONG $0xf0e18341               // and    r9d, -16
   331  	LONG $0xedefd1c5               // vpxor    xmm5, xmm5, xmm5
   332  	LONG $0xc976f5c5               // vpcmpeqd    ymm1, ymm1, ymm1
   333  	WORD $0xc031                   // xor    eax, eax
   334  	LONG $0x597de2c4; WORD $0x0045 // vpbroadcastq    ymm0, qword 0[rbp] /* [rip + .LCPI3_0] */
   335  	LONG $0xe476ddc5               // vpcmpeqd    ymm4, ymm4, ymm4
   336  	LONG $0xdb76e5c5               // vpcmpeqd    ymm3, ymm3, ymm3
   337  	LONG $0xd276edc5               // vpcmpeqd    ymm2, ymm2, ymm2
   338  	LONG $0xef3941c4; BYTE $0xc0   // vpxor    xmm8, xmm8, xmm8
   339  	LONG $0xffefc1c5               // vpxor    xmm7, xmm7, xmm7
   340  	LONG $0xf6efc9c5               // vpxor    xmm6, xmm6, xmm6
   341  
        // Main loop: four unaligned 4-lane loads per iteration. For each load the code
        // biases the accumulator and the loaded value with ymm0, compares them, and
        // blends the loaded value into the min and max accumulator for that load.
   342  LBB3_5:
   343  	LONG $0x0c6f7ec5; BYTE $0xc7   // vmovdqu    ymm9, yword [rdi + 8*rax]
   344  	LONG $0xd0ef75c5               // vpxor    ymm10, ymm1, ymm0
   345  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   346  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   347  	LONG $0x4b35e3c4; WORD $0xa0c9 // vblendvpd    ymm1, ymm9, ymm1, ymm10
   348  	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
   349  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   350  	LONG $0x4b35e3c4; WORD $0xa0ed // vblendvpd    ymm5, ymm9, ymm5, ymm10
   351  	LONG $0x4c6f7ec5; WORD $0x20c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 32]
   352  	LONG $0xd0ef5dc5               // vpxor    ymm10, ymm4, ymm0
   353  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   354  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   355  	LONG $0x4b35e3c4; WORD $0xa0e4 // vblendvpd    ymm4, ymm9, ymm4, ymm10
   356  	LONG $0xd0ef3dc5               // vpxor    ymm10, ymm8, ymm0
   357  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   358  	LONG $0x5c6f7ec5; WORD $0x40c7 // vmovdqu    ymm11, yword [rdi + 8*rax + 64]
   359  	LONG $0x4b3543c4; WORD $0xa0c0 // vblendvpd    ymm8, ymm9, ymm8, ymm10
   360  	LONG $0xc8ef65c5               // vpxor    ymm9, ymm3, ymm0
   361  	LONG $0xd0ef25c5               // vpxor    ymm10, ymm11, ymm0
   362  	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
   363  	LONG $0x4b25e3c4; WORD $0x90db // vblendvpd    ymm3, ymm11, ymm3, ymm9
   364  	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
   365  	LONG $0x373542c4; BYTE $0xca   // vpcmpgtq    ymm9, ymm9, ymm10
   366  	LONG $0x4b25e3c4; WORD $0x90ff // vblendvpd    ymm7, ymm11, ymm7, ymm9
   367  	LONG $0x4c6f7ec5; WORD $0x60c7 // vmovdqu    ymm9, yword [rdi + 8*rax + 96]
   368  	LONG $0xd0ef6dc5               // vpxor    ymm10, ymm2, ymm0
   369  	LONG $0xd8ef35c5               // vpxor    ymm11, ymm9, ymm0
   370  	LONG $0x372542c4; BYTE $0xd2   // vpcmpgtq    ymm10, ymm11, ymm10
   371  	LONG $0x4b35e3c4; WORD $0xa0d2 // vblendvpd    ymm2, ymm9, ymm2, ymm10
   372  	LONG $0xd0ef4dc5               // vpxor    ymm10, ymm6, ymm0
   373  	LONG $0x372d42c4; BYTE $0xd3   // vpcmpgtq    ymm10, ymm10, ymm11
   374  	LONG $0x4b35e3c4; WORD $0xa0f6 // vblendvpd    ymm6, ymm9, ymm6, ymm10
   375  	LONG $0x10c08348               // add    rax, 16
   376  	WORD $0x3949; BYTE $0xc1       // cmp    r9, rax
   377  	JNE  LBB3_5
        	// Fold the max accumulators (ymm8, ymm7, ymm6) into ymm5, then reduce across lanes.
   378  	LONG $0xc8ef3dc5               // vpxor    ymm9, ymm8, ymm0
   379  	LONG $0xd0ef55c5               // vpxor    ymm10, ymm5, ymm0
   380  	LONG $0x372d42c4; BYTE $0xc9   // vpcmpgtq    ymm9, ymm10, ymm9
   381  	LONG $0x4b3de3c4; WORD $0x90ed // vblendvpd    ymm5, ymm8, ymm5, ymm9
   382  	LONG $0xc05755c5               // vxorpd    ymm8, ymm5, ymm0
   383  	LONG $0xc8ef45c5               // vpxor    ymm9, ymm7, ymm0
   384  	LONG $0x373d42c4; BYTE $0xc1   // vpcmpgtq    ymm8, ymm8, ymm9
   385  	LONG $0x4b45e3c4; WORD $0x80ed // vblendvpd    ymm5, ymm7, ymm5, ymm8
   386  	LONG $0xf857d5c5               // vxorpd    ymm7, ymm5, ymm0
   387  	LONG $0xc0ef4dc5               // vpxor    ymm8, ymm6, ymm0
   388  	LONG $0x3745c2c4; BYTE $0xf8   // vpcmpgtq    ymm7, ymm7, ymm8
   389  	LONG $0x4b4de3c4; WORD $0x70ed // vblendvpd    ymm5, ymm6, ymm5, ymm7
   390  	LONG $0x197de3c4; WORD $0x01ee // vextractf128    xmm6, ymm5, 1
   391  	LONG $0xc05749c5               // vxorpd    xmm8, xmm6, xmm0
   392  	LONG $0xf857d1c5               // vxorpd    xmm7, xmm5, xmm0
   393  	LONG $0x3741c2c4; BYTE $0xf8   // vpcmpgtq    xmm7, xmm7, xmm8
   394  	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
   395  	LONG $0x0479e3c4; WORD $0x4ef5 // vpermilps    xmm6, xmm5, 78
   396  	LONG $0xc05751c5               // vxorpd    xmm8, xmm5, xmm0
   397  	LONG $0xf857c9c5               // vxorpd    xmm7, xmm6, xmm0
   398  	LONG $0x3739e2c4; BYTE $0xff   // vpcmpgtq    xmm7, xmm8, xmm7
   399  	LONG $0x4b49e3c4; WORD $0x70ed // vblendvpd    xmm5, xmm6, xmm5, xmm7
        	// Fold the min accumulators (ymm4, ymm3, ymm2) into ymm1; the max result in
        	// xmm5 is moved to r10 part-way through this sequence.
   400  	LONG $0xf0eff5c5               // vpxor    ymm6, ymm1, ymm0
   401  	LONG $0xf8efddc5               // vpxor    ymm7, ymm4, ymm0
   402  	LONG $0x3745e2c4; BYTE $0xf6   // vpcmpgtq    ymm6, ymm7, ymm6
   403  	LONG $0x4b5de3c4; WORD $0x60c9 // vblendvpd    ymm1, ymm4, ymm1, ymm6
   404  	LONG $0xe057f5c5               // vxorpd    ymm4, ymm1, ymm0
   405  	LONG $0xf0efe5c5               // vpxor    ymm6, ymm3, ymm0
   406  	LONG $0x374de2c4; BYTE $0xe4   // vpcmpgtq    ymm4, ymm6, ymm4
   407  	LONG $0x4b65e3c4; WORD $0x40c9 // vblendvpd    ymm1, ymm3, ymm1, ymm4
   408  	LONG $0x7ef9c1c4; BYTE $0xea   // vmovq    r10, xmm5
   409  	LONG $0xd857f5c5               // vxorpd    ymm3, ymm1, ymm0
   410  	LONG $0xe0efedc5               // vpxor    ymm4, ymm2, ymm0
   411  	LONG $0x375de2c4; BYTE $0xdb   // vpcmpgtq    ymm3, ymm4, ymm3
   412  	LONG $0x4b6de3c4; WORD $0x30c9 // vblendvpd    ymm1, ymm2, ymm1, ymm3
   413  	LONG $0x197de3c4; WORD $0x01ca // vextractf128    xmm2, ymm1, 1
   414  	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
   415  	LONG $0xe057e9c5               // vxorpd    xmm4, xmm2, xmm0
   416  	LONG $0x3759e2c4; BYTE $0xdb   // vpcmpgtq    xmm3, xmm4, xmm3
   417  	LONG $0x4b69e3c4; WORD $0x30c9 // vblendvpd    xmm1, xmm2, xmm1, xmm3
   418  	LONG $0x0479e3c4; WORD $0x4ed1 // vpermilps    xmm2, xmm1, 78
   419  	LONG $0xd857f1c5               // vxorpd    xmm3, xmm1, xmm0
   420  	LONG $0xc057e9c5               // vxorpd    xmm0, xmm2, xmm0
   421  	LONG $0x3779e2c4; BYTE $0xc3   // vpcmpgtq    xmm0, xmm0, xmm3
   422  	LONG $0x4b69e3c4; WORD $0x00c1 // vblendvpd    xmm0, xmm2, xmm1, xmm0
   423  	LONG $0x7ef9e1c4; BYTE $0xc0   // vmovq    rax, xmm0
        	// rsi carries the max to the store; skip the tail when count was a multiple of 16.
   424  	WORD $0x894c; BYTE $0xd6       // mov    rsi, r10
   425  	WORD $0x394d; BYTE $0xc1       // cmp    r9, r8
   426  	JE   LBB3_8
   427  
        // Scalar loop over elements r9..r8-1 using unsigned conditions (cmovae / cmova):
        // rax = running min, r10/rsi = running max.
   428  LBB3_7:
   429  	LONG $0xcf348b4a         // mov    rsi, qword [rdi + 8*r9]
   430  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   431  	LONG $0xc6430f48         // cmovae    rax, rsi
   432  	WORD $0x3949; BYTE $0xf2 // cmp    r10, rsi
   433  	LONG $0xf2470f49         // cmova    rsi, r10
   434  	LONG $0x01c18349         // add    r9, 1
   435  	WORD $0x8949; BYTE $0xf2 // mov    r10, rsi
   436  	WORD $0x394d; BYTE $0xc8 // cmp    r8, r9
   437  	JNE  LBB3_7
   438  
        // Store results: *maxout = rsi, *minout = rax.
   439  LBB3_8:
   440  	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
   441  	WORD $0x8948; BYTE $0x02 // mov    qword [rdx], rax
   442  	VZEROUPPER
   443  	RET