github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/min_max_sse4_amd64.s (about)

     1  //+build !noasm !appengine
     2  // AUTO-GENERATED BY C2GOASM -- DO NOT EDIT
     3  
     4  DATA LCDATA1<>+0x000(SB)/8, $0x8000000080000000 // bytes 0-15: four 32-bit lanes of 0x80000000 (-2147483648); read as 0[rbp] to seed the max accumulators
     5  DATA LCDATA1<>+0x008(SB)/8, $0x8000000080000000
     6  DATA LCDATA1<>+0x010(SB)/8, $0x7fffffff7fffffff // bytes 16-31: four 32-bit lanes of 0x7fffffff (2147483647); read as 16[rbp] to seed the min accumulators
     7  DATA LCDATA1<>+0x018(SB)/8, $0x7fffffff7fffffff
     8  GLOBL LCDATA1<>(SB), 8, $32 // 32-byte file-local symbol used by _int32_max_min_sse4; flag value 8 is RODATA in Go's textflag.h
     9  
    10  TEXT ·_int32_max_min_sse4(SB), $8-32
        // Signed 32-bit min/max scan using SSE4.1 pminsd/pmaxsd.
        //
        // Arguments (four 8-byte slots, 32 bytes in total):
        //   values+0(FP)  -> DI  address of the first 32-bit element
        //   length+8(FP)  -> SI  element count; only the low 32 bits (esi) are examined, as a signed value
        //   minout+16(FP) -> DX  address that receives the minimum (32-bit store)
        //   maxout+24(FP) -> CX  address that receives the maximum (32-bit store)
        //
        // When esi <= 0 the stored results are min = 2147483647 and max = -2147483648.
        //
        // The frame size is $8 rather than $0 on purpose: the body overwrites BP with
        // the address of LCDATA1, and BP is callee-save on amd64. A non-zero frame size
        // makes the Go assembler save BP in the prologue and restore it before RET, so
        // the caller's frame pointer survives the call. The 8 bytes of locals are unused.
        //
        // Register roles in the raw-encoded body: r9 = element count, r11 = index where
        // the scalar tail starts, r8 = chunk count during the vector phase and the
        // running minimum afterwards, eax/esi = running maximum.
    11  
    12  	MOVQ values+0(FP), DI
    13  	MOVQ length+8(FP), SI
    14  	MOVQ minout+16(FP), DX
    15  	MOVQ maxout+24(FP), CX
    16  	LEAQ LCDATA1<>(SB), BP // rbp = &LCDATA1 (clobbers BP; see the frame-size note above)
    17  
    18  	WORD $0xf685                   // test    esi, esi
    19  	JLE  LBB0_1
    20  	WORD $0x8941; BYTE $0xf1       // mov    r9d, esi
    21  	WORD $0xfe83; BYTE $0x07       // cmp    esi, 7
    22  	JA   LBB0_6
        // 1..7 elements: scalar loop only, starting from max = INT32_MIN, min = INT32_MAX, index 0.
    23  	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
    24  	LONG $0xffffb841; WORD $0x7fff // mov    r8d, 2147483647
    25  	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
    26  	JMP  LBB0_4
    27  
        // esi <= 0: store the initial values unchanged.
    28  LBB0_1:
    29  	LONG $0xffffb841; WORD $0x7fff // mov    r8d, 2147483647
    30  	LONG $0x000000b8; BYTE $0x80   // mov    eax, -2147483648
    31  	JMP  LBB0_13
    32  
        // 8 or more elements: r11 = count rounded down to a multiple of 8,
        // r8 = number of 8-element chunks, r10 = -(r8 & ~1) counts the unrolled loop up to zero.
    33  LBB0_6:
    34  	WORD $0x8945; BYTE $0xcb     // mov    r11d, r9d
    35  	LONG $0xf8e38341             // and    r11d, -8
    36  	LONG $0xf8438d49             // lea    rax, [r11 - 8]
    37  	WORD $0x8949; BYTE $0xc0     // mov    r8, rax
    38  	LONG $0x03e8c149             // shr    r8, 3
    39  	LONG $0x01c08349             // add    r8, 1
    40  	WORD $0x8548; BYTE $0xc0     // test    rax, rax
    41  	JE   LBB0_7
    42  	WORD $0x894d; BYTE $0xc2     // mov    r10, r8
    43  	LONG $0xfee28349             // and    r10, -2
    44  	WORD $0xf749; BYTE $0xda     // neg    r10
    45  	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
    46  	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
    47  	WORD $0xc031                 // xor    eax, eax
    48  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
    49  	LONG $0xd96f0f66             // movdqa    xmm3, xmm1
    50  
        // Unrolled vector loop: 16 elements (four 16-byte loads) per iteration.
        // xmm0/xmm2 accumulate minima, xmm1/xmm3 accumulate maxima; rax is the element index.
    51  LBB0_9:
    52  	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
    53  	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
    54  	LONG $0x746f0ff3; WORD $0x2087 // movdqu    xmm6, oword [rdi + 4*rax + 32]
    55  	LONG $0x7c6f0ff3; WORD $0x3087 // movdqu    xmm7, oword [rdi + 4*rax + 48]
    56  	LONG $0x39380f66; BYTE $0xc4   // pminsd    xmm0, xmm4
    57  	LONG $0x39380f66; BYTE $0xd5   // pminsd    xmm2, xmm5
    58  	LONG $0x3d380f66; BYTE $0xcc   // pmaxsd    xmm1, xmm4
    59  	LONG $0x3d380f66; BYTE $0xdd   // pmaxsd    xmm3, xmm5
    60  	LONG $0x39380f66; BYTE $0xc6   // pminsd    xmm0, xmm6
    61  	LONG $0x39380f66; BYTE $0xd7   // pminsd    xmm2, xmm7
    62  	LONG $0x3d380f66; BYTE $0xce   // pmaxsd    xmm1, xmm6
    63  	LONG $0x3d380f66; BYTE $0xdf   // pmaxsd    xmm3, xmm7
    64  	LONG $0x10c08348               // add    rax, 16
    65  	LONG $0x02c28349               // add    r10, 2
    66  	JNE  LBB0_9
    67  	LONG $0x01c0f641               // test    r8b, 1
    68  	JE   LBB0_12
    69  
        // Odd chunk count: fold in one more 8-element chunk.
    70  LBB0_11:
    71  	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
    72  	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
    73  	LONG $0x3d380f66; BYTE $0xdd   // pmaxsd    xmm3, xmm5
    74  	LONG $0x3d380f66; BYTE $0xcc   // pmaxsd    xmm1, xmm4
    75  	LONG $0x39380f66; BYTE $0xd5   // pminsd    xmm2, xmm5
    76  	LONG $0x39380f66; BYTE $0xc4   // pminsd    xmm0, xmm4
    77  
        // Merge the two accumulator pairs, then reduce across lanes:
        // maximum ends up in eax, minimum in r8d. Skip the scalar tail when r11 == r9.
    78  LBB0_12:
    79  	LONG $0x39380f66; BYTE $0xc2 // pminsd    xmm0, xmm2
    80  	LONG $0x3d380f66; BYTE $0xcb // pmaxsd    xmm1, xmm3
    81  	LONG $0xd1700f66; BYTE $0x4e // pshufd    xmm2, xmm1, 78
    82  	LONG $0x3d380f66; BYTE $0xd1 // pmaxsd    xmm2, xmm1
    83  	LONG $0xca700f66; BYTE $0xe5 // pshufd    xmm1, xmm2, 229
    84  	LONG $0x3d380f66; BYTE $0xca // pmaxsd    xmm1, xmm2
    85  	LONG $0xc87e0f66             // movd    eax, xmm1
    86  	LONG $0xc8700f66; BYTE $0x4e // pshufd    xmm1, xmm0, 78
    87  	LONG $0x39380f66; BYTE $0xc8 // pminsd    xmm1, xmm0
    88  	LONG $0xc1700f66; BYTE $0xe5 // pshufd    xmm0, xmm1, 229
    89  	LONG $0x39380f66; BYTE $0xc1 // pminsd    xmm0, xmm1
    90  	LONG $0x7e0f4166; BYTE $0xc0 // movd    r8d, xmm0
    91  	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
    92  	JE   LBB0_13
    93  
        // Scalar tail over elements r11 .. r9-1; esi carries the running maximum into the loop.
    94  LBB0_4:
    95  	WORD $0xc689 // mov    esi, eax
    96  
    97  LBB0_5:
    98  	LONG $0x9f048b42         // mov    eax, dword [rdi + 4*r11]
    99  	WORD $0x3941; BYTE $0xc0 // cmp    r8d, eax
   100  	LONG $0xc04f0f44         // cmovg    r8d, eax
   101  	WORD $0xc639             // cmp    esi, eax
   102  	WORD $0x4d0f; BYTE $0xc6 // cmovge    eax, esi
   103  	LONG $0x01c38349         // add    r11, 1
   104  	WORD $0xc689             // mov    esi, eax
   105  	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
   106  	JNE  LBB0_5
   107  
        // Store results: maximum through rcx (maxout), minimum through rdx (minout).
   108  LBB0_13:
   109  	WORD $0x0189             // mov    dword [rcx], eax
   110  	WORD $0x8944; BYTE $0x02 // mov    dword [rdx], r8d
   111  	RET
   112  
        // Exactly one 8-element chunk (rax == 0, so r8 == 1): initialise the accumulators
        // and go straight to the single-chunk step; the JNE below is taken because r8 is odd.
   113  LBB0_7:
   114  	LONG $0x4d6f0f66; BYTE $0x00 // movdqa    xmm1, oword 0[rbp] /* [rip + .LCPI0_0] */
   115  	LONG $0x456f0f66; BYTE $0x10 // movdqa    xmm0, oword 16[rbp] /* [rip + .LCPI0_1] */
   116  	WORD $0xc031                 // xor    eax, eax
   117  	LONG $0xd06f0f66             // movdqa    xmm2, xmm0
   118  	LONG $0xd96f0f66             // movdqa    xmm3, xmm1
   119  	LONG $0x01c0f641             // test    r8b, 1
   120  	JNE  LBB0_11
   121  	JMP  LBB0_12
   122  
   122  
   123  TEXT ·_uint32_max_min_sse4(SB), $0-32
        // Unsigned 32-bit min/max scan using SSE4.1 pminud/pmaxud.
        //
        // Arguments (four 8-byte slots, 32 bytes in total):
        //   values+0(FP)  -> DI  address of the first 32-bit element
        //   length+8(FP)  -> SI  element count; only the low 32 bits (esi) are examined, as a signed value
        //   minout+16(FP) -> DX  address that receives the minimum (32-bit store)
        //   maxout+24(FP) -> CX  address that receives the maximum (32-bit store)
        //
        // When esi <= 0 the stored results are min = 0xffffffff and max = 0.
        // This routine builds its initial accumulators with pcmpeqd/pxor, so it reads no
        // constant table and never writes BP.
        //
        // Register roles in the raw-encoded body: r9 = element count, r11 = index where
        // the scalar tail starts, r8 = chunk count during the vector phase and the
        // running minimum afterwards, esi/eax = running maximum.
   124  
   125  	MOVQ values+0(FP), DI
   126  	MOVQ length+8(FP), SI
   127  	MOVQ minout+16(FP), DX
   128  	MOVQ maxout+24(FP), CX
   129  
   130  	WORD $0xf685                   // test    esi, esi
   131  	JLE  LBB1_1
   132  	WORD $0x8941; BYTE $0xf1       // mov    r9d, esi
   133  	WORD $0xfe83; BYTE $0x07       // cmp    esi, 7
   134  	JA   LBB1_6
        // 1..7 elements: scalar loop only, starting from index 0, min = 0xffffffff, max = 0.
   135  	WORD $0x3145; BYTE $0xdb       // xor    r11d, r11d
   136  	LONG $0xffffb841; WORD $0xffff // mov    r8d, -1
   137  	WORD $0xf631                   // xor    esi, esi
   138  	JMP  LBB1_4
   139  
        // esi <= 0: store the initial values unchanged.
   140  LBB1_1:
   141  	LONG $0xffffb841; WORD $0xffff // mov    r8d, -1
   142  	WORD $0xf631                   // xor    esi, esi
   143  	JMP  LBB1_13
   144  
        // 8 or more elements: r11 = count rounded down to a multiple of 8,
        // r8 = number of 8-element chunks, r10 = -(r8 & ~1) counts the unrolled loop up to zero.
   145  LBB1_6:
   146  	WORD $0x8945; BYTE $0xcb // mov    r11d, r9d
   147  	LONG $0xf8e38341         // and    r11d, -8
   148  	LONG $0xf8438d49         // lea    rax, [r11 - 8]
   149  	WORD $0x8949; BYTE $0xc0 // mov    r8, rax
   150  	LONG $0x03e8c149         // shr    r8, 3
   151  	LONG $0x01c08349         // add    r8, 1
   152  	WORD $0x8548; BYTE $0xc0 // test    rax, rax
   153  	JE   LBB1_7
   154  	WORD $0x894d; BYTE $0xc2 // mov    r10, r8
   155  	LONG $0xfee28349         // and    r10, -2
   156  	WORD $0xf749; BYTE $0xda // neg    r10
   157  	LONG $0xc9ef0f66         // pxor    xmm1, xmm1
   158  	LONG $0xc0760f66         // pcmpeqd    xmm0, xmm0
   159  	WORD $0xc031             // xor    eax, eax
   160  	LONG $0xd2760f66         // pcmpeqd    xmm2, xmm2
   161  	LONG $0xdbef0f66         // pxor    xmm3, xmm3
   162  
        // Unrolled vector loop: 16 elements (four 16-byte loads) per iteration.
        // xmm0/xmm2 accumulate minima (seeded all-ones), xmm1/xmm3 accumulate maxima (seeded zero).
   163  LBB1_9:
   164  	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
   165  	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
   166  	LONG $0x746f0ff3; WORD $0x2087 // movdqu    xmm6, oword [rdi + 4*rax + 32]
   167  	LONG $0x7c6f0ff3; WORD $0x3087 // movdqu    xmm7, oword [rdi + 4*rax + 48]
   168  	LONG $0x3b380f66; BYTE $0xc4   // pminud    xmm0, xmm4
   169  	LONG $0x3b380f66; BYTE $0xd5   // pminud    xmm2, xmm5
   170  	LONG $0x3f380f66; BYTE $0xcc   // pmaxud    xmm1, xmm4
   171  	LONG $0x3f380f66; BYTE $0xdd   // pmaxud    xmm3, xmm5
   172  	LONG $0x3b380f66; BYTE $0xc6   // pminud    xmm0, xmm6
   173  	LONG $0x3b380f66; BYTE $0xd7   // pminud    xmm2, xmm7
   174  	LONG $0x3f380f66; BYTE $0xce   // pmaxud    xmm1, xmm6
   175  	LONG $0x3f380f66; BYTE $0xdf   // pmaxud    xmm3, xmm7
   176  	LONG $0x10c08348               // add    rax, 16
   177  	LONG $0x02c28349               // add    r10, 2
   178  	JNE  LBB1_9
   179  	LONG $0x01c0f641               // test    r8b, 1
   180  	JE   LBB1_12
   181  
        // Odd chunk count: fold in one more 8-element chunk.
   182  LBB1_11:
   183  	LONG $0x246f0ff3; BYTE $0x87   // movdqu    xmm4, oword [rdi + 4*rax]
   184  	LONG $0x6c6f0ff3; WORD $0x1087 // movdqu    xmm5, oword [rdi + 4*rax + 16]
   185  	LONG $0x3f380f66; BYTE $0xdd   // pmaxud    xmm3, xmm5
   186  	LONG $0x3f380f66; BYTE $0xcc   // pmaxud    xmm1, xmm4
   187  	LONG $0x3b380f66; BYTE $0xd5   // pminud    xmm2, xmm5
   188  	LONG $0x3b380f66; BYTE $0xc4   // pminud    xmm0, xmm4
   189  
        // Merge the two accumulator pairs, then reduce across lanes:
        // maximum ends up in esi, minimum in r8d. Skip the scalar tail when r11 == r9.
   190  LBB1_12:
   191  	LONG $0x3b380f66; BYTE $0xc2 // pminud    xmm0, xmm2
   192  	LONG $0x3f380f66; BYTE $0xcb // pmaxud    xmm1, xmm3
   193  	LONG $0xd1700f66; BYTE $0x4e // pshufd    xmm2, xmm1, 78
   194  	LONG $0x3f380f66; BYTE $0xd1 // pmaxud    xmm2, xmm1
   195  	LONG $0xca700f66; BYTE $0xe5 // pshufd    xmm1, xmm2, 229
   196  	LONG $0x3f380f66; BYTE $0xca // pmaxud    xmm1, xmm2
   197  	LONG $0xce7e0f66             // movd    esi, xmm1
   198  	LONG $0xc8700f66; BYTE $0x4e // pshufd    xmm1, xmm0, 78
   199  	LONG $0x3b380f66; BYTE $0xc8 // pminud    xmm1, xmm0
   200  	LONG $0xc1700f66; BYTE $0xe5 // pshufd    xmm0, xmm1, 229
   201  	LONG $0x3b380f66; BYTE $0xc1 // pminud    xmm0, xmm1
   202  	LONG $0x7e0f4166; BYTE $0xc0 // movd    r8d, xmm0
   203  	WORD $0x394d; BYTE $0xcb     // cmp    r11, r9
   204  	JE   LBB1_13
   205  
        // Scalar tail over elements r11 .. r9-1; eax carries the running maximum into the loop.
   206  LBB1_4:
   207  	WORD $0xf089 // mov    eax, esi
   208  
   209  LBB1_5:
   210  	LONG $0x9f348b42         // mov    esi, dword [rdi + 4*r11]
   211  	WORD $0x3941; BYTE $0xf0 // cmp    r8d, esi
   212  	LONG $0xc6430f44         // cmovae    r8d, esi
   213  	WORD $0xf039             // cmp    eax, esi
   214  	WORD $0x470f; BYTE $0xf0 // cmova    esi, eax
   215  	LONG $0x01c38349         // add    r11, 1
   216  	WORD $0xf089             // mov    eax, esi
   217  	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
   218  	JNE  LBB1_5
   219  
        // Store results: maximum through rcx (maxout), minimum through rdx (minout).
   220  LBB1_13:
   221  	WORD $0x3189             // mov    dword [rcx], esi
   222  	WORD $0x8944; BYTE $0x02 // mov    dword [rdx], r8d
   223  	RET
   224  
        // Exactly one 8-element chunk (rax == 0, so r8 == 1): initialise the accumulators
        // and go straight to the single-chunk step; the JNE below is taken because r8 is odd.
   225  LBB1_7:
   226  	LONG $0xc9ef0f66 // pxor    xmm1, xmm1
   227  	LONG $0xc0760f66 // pcmpeqd    xmm0, xmm0
   228  	WORD $0xc031     // xor    eax, eax
   229  	LONG $0xd2760f66 // pcmpeqd    xmm2, xmm2
   230  	LONG $0xdbef0f66 // pxor    xmm3, xmm3
   231  	LONG $0x01c0f641 // test    r8b, 1
   232  	JNE  LBB1_11
   233  	JMP  LBB1_12
   234  
   234  
   235  DATA LCDATA2<>+0x000(SB)/8, $0x8000000000000000 // bytes 0-15: two 64-bit lanes of 0x8000000000000000; read as 0[rbp] to seed the max accumulators
   236  DATA LCDATA2<>+0x008(SB)/8, $0x8000000000000000
   237  DATA LCDATA2<>+0x010(SB)/8, $0x7fffffffffffffff // bytes 16-31: two 64-bit lanes of 0x7fffffffffffffff; read as 16[rbp] to seed the min accumulators
   238  DATA LCDATA2<>+0x018(SB)/8, $0x7fffffffffffffff
   239  GLOBL LCDATA2<>(SB), 8, $32 // 32-byte file-local symbol used by _int64_max_min_sse4; flag value 8 is RODATA in Go's textflag.h
   240  
   241  TEXT ·_int64_max_min_sse4(SB), $8-32
        // Signed 64-bit min/max scan. There is no packed 64-bit min/max here, so each
        // step is a pcmpgtq compare followed by a blendvpd select (blendvpd takes its
        // mask implicitly from xmm0, which is why xmm0 is reloaded before every compare).
        //
        // Arguments (four 8-byte slots, 32 bytes in total):
        //   values+0(FP)  -> DI  address of the first 64-bit element
        //   length+8(FP)  -> SI  element count; only the low 32 bits (esi) are examined, as a signed value
        //   minout+16(FP) -> DX  address that receives the minimum (64-bit store)
        //   maxout+24(FP) -> CX  address that receives the maximum (64-bit store)
        //
        // When esi <= 0 the stored results are min = 0x7fffffffffffffff and
        // max = 0x8000000000000000.
        //
        // The frame size is $8 rather than $0 on purpose: the body overwrites BP with
        // the address of LCDATA2, and BP is callee-save on amd64. A non-zero frame size
        // makes the Go assembler save BP in the prologue and restore it before RET, so
        // the caller's frame pointer survives the call. The 8 bytes of locals are unused.
        //
        // Register roles in the raw-encoded body: r9 = element count, r11 = index where
        // the scalar tail starts, r8 = chunk count during the vector phase and the
        // running minimum otherwise, rsi/rax = running maximum.
   242  
   243  	MOVQ values+0(FP), DI
   244  	MOVQ length+8(FP), SI
   245  	MOVQ minout+16(FP), DX
   246  	MOVQ maxout+24(FP), CX
   247  	LEAQ LCDATA2<>(SB), BP // rbp = &LCDATA2 (clobbers BP; see the frame-size note above)
   248  
   249  	QUAD $0xffffffffffffb849; WORD $0x7fff // mov    r8, 9223372036854775807
   250  	WORD $0xf685                           // test    esi, esi
   251  	JLE  LBB2_1
   252  	WORD $0x8941; BYTE $0xf1               // mov    r9d, esi
   253  	WORD $0xfe83; BYTE $0x03               // cmp    esi, 3
   254  	JA   LBB2_6
        // 1..3 elements: scalar loop only. rsi = r8 + 1 wraps to 0x8000000000000000, the initial maximum.
   255  	LONG $0x01708d49                       // lea    rsi, [r8 + 1]
   256  	WORD $0x3145; BYTE $0xdb               // xor    r11d, r11d
   257  	JMP  LBB2_4
   258  
        // esi <= 0: store the initial values unchanged.
   259  LBB2_1:
   260  	LONG $0x01708d49 // lea    rsi, [r8 + 1]
   261  	JMP  LBB2_13
   262  
        // 4 or more elements: r11 = count rounded down to a multiple of 4,
        // r8 = number of 4-element chunks, r10 = -(r8 & ~1) counts the unrolled loop up to zero.
   263  LBB2_6:
   264  	WORD $0x8945; BYTE $0xcb       // mov    r11d, r9d
   265  	LONG $0xfce38341               // and    r11d, -4
   266  	LONG $0xfc438d49               // lea    rax, [r11 - 4]
   267  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
   268  	LONG $0x02e8c149               // shr    r8, 2
   269  	LONG $0x01c08349               // add    r8, 1
   270  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
   271  	JE   LBB2_7
   272  	WORD $0x894d; BYTE $0xc2       // mov    r10, r8
   273  	LONG $0xfee28349               // and    r10, -2
   274  	WORD $0xf749; BYTE $0xda       // neg    r10
   275  	LONG $0x6f0f4466; WORD $0x004d // movdqa    xmm9, oword 0[rbp] /* [rip + .LCPI2_0] */
   276  	LONG $0x6f0f4466; WORD $0x1045 // movdqa    xmm8, oword 16[rbp] /* [rip + .LCPI2_1] */
   277  	WORD $0xc031                   // xor    eax, eax
   278  	LONG $0x6f0f4166; BYTE $0xd0   // movdqa    xmm2, xmm8
   279  	LONG $0x6f0f4166; BYTE $0xf1   // movdqa    xmm6, xmm9
   280  
        // Unrolled vector loop: 8 elements (four 16-byte loads) per iteration.
        // xmm8/xmm2 carry the minima; xmm9/xmm6 carry the maxima (also left in xmm3/xmm4 on exit).
   281  LBB2_9:
   282  	LONG $0x3c6f0ff3; BYTE $0xc7   // movdqu    xmm7, oword [rdi + 8*rax]
   283  	LONG $0xc76f0f66               // movdqa    xmm0, xmm7
   284  	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
   285  	LONG $0xe76f0f66               // movdqa    xmm4, xmm7
   286  	LONG $0x380f4166; WORD $0xe015 // blendvpd    xmm4, xmm8, xmm0
   287  	LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu    xmm1, oword [rdi + 8*rax + 16]
   288  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
   289  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   290  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
   291  	LONG $0x15380f66; BYTE $0xea   // blendvpd    xmm5, xmm2, xmm0
   292  	LONG $0x6f0f4166; BYTE $0xc1   // movdqa    xmm0, xmm9
   293  	LONG $0x37380f66; BYTE $0xc7   // pcmpgtq    xmm0, xmm7
   294  	LONG $0x380f4166; WORD $0xf915 // blendvpd    xmm7, xmm9, xmm0
   295  	LONG $0xc66f0f66               // movdqa    xmm0, xmm6
   296  	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
   297  	LONG $0x15380f66; BYTE $0xce   // blendvpd    xmm1, xmm6, xmm0
   298  	LONG $0x5c6f0ff3; WORD $0x20c7 // movdqu    xmm3, oword [rdi + 8*rax + 32]
   299  	LONG $0xc36f0f66               // movdqa    xmm0, xmm3
   300  	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
   301  	LONG $0x6f0f4466; BYTE $0xc3   // movdqa    xmm8, xmm3
   302  	LONG $0x380f4466; WORD $0xc415 // blendvpd    xmm8, xmm4, xmm0
   303  	LONG $0x646f0ff3; WORD $0x30c7 // movdqu    xmm4, oword [rdi + 8*rax + 48]
   304  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
   305  	LONG $0x37380f66; BYTE $0xc5   // pcmpgtq    xmm0, xmm5
   306  	LONG $0xd46f0f66               // movdqa    xmm2, xmm4
   307  	LONG $0x15380f66; BYTE $0xd5   // blendvpd    xmm2, xmm5, xmm0
   308  	LONG $0xc7280f66               // movapd    xmm0, xmm7
   309  	LONG $0x37380f66; BYTE $0xc3   // pcmpgtq    xmm0, xmm3
   310  	LONG $0x15380f66; BYTE $0xdf   // blendvpd    xmm3, xmm7, xmm0
   311  	LONG $0xc1280f66               // movapd    xmm0, xmm1
   312  	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
   313  	LONG $0x15380f66; BYTE $0xe1   // blendvpd    xmm4, xmm1, xmm0
   314  	LONG $0x08c08348               // add    rax, 8
   315  	LONG $0x280f4466; BYTE $0xcb   // movapd    xmm9, xmm3
   316  	LONG $0xf4280f66               // movapd    xmm6, xmm4
   317  	LONG $0x02c28349               // add    r10, 2
   318  	JNE  LBB2_9
   319  	LONG $0x01c0f641               // test    r8b, 1
   320  	JE   LBB2_12
   321  
        // Odd chunk count: fold in one more 4-element chunk.
        // On entry xmm3/xmm4 hold the maxima and xmm8/xmm2 the minima; the same holds on exit.
   322  LBB2_11:
   323  	LONG $0x4c6f0ff3; WORD $0x10c7 // movdqu    xmm1, oword [rdi + 8*rax + 16]
   324  	LONG $0xc4280f66               // movapd    xmm0, xmm4
   325  	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
   326  	LONG $0xe96f0f66               // movdqa    xmm5, xmm1
   327  	LONG $0x15380f66; BYTE $0xec   // blendvpd    xmm5, xmm4, xmm0
   328  	LONG $0x246f0ff3; BYTE $0xc7   // movdqu    xmm4, oword [rdi + 8*rax]
   329  	LONG $0xc3280f66               // movapd    xmm0, xmm3
   330  	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
   331  	LONG $0xf46f0f66               // movdqa    xmm6, xmm4
   332  	LONG $0x15380f66; BYTE $0xf3   // blendvpd    xmm6, xmm3, xmm0
   333  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
   334  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   335  	LONG $0x15380f66; BYTE $0xca   // blendvpd    xmm1, xmm2, xmm0
   336  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
   337  	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
   338  	LONG $0x380f4166; WORD $0xe015 // blendvpd    xmm4, xmm8, xmm0
   339  	LONG $0x280f4466; BYTE $0xc4   // movapd    xmm8, xmm4
   340  	LONG $0xd1280f66               // movapd    xmm2, xmm1
   341  	LONG $0xde280f66               // movapd    xmm3, xmm6
   342  	LONG $0xe5280f66               // movapd    xmm4, xmm5
   343  
        // Merge the two accumulator pairs, then reduce the two lanes of each:
        // maximum ends up in rsi, minimum in r8. Skip the scalar tail when r11 == r9.
   344  LBB2_12:
   345  	LONG $0xc3280f66               // movapd    xmm0, xmm3
   346  	LONG $0x37380f66; BYTE $0xc4   // pcmpgtq    xmm0, xmm4
   347  	LONG $0x15380f66; BYTE $0xe3   // blendvpd    xmm4, xmm3, xmm0
   348  	LONG $0xcc700f66; BYTE $0x4e   // pshufd    xmm1, xmm4, 78
   349  	LONG $0xc46f0f66               // movdqa    xmm0, xmm4
   350  	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
   351  	LONG $0x15380f66; BYTE $0xcc   // blendvpd    xmm1, xmm4, xmm0
   352  	LONG $0x7e0f4866; BYTE $0xce   // movq    rsi, xmm1
   353  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
   354  	LONG $0x380f4166; WORD $0xc037 // pcmpgtq    xmm0, xmm8
   355  	LONG $0x380f4166; WORD $0xd015 // blendvpd    xmm2, xmm8, xmm0
   356  	LONG $0xca700f66; BYTE $0x4e   // pshufd    xmm1, xmm2, 78
   357  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
   358  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   359  	LONG $0x15380f66; BYTE $0xca   // blendvpd    xmm1, xmm2, xmm0
   360  	LONG $0x7e0f4966; BYTE $0xc8   // movq    r8, xmm1
   361  	WORD $0x394d; BYTE $0xcb       // cmp    r11, r9
   362  	JE   LBB2_13
   363  
        // Scalar tail over elements r11 .. r9-1; rax carries the running maximum into the loop.
   364  LBB2_4:
   365  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   366  
   367  LBB2_5:
   368  	LONG $0xdf348b4a         // mov    rsi, qword [rdi + 8*r11]
   369  	WORD $0x3949; BYTE $0xf0 // cmp    r8, rsi
   370  	LONG $0xc64f0f4c         // cmovg    r8, rsi
   371  	WORD $0x3948; BYTE $0xf0 // cmp    rax, rsi
   372  	LONG $0xf04d0f48         // cmovge    rsi, rax
   373  	LONG $0x01c38349         // add    r11, 1
   374  	WORD $0x8948; BYTE $0xf0 // mov    rax, rsi
   375  	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
   376  	JNE  LBB2_5
   377  
        // Store results: maximum through rcx (maxout), minimum through rdx (minout).
   378  LBB2_13:
   379  	WORD $0x8948; BYTE $0x31 // mov    qword [rcx], rsi
   380  	WORD $0x894c; BYTE $0x02 // mov    qword [rdx], r8
   381  	RET
   382  
        // Exactly one 4-element chunk (rax == 0, so r8 == 1): initialise the accumulators
        // and go straight to the single-chunk step; the JNE below is taken because r8 is odd.
   383  LBB2_7:
   384  	LONG $0x5d280f66; BYTE $0x00   // movapd    xmm3, oword 0[rbp] /* [rip + .LCPI2_0] */
   385  	LONG $0x6f0f4466; WORD $0x1045 // movdqa    xmm8, oword 16[rbp] /* [rip + .LCPI2_1] */
   386  	WORD $0xc031                   // xor    eax, eax
   387  	LONG $0x6f0f4166; BYTE $0xd0   // movdqa    xmm2, xmm8
   388  	LONG $0xe3280f66               // movapd    xmm4, xmm3
   389  	LONG $0x01c0f641               // test    r8b, 1
   390  	JNE  LBB2_11
   391  	JMP  LBB2_12
   392  
   392  
   393  DATA LCDATA3<>+0x000(SB)/8, $0x8000000000000000 // two 64-bit lanes with only the top bit set; XORed into operands before pcmpgtq in _uint64_max_min_sse4
   394  DATA LCDATA3<>+0x008(SB)/8, $0x8000000000000000
   395  GLOBL LCDATA3<>(SB), 8, $16 // 16-byte file-local symbol; flag value 8 is RODATA in Go's textflag.h
   396  
   397  TEXT ·_uint64_max_min_sse4(SB), $8-32
        // Unsigned 64-bit min/max scan. pcmpgtq is a signed compare, so both operands
        // are first XORed with the top-bit mask from LCDATA3; the resulting mask then
        // drives a blendvpd select on the original (unbiased) values. blendvpd takes its
        // mask implicitly from xmm0.
        //
        // Arguments (four 8-byte slots, 32 bytes in total):
        //   values+0(FP)  -> DI  address of the first 64-bit element
        //   length+8(FP)  -> SI  element count; only the low 32 bits (esi) are examined, as a signed value
        //   minout+16(FP) -> DX  address that receives the minimum (64-bit store)
        //   maxout+24(FP) -> CX  address that receives the maximum (64-bit store)
        //
        // When esi <= 0 the stored results are min = 0xffffffffffffffff and max = 0.
        //
        // The frame size is $8 rather than $0 on purpose: the body overwrites BP with
        // the address of LCDATA3, and BP is callee-save on amd64. A non-zero frame size
        // makes the Go assembler save BP in the prologue and restore it before RET, so
        // the caller's frame pointer survives the call. The 8 bytes of locals are unused.
        //
        // Register roles in the raw-encoded body: r9 = element count, r11 = index where
        // the scalar tail starts, r8 = chunk count during the vector phase and the
        // running minimum afterwards, rax/rsi = running maximum.
   398  
   399  	MOVQ values+0(FP), DI
   400  	MOVQ length+8(FP), SI
   401  	MOVQ minout+16(FP), DX
   402  	MOVQ maxout+24(FP), CX
   403  	LEAQ LCDATA3<>(SB), BP // rbp = &LCDATA3 (clobbers BP; see the frame-size note above)
   404  
   405  	WORD $0xf685                               // test    esi, esi
   406  	JLE  LBB3_1
   407  	WORD $0x8941; BYTE $0xf1                   // mov    r9d, esi
   408  	WORD $0xfe83; BYTE $0x03                   // cmp    esi, 3
   409  	JA   LBB3_6
        // 1..3 elements: scalar loop only, starting from min = all-ones, index 0, max = 0.
   410  	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov    r8, -1
   411  	WORD $0x3145; BYTE $0xdb                   // xor    r11d, r11d
   412  	WORD $0xc031                               // xor    eax, eax
   413  	JMP  LBB3_4
   414  
        // esi <= 0: store the initial values unchanged.
   415  LBB3_1:
   416  	LONG $0xffc0c749; WORD $0xffff; BYTE $0xff // mov    r8, -1
   417  	WORD $0xc031                               // xor    eax, eax
   418  	JMP  LBB3_13
   419  
        // 4 or more elements: r11 = count rounded down to a multiple of 4,
        // r8 = number of 4-element chunks, r10 = -(r8 & ~1) counts the unrolled loop up to zero.
   420  LBB3_6:
   421  	WORD $0x8945; BYTE $0xcb       // mov    r11d, r9d
   422  	LONG $0xfce38341               // and    r11d, -4
   423  	LONG $0xfc438d49               // lea    rax, [r11 - 4]
   424  	WORD $0x8949; BYTE $0xc0       // mov    r8, rax
   425  	LONG $0x02e8c149               // shr    r8, 2
   426  	LONG $0x01c08349               // add    r8, 1
   427  	WORD $0x8548; BYTE $0xc0       // test    rax, rax
   428  	JE   LBB3_7
   429  	WORD $0x894d; BYTE $0xc2       // mov    r10, r8
   430  	LONG $0xfee28349               // and    r10, -2
   431  	WORD $0xf749; BYTE $0xda       // neg    r10
   432  	LONG $0xef0f4566; BYTE $0xc9   // pxor    xmm9, xmm9
   433  	LONG $0x760f4566; BYTE $0xd2   // pcmpeqd    xmm10, xmm10
   434  	WORD $0xc031                   // xor    eax, eax
   435  	LONG $0x6f0f4466; WORD $0x0045 // movdqa    xmm8, oword 0[rbp] /* [rip + .LCPI3_0] */
   436  	LONG $0x760f4566; BYTE $0xdb   // pcmpeqd    xmm11, xmm11
   437  	LONG $0xef0f4566; BYTE $0xe4   // pxor    xmm12, xmm12
   438  
        // Unrolled vector loop: 8 elements (four 16-byte loads) per iteration.
        // xmm10/xmm11 carry the minima (seeded all-ones); xmm9/xmm12 carry the maxima
        // (seeded zero, also left in xmm13/xmm6 on exit); xmm8 holds the top-bit mask.
   439  LBB3_9:
   440  	LONG $0x6f0f4166; BYTE $0xd2               // movdqa    xmm2, xmm10
   441  	LONG $0xef0f4166; BYTE $0xd0               // pxor    xmm2, xmm8
   442  	LONG $0x246f0ff3; BYTE $0xc7               // movdqu    xmm4, oword [rdi + 8*rax]
   443  	LONG $0x6c6f0ff3; WORD $0x10c7             // movdqu    xmm5, oword [rdi + 8*rax + 16]
   444  	LONG $0x6f0f44f3; WORD $0xc76c; BYTE $0x20 // movdqu    xmm13, oword [rdi + 8*rax + 32]
   445  	LONG $0xc46f0f66                           // movdqa    xmm0, xmm4
   446  	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
   447  	LONG $0x6f0f4166; BYTE $0xc9               // movdqa    xmm1, xmm9
   448  	LONG $0xef0f4166; BYTE $0xc8               // pxor    xmm1, xmm8
   449  	LONG $0x37380f66; BYTE $0xc8               // pcmpgtq    xmm1, xmm0
   450  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
   451  	LONG $0xdc6f0f66                           // movdqa    xmm3, xmm4
   452  	LONG $0x380f4166; WORD $0xda15             // blendvpd    xmm3, xmm10, xmm0
   453  	LONG $0x746f0ff3; WORD $0x30c7             // movdqu    xmm6, oword [rdi + 8*rax + 48]
   454  	LONG $0x6f0f4166; BYTE $0xfb               // movdqa    xmm7, xmm11
   455  	LONG $0xef0f4166; BYTE $0xf8               // pxor    xmm7, xmm8
   456  	LONG $0xc56f0f66                           // movdqa    xmm0, xmm5
   457  	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
   458  	LONG $0x6f0f4166; BYTE $0xd4               // movdqa    xmm2, xmm12
   459  	LONG $0xef0f4166; BYTE $0xd0               // pxor    xmm2, xmm8
   460  	LONG $0x37380f66; BYTE $0xd0               // pcmpgtq    xmm2, xmm0
   461  	LONG $0x37380f66; BYTE $0xc7               // pcmpgtq    xmm0, xmm7
   462  	LONG $0xfd6f0f66                           // movdqa    xmm7, xmm5
   463  	LONG $0x380f4166; WORD $0xfb15             // blendvpd    xmm7, xmm11, xmm0
   464  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
   465  	LONG $0x380f4166; WORD $0xe115             // blendvpd    xmm4, xmm9, xmm0
   466  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
   467  	LONG $0x380f4166; WORD $0xec15             // blendvpd    xmm5, xmm12, xmm0
   468  	LONG $0xd3280f66                           // movapd    xmm2, xmm3
   469  	LONG $0x570f4166; BYTE $0xd0               // xorpd    xmm2, xmm8
   470  	LONG $0x6f0f4166; BYTE $0xc5               // movdqa    xmm0, xmm13
   471  	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
   472  	LONG $0xcc280f66                           // movapd    xmm1, xmm4
   473  	LONG $0x570f4166; BYTE $0xc8               // xorpd    xmm1, xmm8
   474  	LONG $0x37380f66; BYTE $0xc8               // pcmpgtq    xmm1, xmm0
   475  	LONG $0x37380f66; BYTE $0xc2               // pcmpgtq    xmm0, xmm2
   476  	LONG $0x6f0f4566; BYTE $0xd5               // movdqa    xmm10, xmm13
   477  	LONG $0x380f4466; WORD $0xd315             // blendvpd    xmm10, xmm3, xmm0
   478  	LONG $0xdf280f66                           // movapd    xmm3, xmm7
   479  	LONG $0x570f4166; BYTE $0xd8               // xorpd    xmm3, xmm8
   480  	LONG $0xc66f0f66                           // movdqa    xmm0, xmm6
   481  	LONG $0xef0f4166; BYTE $0xc0               // pxor    xmm0, xmm8
   482  	LONG $0xd5280f66                           // movapd    xmm2, xmm5
   483  	LONG $0x570f4166; BYTE $0xd0               // xorpd    xmm2, xmm8
   484  	LONG $0x37380f66; BYTE $0xd0               // pcmpgtq    xmm2, xmm0
   485  	LONG $0x37380f66; BYTE $0xc3               // pcmpgtq    xmm0, xmm3
   486  	LONG $0x6f0f4466; BYTE $0xde               // movdqa    xmm11, xmm6
   487  	LONG $0x380f4466; WORD $0xdf15             // blendvpd    xmm11, xmm7, xmm0
   488  	LONG $0xc16f0f66                           // movdqa    xmm0, xmm1
   489  	LONG $0x380f4466; WORD $0xec15             // blendvpd    xmm13, xmm4, xmm0
   490  	LONG $0xc26f0f66                           // movdqa    xmm0, xmm2
   491  	LONG $0x15380f66; BYTE $0xf5               // blendvpd    xmm6, xmm5, xmm0
   492  	LONG $0x08c08348                           // add    rax, 8
   493  	LONG $0x280f4566; BYTE $0xcd               // movapd    xmm9, xmm13
   494  	LONG $0x280f4466; BYTE $0xe6               // movapd    xmm12, xmm6
   495  	LONG $0x02c28349                           // add    r10, 2
   496  	JNE  LBB3_9
   497  	LONG $0x01c0f641                           // test    r8b, 1
   498  	JE   LBB3_12
   499  
        // Odd chunk count: fold in one more 4-element chunk.
        // On entry xmm13/xmm6 hold the maxima and xmm10/xmm11 the minima; the same holds on exit.
        // The top-bit mask is reloaded into xmm5 here.
   500  LBB3_11:
   501  	LONG $0x24100f66; BYTE $0xc7   // movupd    xmm4, oword [rdi + 8*rax]
   502  	LONG $0x5c100f66; WORD $0x10c7 // movupd    xmm3, oword [rdi + 8*rax + 16]
   503  	LONG $0x6d280f66; BYTE $0x00   // movapd    xmm5, oword 0[rbp] /* [rip + .LCPI3_0] */
   504  	LONG $0xc6280f66               // movapd    xmm0, xmm6
   505  	LONG $0xc5570f66               // xorpd    xmm0, xmm5
   506  	LONG $0xcb280f66               // movapd    xmm1, xmm3
   507  	LONG $0xcd570f66               // xorpd    xmm1, xmm5
   508  	LONG $0x37380f66; BYTE $0xc1   // pcmpgtq    xmm0, xmm1
   509  	LONG $0xfb280f66               // movapd    xmm7, xmm3
   510  	LONG $0x15380f66; BYTE $0xfe   // blendvpd    xmm7, xmm6, xmm0
   511  	LONG $0x280f4166; BYTE $0xc5   // movapd    xmm0, xmm13
   512  	LONG $0xc5570f66               // xorpd    xmm0, xmm5
   513  	LONG $0xd4280f66               // movapd    xmm2, xmm4
   514  	LONG $0xd5570f66               // xorpd    xmm2, xmm5
   515  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   516  	LONG $0xf4280f66               // movapd    xmm6, xmm4
   517  	LONG $0x380f4166; WORD $0xf515 // blendvpd    xmm6, xmm13, xmm0
   518  	LONG $0x280f4166; BYTE $0xc3   // movapd    xmm0, xmm11
   519  	LONG $0xc5570f66               // xorpd    xmm0, xmm5
   520  	LONG $0x37380f66; BYTE $0xc8   // pcmpgtq    xmm1, xmm0
   521  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
   522  	LONG $0x380f4166; WORD $0xdb15 // blendvpd    xmm3, xmm11, xmm0
   523  	LONG $0x570f4166; BYTE $0xea   // xorpd    xmm5, xmm10
   524  	LONG $0x37380f66; BYTE $0xd5   // pcmpgtq    xmm2, xmm5
   525  	LONG $0xc26f0f66               // movdqa    xmm0, xmm2
   526  	LONG $0x380f4166; WORD $0xe215 // blendvpd    xmm4, xmm10, xmm0
   527  	LONG $0x280f4466; BYTE $0xd4   // movapd    xmm10, xmm4
   528  	LONG $0x280f4466; BYTE $0xdb   // movapd    xmm11, xmm3
   529  	LONG $0x280f4466; BYTE $0xee   // movapd    xmm13, xmm6
   530  	LONG $0xf7280f66               // movapd    xmm6, xmm7
   531  
        // Merge the two accumulator pairs, then reduce the two lanes of each:
        // maximum ends up in rax, minimum in r8. Skip the scalar tail when r11 == r9.
   532  LBB3_12:
   533  	LONG $0x4d280f66; BYTE $0x00   // movapd    xmm1, oword 0[rbp] /* [rip + .LCPI3_0] */
   534  	LONG $0xd6280f66               // movapd    xmm2, xmm6
   535  	LONG $0xd1570f66               // xorpd    xmm2, xmm1
   536  	LONG $0x280f4166; BYTE $0xc5   // movapd    xmm0, xmm13
   537  	LONG $0xc1570f66               // xorpd    xmm0, xmm1
   538  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   539  	LONG $0x380f4166; WORD $0xf515 // blendvpd    xmm6, xmm13, xmm0
   540  	LONG $0xd6700f66; BYTE $0x4e   // pshufd    xmm2, xmm6, 78
   541  	LONG $0xc6280f66               // movapd    xmm0, xmm6
   542  	LONG $0xc1570f66               // xorpd    xmm0, xmm1
   543  	LONG $0xda6f0f66               // movdqa    xmm3, xmm2
   544  	LONG $0xd9ef0f66               // pxor    xmm3, xmm1
   545  	LONG $0x37380f66; BYTE $0xc3   // pcmpgtq    xmm0, xmm3
   546  	LONG $0x15380f66; BYTE $0xd6   // blendvpd    xmm2, xmm6, xmm0
   547  	LONG $0x7e0f4866; BYTE $0xd0   // movq    rax, xmm2
   548  	LONG $0x6f0f4166; BYTE $0xd2   // movdqa    xmm2, xmm10
   549  	LONG $0xd1ef0f66               // pxor    xmm2, xmm1
   550  	LONG $0x6f0f4166; BYTE $0xc3   // movdqa    xmm0, xmm11
   551  	LONG $0xc1ef0f66               // pxor    xmm0, xmm1
   552  	LONG $0x37380f66; BYTE $0xc2   // pcmpgtq    xmm0, xmm2
   553  	LONG $0x380f4566; WORD $0xda15 // blendvpd    xmm11, xmm10, xmm0
   554  	LONG $0x700f4166; WORD $0x4ed3 // pshufd    xmm2, xmm11, 78
   555  	LONG $0x6f0f4166; BYTE $0xc3   // movdqa    xmm0, xmm11
   556  	LONG $0xc1ef0f66               // pxor    xmm0, xmm1
   557  	LONG $0xcaef0f66               // pxor    xmm1, xmm2
   558  	LONG $0x37380f66; BYTE $0xc8   // pcmpgtq    xmm1, xmm0
   559  	LONG $0xc16f0f66               // movdqa    xmm0, xmm1
   560  	LONG $0x380f4166; WORD $0xd315 // blendvpd    xmm2, xmm11, xmm0
   561  	LONG $0x7e0f4966; BYTE $0xd0   // movq    r8, xmm2
   562  	WORD $0x394d; BYTE $0xcb       // cmp    r11, r9
   563  	JE   LBB3_13
   564  
        // Scalar tail over elements r11 .. r9-1; rsi carries the running maximum into the loop.
   565  LBB3_4:
   566  	WORD $0x8948; BYTE $0xc6 // mov    rsi, rax
   567  
   568  LBB3_5:
   569  	LONG $0xdf048b4a         // mov    rax, qword [rdi + 8*r11]
   570  	WORD $0x3949; BYTE $0xc0 // cmp    r8, rax
   571  	LONG $0xc0430f4c         // cmovae    r8, rax
   572  	WORD $0x3948; BYTE $0xc6 // cmp    rsi, rax
   573  	LONG $0xc6470f48         // cmova    rax, rsi
   574  	LONG $0x01c38349         // add    r11, 1
   575  	WORD $0x8948; BYTE $0xc6 // mov    rsi, rax
   576  	WORD $0x394d; BYTE $0xd9 // cmp    r9, r11
   577  	JNE  LBB3_5
   578  
        // Store results: maximum through rcx (maxout), minimum through rdx (minout).
   579  LBB3_13:
   580  	WORD $0x8948; BYTE $0x01 // mov    qword [rcx], rax
   581  	WORD $0x894c; BYTE $0x02 // mov    qword [rdx], r8
   582  	RET
   583  
        // Exactly one 4-element chunk (rax == 0, so r8 == 1): initialise the accumulators
        // and go straight to the single-chunk step; the JNE below is taken because r8 is odd.
   584  LBB3_7:
   585  	LONG $0x570f4566; BYTE $0xed // xorpd    xmm13, xmm13
   586  	LONG $0x760f4566; BYTE $0xd2 // pcmpeqd    xmm10, xmm10
   587  	WORD $0xc031                 // xor    eax, eax
   588  	LONG $0x760f4566; BYTE $0xdb // pcmpeqd    xmm11, xmm11
   589  	LONG $0xf6570f66             // xorpd    xmm6, xmm6
   590  	LONG $0x01c0f641             // test    r8b, 1
   591  	JNE  LBB3_11
   592  	JMP  LBB3_12