github.com/apache/arrow/go/v7@v7.0.1/parquet/internal/utils/_lib/min_max_sse4.s (about)

     1  	.text
        # Intel operand order (destination first), registers written without '%'.
     2  	.intel_syntax noprefix
        # Records the name of the C translation unit this listing was generated from.
     3  	.file	"min_max.c"
     4  	.section	.rodata.cst16,"aM",@progbits,16
     5  	.p2align	4                               # -- Begin function int32_max_min_sse4
        # ------------------------------------------------------------------
        # int32_max_min_sse4
        #
        # Scans an array of signed 32-bit integers and stores its largest and
        # smallest element through two output pointers.
        #
        # Registers read on entry (as used by the code below):
        #   rdi = base address of the dword elements
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (dword store)
        #   rcx = address that receives the maximum (dword store)
        # NOTE(review): this matches the System V AMD64 argument order for a
        # call shaped like f(values, length, min_out, max_out); confirm the
        # prototype against min_max.c.
        #
        # When the count is <= 0 the identity values are stored:
        # minimum = 0x7fffffff, maximum = 0x80000000.
        #
        # Registers written: rax, rsi, r8, r9, r10, r11, xmm0-xmm7, flags.
        # rbp is pushed and restored; rsp is restored from rbp before ret.
        # ------------------------------------------------------------------
        # .LCPI0_0: four dwords of 0x80000000 -- seed for the max accumulators.
     6  .LCPI0_0:
     7  	.long	2147483648                      # 0x80000000
     8  	.long	2147483648                      # 0x80000000
     9  	.long	2147483648                      # 0x80000000
    10  	.long	2147483648                      # 0x80000000
        # .LCPI0_1: four dwords of 0x7fffffff -- seed for the min accumulators.
    11  .LCPI0_1:
    12  	.long	2147483647                      # 0x7fffffff
    13  	.long	2147483647                      # 0x7fffffff
    14  	.long	2147483647                      # 0x7fffffff
    15  	.long	2147483647                      # 0x7fffffff
    16  	.text
    17  	.globl	int32_max_min_sse4
    18  	.p2align	4, 0x90
    19  	.type	int32_max_min_sse4,@function
    20  int32_max_min_sse4:                     # @int32_max_min_sse4
    21  # %bb.0:
        # Prologue: set up a frame pointer and round rsp down to a multiple of 8.
        # Nothing in this function is addressed through rsp.
    22  	push	rbp
    23  	mov	rbp, rsp
    24  	and	rsp, -8
        # count <= 0: nothing to scan, go store the identity values.
    25  	test	esi, esi
    26  	jle	.LBB0_1
    27  # %bb.2:
        # r9 = count (zero-extended). More than 7 elements -> vector path.
    28  	mov	r9d, esi
    29  	cmp	esi, 7
    30  	ja	.LBB0_6
    31  # %bb.3:
        # 1..7 elements: scalar loop only. eax = running max seed,
        # r8d = running min seed, r11 = index of the first element to scan.
    32  	mov	eax, -2147483648
    33  	mov	r8d, 2147483647
    34  	xor	r11d, r11d
    35  	jmp	.LBB0_4
    36  .LBB0_1:
        # Empty input: min = 0x7fffffff, max = 0x80000000.
    37  	mov	r8d, 2147483647
    38  	mov	eax, -2147483648
    39  	jmp	.LBB0_13
    40  .LBB0_6:
        # r11 = count rounded down to a multiple of 8 (elements the vector code
        # handles). r8 = r11 / 8 = number of 8-element groups. When there is
        # exactly one group (r11 - 8 == 0) the unrolled loop is skipped.
    41  	mov	r11d, r9d
    42  	and	r11d, -8
    43  	lea	rax, [r11 - 8]
    44  	mov	r8, rax
    45  	shr	r8, 3
    46  	add	r8, 1
    47  	test	rax, rax
    48  	je	.LBB0_7
    49  # %bb.8:
        # r10 = -(r8 & ~1): negated number of groups consumed by the loop, which
        # takes two groups per iteration and counts r10 up to zero.
        # xmm0/xmm2 = min accumulators, xmm1/xmm3 = max accumulators,
        # rax = element index.
    50  	mov	r10, r8
    51  	and	r10, -2
    52  	neg	r10
    53  	movdqa	xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
    54  	movdqa	xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
    55  	xor	eax, eax
    56  	movdqa	xmm2, xmm0
    57  	movdqa	xmm3, xmm1
    58  	.p2align	4, 0x90
    59  .LBB0_9:                                # =>This Inner Loop Header: Depth=1
        # Four unaligned 16-byte loads = 16 elements per iteration, folded into
        # the accumulators with signed packed min/max.
    60  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
    61  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
    62  	movdqu	xmm6, xmmword ptr [rdi + 4*rax + 32]
    63  	movdqu	xmm7, xmmword ptr [rdi + 4*rax + 48]
    64  	pminsd	xmm0, xmm4
    65  	pminsd	xmm2, xmm5
    66  	pmaxsd	xmm1, xmm4
    67  	pmaxsd	xmm3, xmm5
    68  	pminsd	xmm0, xmm6
    69  	pminsd	xmm2, xmm7
    70  	pmaxsd	xmm1, xmm6
    71  	pmaxsd	xmm3, xmm7
    72  	add	rax, 16
    73  	add	r10, 2
    74  	jne	.LBB0_9
    75  # %bb.10:
        # An odd group count leaves one 8-element group for .LBB0_11.
    76  	test	r8b, 1
    77  	je	.LBB0_12
    78  .LBB0_11:
        # Leftover group: 8 elements starting at index rax.
    79  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
    80  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
    81  	pmaxsd	xmm3, xmm5
    82  	pmaxsd	xmm1, xmm4
    83  	pminsd	xmm2, xmm5
    84  	pminsd	xmm0, xmm4
    85  .LBB0_12:
        # Merge the paired accumulators, then reduce each vector horizontally:
        # shuffle 78 swaps the 64-bit halves, shuffle 229 copies lane 1 into
        # lane 0, so lane 0 ends up holding the result over all four lanes.
        # eax = max, r8d = min.
    86  	pminsd	xmm0, xmm2
    87  	pmaxsd	xmm1, xmm3
    88  	pshufd	xmm2, xmm1, 78                  # xmm2 = xmm1[2,3,0,1]
    89  	pmaxsd	xmm2, xmm1
    90  	pshufd	xmm1, xmm2, 229                 # xmm1 = xmm2[1,1,2,3]
    91  	pmaxsd	xmm1, xmm2
    92  	movd	eax, xmm1
    93  	pshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
    94  	pminsd	xmm1, xmm0
    95  	pshufd	xmm0, xmm1, 229                 # xmm0 = xmm1[1,1,2,3]
    96  	pminsd	xmm0, xmm1
    97  	movd	r8d, xmm0
        # Count was a multiple of 8: no scalar tail.
    98  	cmp	r11, r9
    99  	je	.LBB0_13
   100  .LBB0_4:
        # Scalar loop over elements r11 .. r9-1. esi carries the running max
        # between iterations, r8d the running min.
   101  	mov	esi, eax
   102  	.p2align	4, 0x90
   103  .LBB0_5:                                # =>This Inner Loop Header: Depth=1
        # r8d = min(r8d, element); eax = max(esi, element), both signed.
   104  	mov	eax, dword ptr [rdi + 4*r11]
   105  	cmp	r8d, eax
   106  	cmovg	r8d, eax
   107  	cmp	esi, eax
   108  	cmovge	eax, esi
   109  	add	r11, 1
   110  	mov	esi, eax
   111  	cmp	r9, r11
   112  	jne	.LBB0_5
   113  .LBB0_13:
        # Store max through rcx and min through rdx, then undo the prologue.
   114  	mov	dword ptr [rcx], eax
   115  	mov	dword ptr [rdx], r8d
   116  	mov	rsp, rbp
   117  	pop	rbp
   118  	ret
   119  .LBB0_7:
        # Exactly one 8-element group (r8 == 1 here): seed the accumulators the
        # same way as %bb.8 and process the group in .LBB0_11. Because r8 is 1
        # the jne below is taken.
   120  	movdqa	xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
   121  	movdqa	xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
   122  	xor	eax, eax
   123  	movdqa	xmm2, xmm0
   124  	movdqa	xmm3, xmm1
   125  	test	r8b, 1
   126  	jne	.LBB0_11
   127  	jmp	.LBB0_12
   128  .Lfunc_end0:
   129  	.size	int32_max_min_sse4, .Lfunc_end0-int32_max_min_sse4
   130                                          # -- End function
   131  	.globl	uint32_max_min_sse4             # -- Begin function uint32_max_min_sse4
        # ------------------------------------------------------------------
        # uint32_max_min_sse4
        #
        # Scans an array of unsigned 32-bit integers and stores its largest and
        # smallest element through two output pointers.
        #
        # Registers read on entry (as used by the code below):
        #   rdi = base address of the dword elements
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (dword store)
        #   rcx = address that receives the maximum (dword store)
        # NOTE(review): this matches the System V AMD64 argument order for a
        # call shaped like f(values, length, min_out, max_out); confirm the
        # prototype against min_max.c.
        #
        # When the count is <= 0 the identity values are stored:
        # minimum = 0xffffffff, maximum = 0.
        #
        # Registers written: rax, rsi, r8, r9, r10, r11, xmm0-xmm7, flags.
        # rbp is pushed and restored; rsp is restored from rbp before ret.
        # ------------------------------------------------------------------
   132  	.p2align	4, 0x90
   133  	.type	uint32_max_min_sse4,@function
   134  uint32_max_min_sse4:                    # @uint32_max_min_sse4
   135  # %bb.0:
        # Prologue: set up a frame pointer and round rsp down to a multiple of 8.
        # Nothing in this function is addressed through rsp.
   136  	push	rbp
   137  	mov	rbp, rsp
   138  	and	rsp, -8
        # count <= 0: nothing to scan, go store the identity values.
   139  	test	esi, esi
   140  	jle	.LBB1_1
   141  # %bb.2:
        # r9 = count (zero-extended). More than 7 elements -> vector path.
   142  	mov	r9d, esi
   143  	cmp	esi, 7
   144  	ja	.LBB1_6
   145  # %bb.3:
        # 1..7 elements: scalar loop only. r11 = start index 0,
        # r8d = running min seed (all ones), esi = running max seed (0).
   146  	xor	r11d, r11d
   147  	mov	r8d, -1
   148  	xor	esi, esi
   149  	jmp	.LBB1_4
   150  .LBB1_1:
        # Empty input: min = 0xffffffff, max = 0.
   151  	mov	r8d, -1
   152  	xor	esi, esi
   153  	jmp	.LBB1_13
   154  .LBB1_6:
        # r11 = count rounded down to a multiple of 8 (elements the vector code
        # handles). r8 = r11 / 8 = number of 8-element groups. When there is
        # exactly one group (r11 - 8 == 0) the unrolled loop is skipped.
   155  	mov	r11d, r9d
   156  	and	r11d, -8
   157  	lea	rax, [r11 - 8]
   158  	mov	r8, rax
   159  	shr	r8, 3
   160  	add	r8, 1
   161  	test	rax, rax
   162  	je	.LBB1_7
   163  # %bb.8:
        # r10 = -(r8 & ~1): negated number of groups consumed by the loop, which
        # takes two groups per iteration and counts r10 up to zero.
        # xmm0/xmm2 = min accumulators (pcmpeqd reg,reg sets all bits),
        # xmm1/xmm3 = max accumulators (pxor reg,reg clears all bits),
        # rax = element index.
   164  	mov	r10, r8
   165  	and	r10, -2
   166  	neg	r10
   167  	pxor	xmm1, xmm1
   168  	pcmpeqd	xmm0, xmm0
   169  	xor	eax, eax
   170  	pcmpeqd	xmm2, xmm2
   171  	pxor	xmm3, xmm3
   172  	.p2align	4, 0x90
   173  .LBB1_9:                                # =>This Inner Loop Header: Depth=1
        # Four unaligned 16-byte loads = 16 elements per iteration, folded into
        # the accumulators with unsigned packed min/max.
   174  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   175  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   176  	movdqu	xmm6, xmmword ptr [rdi + 4*rax + 32]
   177  	movdqu	xmm7, xmmword ptr [rdi + 4*rax + 48]
   178  	pminud	xmm0, xmm4
   179  	pminud	xmm2, xmm5
   180  	pmaxud	xmm1, xmm4
   181  	pmaxud	xmm3, xmm5
   182  	pminud	xmm0, xmm6
   183  	pminud	xmm2, xmm7
   184  	pmaxud	xmm1, xmm6
   185  	pmaxud	xmm3, xmm7
   186  	add	rax, 16
   187  	add	r10, 2
   188  	jne	.LBB1_9
   189  # %bb.10:
        # An odd group count leaves one 8-element group for .LBB1_11.
   190  	test	r8b, 1
   191  	je	.LBB1_12
   192  .LBB1_11:
        # Leftover group: 8 elements starting at index rax.
   193  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   194  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   195  	pmaxud	xmm3, xmm5
   196  	pmaxud	xmm1, xmm4
   197  	pminud	xmm2, xmm5
   198  	pminud	xmm0, xmm4
   199  .LBB1_12:
        # Merge the paired accumulators, then reduce each vector horizontally:
        # shuffle 78 swaps the 64-bit halves, shuffle 229 copies lane 1 into
        # lane 0, so lane 0 ends up holding the result over all four lanes.
        # esi = max, r8d = min.
   200  	pminud	xmm0, xmm2
   201  	pmaxud	xmm1, xmm3
   202  	pshufd	xmm2, xmm1, 78                  # xmm2 = xmm1[2,3,0,1]
   203  	pmaxud	xmm2, xmm1
   204  	pshufd	xmm1, xmm2, 229                 # xmm1 = xmm2[1,1,2,3]
   205  	pmaxud	xmm1, xmm2
   206  	movd	esi, xmm1
   207  	pshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
   208  	pminud	xmm1, xmm0
   209  	pshufd	xmm0, xmm1, 229                 # xmm0 = xmm1[1,1,2,3]
   210  	pminud	xmm0, xmm1
   211  	movd	r8d, xmm0
        # Count was a multiple of 8: no scalar tail.
   212  	cmp	r11, r9
   213  	je	.LBB1_13
   214  .LBB1_4:
        # Scalar loop over elements r11 .. r9-1. eax carries the running max
        # between iterations, r8d the running min.
   215  	mov	eax, esi
   216  	.p2align	4, 0x90
   217  .LBB1_5:                                # =>This Inner Loop Header: Depth=1
        # r8d = min(r8d, element); esi = max(eax, element), both unsigned.
   218  	mov	esi, dword ptr [rdi + 4*r11]
   219  	cmp	r8d, esi
   220  	cmovae	r8d, esi
   221  	cmp	eax, esi
   222  	cmova	esi, eax
   223  	add	r11, 1
   224  	mov	eax, esi
   225  	cmp	r9, r11
   226  	jne	.LBB1_5
   227  .LBB1_13:
        # Store max through rcx and min through rdx, then undo the prologue.
   228  	mov	dword ptr [rcx], esi
   229  	mov	dword ptr [rdx], r8d
   230  	mov	rsp, rbp
   231  	pop	rbp
   232  	ret
   233  .LBB1_7:
        # Exactly one 8-element group (r8 == 1 here): seed the accumulators the
        # same way as %bb.8 and process the group in .LBB1_11. Because r8 is 1
        # the jne below is taken.
   234  	pxor	xmm1, xmm1
   235  	pcmpeqd	xmm0, xmm0
   236  	xor	eax, eax
   237  	pcmpeqd	xmm2, xmm2
   238  	pxor	xmm3, xmm3
   239  	test	r8b, 1
   240  	jne	.LBB1_11
   241  	jmp	.LBB1_12
   242  .Lfunc_end1:
   243  	.size	uint32_max_min_sse4, .Lfunc_end1-uint32_max_min_sse4
   244                                          # -- End function
   245  	.section	.rodata.cst16,"aM",@progbits,16
   246  	.p2align	4                               # -- Begin function int64_max_min_sse4
        # ------------------------------------------------------------------
        # int64_max_min_sse4
        #
        # Scans an array of signed 64-bit integers and stores its largest and
        # smallest element through two output pointers.
        #
        # Registers read on entry (as used by the code below):
        #   rdi = base address of the qword elements
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (qword store)
        #   rcx = address that receives the maximum (qword store)
        # NOTE(review): this matches the System V AMD64 argument order for a
        # call shaped like f(values, length, min_out, max_out); confirm the
        # prototype against min_max.c.
        #
        # When the count is <= 0 the identity values are stored:
        # minimum = 0x7fffffffffffffff, maximum = 0x8000000000000000.
        #
        # Vector min/max is built from two instructions: pcmpgtq produces a
        # per-qword signed "greater than" mask in xmm0, and
        # "blendvpd dst, src, xmm0" replaces the dst lanes whose mask lane is
        # set with the src lanes.
        #
        # Registers written: rax, rsi, r8, r9, r10, r11, xmm0-xmm9, flags.
        # rbp is pushed and restored; rsp is restored from rbp before ret.
        # ------------------------------------------------------------------
        # .LCPI2_0: two qwords of 0x8000000000000000 -- seed for the max accumulators.
   247  .LCPI2_0:
   248  	.quad	-9223372036854775808            # 0x8000000000000000
   249  	.quad	-9223372036854775808            # 0x8000000000000000
        # .LCPI2_1: two qwords of 0x7fffffffffffffff -- seed for the min accumulators.
   250  .LCPI2_1:
   251  	.quad	9223372036854775807             # 0x7fffffffffffffff
   252  	.quad	9223372036854775807             # 0x7fffffffffffffff
   253  	.text
   254  	.globl	int64_max_min_sse4
   255  	.p2align	4, 0x90
   256  	.type	int64_max_min_sse4,@function
   257  int64_max_min_sse4:                     # @int64_max_min_sse4
   258  # %bb.0:
        # Prologue: set up a frame pointer and round rsp down to a multiple of 8.
        # Nothing in this function is addressed through rsp.
   259  	push	rbp
   260  	mov	rbp, rsp
   261  	and	rsp, -8
        # r8 = running min seed. count <= 0 -> store the identity values.
   262  	movabs	r8, 9223372036854775807
   263  	test	esi, esi
   264  	jle	.LBB2_1
   265  # %bb.2:
        # r9 = count (zero-extended). More than 3 elements -> vector path.
   266  	mov	r9d, esi
   267  	cmp	esi, 3
   268  	ja	.LBB2_6
   269  # %bb.3:
        # 1..3 elements: scalar loop only. rsi = r8 + 1 wraps to
        # 0x8000000000000000, the running max seed; r11 = start index 0.
   270  	lea	rsi, [r8 + 1]
   271  	xor	r11d, r11d
   272  	jmp	.LBB2_4
   273  .LBB2_1:
        # Empty input: max = 0x8000000000000000 (r8 + 1), min stays in r8.
   274  	lea	rsi, [r8 + 1]
   275  	jmp	.LBB2_13
   276  .LBB2_6:
        # r11 = count rounded down to a multiple of 4 (elements the vector code
        # handles). r8 is reused as r11 / 4 = number of 4-element groups; the
        # min seed is reloaded into xmm8 below. When there is exactly one group
        # (r11 - 4 == 0) the unrolled loop is skipped.
   277  	mov	r11d, r9d
   278  	and	r11d, -4
   279  	lea	rax, [r11 - 4]
   280  	mov	r8, rax
   281  	shr	r8, 2
   282  	add	r8, 1
   283  	test	rax, rax
   284  	je	.LBB2_7
   285  # %bb.8:
        # r10 = -(r8 & ~1): negated number of groups consumed by the loop, which
        # takes two groups per iteration and counts r10 up to zero.
        # xmm8/xmm2 = min accumulators, xmm9/xmm6 = max accumulators,
        # rax = element index.
   286  	mov	r10, r8
   287  	and	r10, -2
   288  	neg	r10
   289  	movdqa	xmm9, xmmword ptr [rip + .LCPI2_0] # xmm9 = [9223372036854775808,9223372036854775808]
   290  	movdqa	xmm8, xmmword ptr [rip + .LCPI2_1] # xmm8 = [9223372036854775807,9223372036854775807]
   291  	xor	eax, eax
   292  	movdqa	xmm2, xmm8
   293  	movdqa	xmm6, xmm9
   294  	.p2align	4, 0x90
   295  .LBB2_9:                                # =>This Inner Loop Header: Depth=1
        # Four unaligned 16-byte loads = 8 elements per iteration.
        # First pair of loads (offsets 0 and 16):
        #   xmm4 = min(load0, xmm8), xmm5 = min(load1, xmm2),
        #   xmm7 = max(load0, xmm9), xmm1 = max(load1, xmm6).
   296  	movdqu	xmm7, xmmword ptr [rdi + 8*rax]
   297  	movdqa	xmm0, xmm7
   298  	pcmpgtq	xmm0, xmm8
   299  	movdqa	xmm4, xmm7
   300  	blendvpd	xmm4, xmm8, xmm0
   301  	movdqu	xmm1, xmmword ptr [rdi + 8*rax + 16]
   302  	movdqa	xmm0, xmm1
   303  	pcmpgtq	xmm0, xmm2
   304  	movdqa	xmm5, xmm1
   305  	blendvpd	xmm5, xmm2, xmm0
   306  	movdqa	xmm0, xmm9
   307  	pcmpgtq	xmm0, xmm7
   308  	blendvpd	xmm7, xmm9, xmm0
   309  	movdqa	xmm0, xmm6
   310  	pcmpgtq	xmm0, xmm1
   311  	blendvpd	xmm1, xmm6, xmm0
        # Second pair of loads (offsets 32 and 48):
        #   xmm8 = min(load2, xmm4), xmm2 = min(load3, xmm5),
        #   xmm3 = max(load2, xmm7), xmm4 = max(load3, xmm1).
   312  	movdqu	xmm3, xmmword ptr [rdi + 8*rax + 32]
   313  	movdqa	xmm0, xmm3
   314  	pcmpgtq	xmm0, xmm4
   315  	movdqa	xmm8, xmm3
   316  	blendvpd	xmm8, xmm4, xmm0
   317  	movdqu	xmm4, xmmword ptr [rdi + 8*rax + 48]
   318  	movdqa	xmm0, xmm4
   319  	pcmpgtq	xmm0, xmm5
   320  	movdqa	xmm2, xmm4
   321  	blendvpd	xmm2, xmm5, xmm0
   322  	movapd	xmm0, xmm7
   323  	pcmpgtq	xmm0, xmm3
   324  	blendvpd	xmm3, xmm7, xmm0
   325  	movapd	xmm0, xmm1
   326  	pcmpgtq	xmm0, xmm4
   327  	blendvpd	xmm4, xmm1, xmm0
        # Copy the new maxima back into the loop-carried registers xmm9/xmm6.
        # After the loop the maxima are also live in xmm3/xmm4, the minima in
        # xmm8/xmm2.
   328  	add	rax, 8
   329  	movapd	xmm9, xmm3
   330  	movapd	xmm6, xmm4
   331  	add	r10, 2
   332  	jne	.LBB2_9
   333  # %bb.10:
        # An odd group count leaves one 4-element group for .LBB2_11.
   334  	test	r8b, 1
   335  	je	.LBB2_12
   336  .LBB2_11:
        # Leftover group: 4 elements starting at index rax.
        # Inputs: maxima in xmm3/xmm4, minima in xmm8/xmm2.
        # Outputs land in the same four registers via the moves at the end.
   337  	movdqu	xmm1, xmmword ptr [rdi + 8*rax + 16]
   338  	movapd	xmm0, xmm4
   339  	pcmpgtq	xmm0, xmm1
   340  	movdqa	xmm5, xmm1
   341  	blendvpd	xmm5, xmm4, xmm0
   342  	movdqu	xmm4, xmmword ptr [rdi + 8*rax]
   343  	movapd	xmm0, xmm3
   344  	pcmpgtq	xmm0, xmm4
   345  	movdqa	xmm6, xmm4
   346  	blendvpd	xmm6, xmm3, xmm0
   347  	movdqa	xmm0, xmm1
   348  	pcmpgtq	xmm0, xmm2
   349  	blendvpd	xmm1, xmm2, xmm0
   350  	movdqa	xmm0, xmm4
   351  	pcmpgtq	xmm0, xmm8
   352  	blendvpd	xmm4, xmm8, xmm0
   353  	movapd	xmm8, xmm4
   354  	movapd	xmm2, xmm1
   355  	movapd	xmm3, xmm6
   356  	movapd	xmm4, xmm5
   357  .LBB2_12:
        # Merge the two max accumulators (xmm3, xmm4), then compare the result
        # with its own half-swapped copy (shuffle 78) so the low qword holds
        # the overall max; rsi = max.
   358  	movapd	xmm0, xmm3
   359  	pcmpgtq	xmm0, xmm4
   360  	blendvpd	xmm4, xmm3, xmm0
   361  	pshufd	xmm1, xmm4, 78                  # xmm1 = xmm4[2,3,0,1]
   362  	movdqa	xmm0, xmm4
   363  	pcmpgtq	xmm0, xmm1
   364  	blendvpd	xmm1, xmm4, xmm0
   365  	movq	rsi, xmm1
        # Same reduction for the two min accumulators (xmm8, xmm2); r8 = min.
   366  	movdqa	xmm0, xmm2
   367  	pcmpgtq	xmm0, xmm8
   368  	blendvpd	xmm2, xmm8, xmm0
   369  	pshufd	xmm1, xmm2, 78                  # xmm1 = xmm2[2,3,0,1]
   370  	movdqa	xmm0, xmm1
   371  	pcmpgtq	xmm0, xmm2
   372  	blendvpd	xmm1, xmm2, xmm0
   373  	movq	r8, xmm1
        # Count was a multiple of 4: no scalar tail.
   374  	cmp	r11, r9
   375  	je	.LBB2_13
   376  .LBB2_4:
        # Scalar loop over elements r11 .. r9-1. rax carries the running max
        # between iterations, r8 the running min.
   377  	mov	rax, rsi
   378  	.p2align	4, 0x90
   379  .LBB2_5:                                # =>This Inner Loop Header: Depth=1
        # r8 = min(r8, element); rsi = max(rax, element), both signed.
   380  	mov	rsi, qword ptr [rdi + 8*r11]
   381  	cmp	r8, rsi
   382  	cmovg	r8, rsi
   383  	cmp	rax, rsi
   384  	cmovge	rsi, rax
   385  	add	r11, 1
   386  	mov	rax, rsi
   387  	cmp	r9, r11
   388  	jne	.LBB2_5
   389  .LBB2_13:
        # Store max through rcx and min through rdx, then undo the prologue.
   390  	mov	qword ptr [rcx], rsi
   391  	mov	qword ptr [rdx], r8
   392  	mov	rsp, rbp
   393  	pop	rbp
   394  	ret
   395  .LBB2_7:
        # Exactly one 4-element group (r8 == 1 here): seed the registers that
        # .LBB2_11 reads (maxima xmm3/xmm4, minima xmm8/xmm2). Because r8 is 1
        # the jne below is taken.
   396  	movapd	xmm3, xmmword ptr [rip + .LCPI2_0] # xmm3 = [9223372036854775808,9223372036854775808]
   397  	movdqa	xmm8, xmmword ptr [rip + .LCPI2_1] # xmm8 = [9223372036854775807,9223372036854775807]
   398  	xor	eax, eax
   399  	movdqa	xmm2, xmm8
   400  	movapd	xmm4, xmm3
   401  	test	r8b, 1
   402  	jne	.LBB2_11
   403  	jmp	.LBB2_12
   404  .Lfunc_end2:
   405  	.size	int64_max_min_sse4, .Lfunc_end2-int64_max_min_sse4
   406                                          # -- End function
   407  	.section	.rodata.cst16,"aM",@progbits,16
   408  	.p2align	4                               # -- Begin function uint64_max_min_sse4
        # ------------------------------------------------------------------
        # uint64_max_min_sse4
        #
        # Scans an array of unsigned 64-bit integers and stores its largest and
        # smallest element through two output pointers.
        #
        # Registers read on entry (as used by the code below):
        #   rdi = base address of the qword elements
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (qword store)
        #   rcx = address that receives the maximum (qword store)
        # NOTE(review): this matches the System V AMD64 argument order for a
        # call shaped like f(values, length, min_out, max_out); confirm the
        # prototype against min_max.c.
        #
        # When the count is <= 0 the identity values are stored:
        # minimum = 0xffffffffffffffff, maximum = 0.
        #
        # Vector min/max: pcmpgtq is a signed compare, so both operands are
        # first XORed with 0x8000000000000000 (.LCPI3_0) to flip the sign bit,
        # which turns the signed compare into an unsigned ordering. The mask
        # goes to xmm0 and "blendvpd dst, src, xmm0" replaces the dst lanes
        # whose mask lane is set with the src lanes. The blends operate on the
        # original (un-flipped) values.
        #
        # Registers written: rax, rsi, r8, r9, r10, r11, xmm0-xmm13, flags.
        # rbp is pushed and restored; rsp is restored from rbp before ret.
        # ------------------------------------------------------------------
        # .LCPI3_0: two qwords with only the sign bit set (compare bias).
   409  .LCPI3_0:
   410  	.quad	-9223372036854775808            # 0x8000000000000000
   411  	.quad	-9223372036854775808            # 0x8000000000000000
   412  	.text
   413  	.globl	uint64_max_min_sse4
   414  	.p2align	4, 0x90
   415  	.type	uint64_max_min_sse4,@function
   416  uint64_max_min_sse4:                    # @uint64_max_min_sse4
   417  # %bb.0:
        # Prologue: set up a frame pointer and round rsp down to a multiple of 8.
        # Nothing in this function is addressed through rsp.
   418  	push	rbp
   419  	mov	rbp, rsp
   420  	and	rsp, -8
        # count <= 0: nothing to scan, go store the identity values.
   421  	test	esi, esi
   422  	jle	.LBB3_1
   423  # %bb.2:
        # r9 = count (zero-extended). More than 3 elements -> vector path.
   424  	mov	r9d, esi
   425  	cmp	esi, 3
   426  	ja	.LBB3_6
   427  # %bb.3:
        # 1..3 elements: scalar loop only. r8 = running min seed (all ones),
        # r11 = start index 0, rax = running max seed (0).
   428  	mov	r8, -1
   429  	xor	r11d, r11d
   430  	xor	eax, eax
   431  	jmp	.LBB3_4
   432  .LBB3_1:
        # Empty input: min = 0xffffffffffffffff, max = 0.
   433  	mov	r8, -1
   434  	xor	eax, eax
   435  	jmp	.LBB3_13
   436  .LBB3_6:
        # r11 = count rounded down to a multiple of 4 (elements the vector code
        # handles). r8 = r11 / 4 = number of 4-element groups. When there is
        # exactly one group (r11 - 4 == 0) the unrolled loop is skipped.
   437  	mov	r11d, r9d
   438  	and	r11d, -4
   439  	lea	rax, [r11 - 4]
   440  	mov	r8, rax
   441  	shr	r8, 2
   442  	add	r8, 1
   443  	test	rax, rax
   444  	je	.LBB3_7
   445  # %bb.8:
        # r10 = -(r8 & ~1): negated number of groups consumed by the loop, which
        # takes two groups per iteration and counts r10 up to zero.
        # xmm10/xmm11 = min accumulators (all ones),
        # xmm9/xmm12  = max accumulators (zero),
        # xmm8 = sign-bit constant, rax = element index.
   446  	mov	r10, r8
   447  	and	r10, -2
   448  	neg	r10
   449  	pxor	xmm9, xmm9
   450  	pcmpeqd	xmm10, xmm10
   451  	xor	eax, eax
   452  	movdqa	xmm8, xmmword ptr [rip + .LCPI3_0] # xmm8 = [9223372036854775808,9223372036854775808]
   453  	pcmpeqd	xmm11, xmm11
   454  	pxor	xmm12, xmm12
   455  	.p2align	4, 0x90
   456  .LBB3_9:                                # =>This Inner Loop Header: Depth=1
        # Four unaligned 16-byte loads = 8 elements per iteration.
        # First pair of loads (offsets 0 and 16):
        #   xmm3 = min(load0, xmm10), xmm7 = min(load1, xmm11),
        #   xmm4 = max(load0, xmm9),  xmm5 = max(load1, xmm12).
        # The load at offset 32 (xmm13) is issued early and consumed in the
        # second half.
   457  	movdqa	xmm2, xmm10
   458  	pxor	xmm2, xmm8
   459  	movdqu	xmm4, xmmword ptr [rdi + 8*rax]
   460  	movdqu	xmm5, xmmword ptr [rdi + 8*rax + 16]
   461  	movdqu	xmm13, xmmword ptr [rdi + 8*rax + 32]
   462  	movdqa	xmm0, xmm4
   463  	pxor	xmm0, xmm8
   464  	movdqa	xmm1, xmm9
   465  	pxor	xmm1, xmm8
   466  	pcmpgtq	xmm1, xmm0
   467  	pcmpgtq	xmm0, xmm2
   468  	movdqa	xmm3, xmm4
   469  	blendvpd	xmm3, xmm10, xmm0
   470  	movdqu	xmm6, xmmword ptr [rdi + 8*rax + 48]
   471  	movdqa	xmm7, xmm11
   472  	pxor	xmm7, xmm8
   473  	movdqa	xmm0, xmm5
   474  	pxor	xmm0, xmm8
   475  	movdqa	xmm2, xmm12
   476  	pxor	xmm2, xmm8
   477  	pcmpgtq	xmm2, xmm0
   478  	pcmpgtq	xmm0, xmm7
   479  	movdqa	xmm7, xmm5
   480  	blendvpd	xmm7, xmm11, xmm0
   481  	movdqa	xmm0, xmm1
   482  	blendvpd	xmm4, xmm9, xmm0
   483  	movdqa	xmm0, xmm2
   484  	blendvpd	xmm5, xmm12, xmm0
        # Second pair of loads (offsets 32 and 48):
        #   xmm10 = min(load2, xmm3), xmm11 = min(load3, xmm7),
        #   xmm13 = max(load2, xmm4), xmm6  = max(load3, xmm5).
   485  	movapd	xmm2, xmm3
   486  	xorpd	xmm2, xmm8
   487  	movdqa	xmm0, xmm13
   488  	pxor	xmm0, xmm8
   489  	movapd	xmm1, xmm4
   490  	xorpd	xmm1, xmm8
   491  	pcmpgtq	xmm1, xmm0
   492  	pcmpgtq	xmm0, xmm2
   493  	movdqa	xmm10, xmm13
   494  	blendvpd	xmm10, xmm3, xmm0
   495  	movapd	xmm3, xmm7
   496  	xorpd	xmm3, xmm8
   497  	movdqa	xmm0, xmm6
   498  	pxor	xmm0, xmm8
   499  	movapd	xmm2, xmm5
   500  	xorpd	xmm2, xmm8
   501  	pcmpgtq	xmm2, xmm0
   502  	pcmpgtq	xmm0, xmm3
   503  	movdqa	xmm11, xmm6
   504  	blendvpd	xmm11, xmm7, xmm0
   505  	movdqa	xmm0, xmm1
   506  	blendvpd	xmm13, xmm4, xmm0
   507  	movdqa	xmm0, xmm2
   508  	blendvpd	xmm6, xmm5, xmm0
        # Copy the new maxima back into the loop-carried registers xmm9/xmm12.
        # After the loop the maxima are also live in xmm13/xmm6, the minima in
        # xmm10/xmm11.
   509  	add	rax, 8
   510  	movapd	xmm9, xmm13
   511  	movapd	xmm12, xmm6
   512  	add	r10, 2
   513  	jne	.LBB3_9
   514  # %bb.10:
        # An odd group count leaves one 4-element group for .LBB3_11.
   515  	test	r8b, 1
   516  	je	.LBB3_12
   517  .LBB3_11:
        # Leftover group: 4 elements starting at index rax.
        # Inputs: maxima in xmm13/xmm6, minima in xmm10/xmm11.
        # Outputs land in the same four registers via the moves at the end.
        # xmm5 holds the sign-bit constant for this block.
   518  	movupd	xmm4, xmmword ptr [rdi + 8*rax]
   519  	movupd	xmm3, xmmword ptr [rdi + 8*rax + 16]
   520  	movapd	xmm5, xmmword ptr [rip + .LCPI3_0] # xmm5 = [9223372036854775808,9223372036854775808]
   521  	movapd	xmm0, xmm6
   522  	xorpd	xmm0, xmm5
   523  	movapd	xmm1, xmm3
   524  	xorpd	xmm1, xmm5
   525  	pcmpgtq	xmm0, xmm1
   526  	movapd	xmm7, xmm3
   527  	blendvpd	xmm7, xmm6, xmm0
   528  	movapd	xmm0, xmm13
   529  	xorpd	xmm0, xmm5
   530  	movapd	xmm2, xmm4
   531  	xorpd	xmm2, xmm5
   532  	pcmpgtq	xmm0, xmm2
   533  	movapd	xmm6, xmm4
   534  	blendvpd	xmm6, xmm13, xmm0
   535  	movapd	xmm0, xmm11
   536  	xorpd	xmm0, xmm5
   537  	pcmpgtq	xmm1, xmm0
   538  	movdqa	xmm0, xmm1
   539  	blendvpd	xmm3, xmm11, xmm0
   540  	xorpd	xmm5, xmm10
   541  	pcmpgtq	xmm2, xmm5
   542  	movdqa	xmm0, xmm2
   543  	blendvpd	xmm4, xmm10, xmm0
   544  	movapd	xmm10, xmm4
   545  	movapd	xmm11, xmm3
   546  	movapd	xmm13, xmm6
   547  	movapd	xmm6, xmm7
   548  .LBB3_12:
        # Merge the two max accumulators (xmm13, xmm6), then compare the result
        # with its own half-swapped copy (shuffle 78) so the low qword holds
        # the overall max; rax = max. xmm1 = sign-bit constant.
   549  	movapd	xmm1, xmmword ptr [rip + .LCPI3_0] # xmm1 = [9223372036854775808,9223372036854775808]
   550  	movapd	xmm2, xmm6
   551  	xorpd	xmm2, xmm1
   552  	movapd	xmm0, xmm13
   553  	xorpd	xmm0, xmm1
   554  	pcmpgtq	xmm0, xmm2
   555  	blendvpd	xmm6, xmm13, xmm0
   556  	pshufd	xmm2, xmm6, 78                  # xmm2 = xmm6[2,3,0,1]
   557  	movapd	xmm0, xmm6
   558  	xorpd	xmm0, xmm1
   559  	movdqa	xmm3, xmm2
   560  	pxor	xmm3, xmm1
   561  	pcmpgtq	xmm0, xmm3
   562  	blendvpd	xmm2, xmm6, xmm0
   563  	movq	rax, xmm2
        # Same reduction for the two min accumulators (xmm10, xmm11); r8 = min.
   564  	movdqa	xmm2, xmm10
   565  	pxor	xmm2, xmm1
   566  	movdqa	xmm0, xmm11
   567  	pxor	xmm0, xmm1
   568  	pcmpgtq	xmm0, xmm2
   569  	blendvpd	xmm11, xmm10, xmm0
   570  	pshufd	xmm2, xmm11, 78                 # xmm2 = xmm11[2,3,0,1]
   571  	movdqa	xmm0, xmm11
   572  	pxor	xmm0, xmm1
   573  	pxor	xmm1, xmm2
   574  	pcmpgtq	xmm1, xmm0
   575  	movdqa	xmm0, xmm1
   576  	blendvpd	xmm2, xmm11, xmm0
   577  	movq	r8, xmm2
        # Count was a multiple of 4: no scalar tail.
   578  	cmp	r11, r9
   579  	je	.LBB3_13
   580  .LBB3_4:
        # Scalar loop over elements r11 .. r9-1. rsi carries the running max
        # between iterations, r8 the running min.
   581  	mov	rsi, rax
   582  	.p2align	4, 0x90
   583  .LBB3_5:                                # =>This Inner Loop Header: Depth=1
        # r8 = min(r8, element); rax = max(rsi, element), both unsigned.
   584  	mov	rax, qword ptr [rdi + 8*r11]
   585  	cmp	r8, rax
   586  	cmovae	r8, rax
   587  	cmp	rsi, rax
   588  	cmova	rax, rsi
   589  	add	r11, 1
   590  	mov	rsi, rax
   591  	cmp	r9, r11
   592  	jne	.LBB3_5
   593  .LBB3_13:
        # Store max through rcx and min through rdx, then undo the prologue.
   594  	mov	qword ptr [rcx], rax
   595  	mov	qword ptr [rdx], r8
   596  	mov	rsp, rbp
   597  	pop	rbp
   598  	ret
   599  .LBB3_7:
        # Exactly one 4-element group (r8 == 1 here): seed the registers that
        # .LBB3_11 reads (maxima xmm13/xmm6 = 0, minima xmm10/xmm11 = all
        # ones). Because r8 is 1 the jne below is taken.
   600  	xorpd	xmm13, xmm13
   601  	pcmpeqd	xmm10, xmm10
   602  	xor	eax, eax
   603  	pcmpeqd	xmm11, xmm11
   604  	xorpd	xmm6, xmm6
   605  	test	r8b, 1
   606  	jne	.LBB3_11
   607  	jmp	.LBB3_12
   608  .Lfunc_end3:
   609  	.size	uint64_max_min_sse4, .Lfunc_end3-uint64_max_min_sse4
   610                                          # -- End function
        # Compiler identification string recorded in the object file.
   611  	.ident	"Debian clang version 11.1.0-++20210428103820+1fdec59bffc1-1~exp1~20210428204437.162"
        # Empty .note.GNU-stack section with no flags set.
        # NOTE(review): by GNU toolchain convention this marks the object as not
        # needing an executable stack -- confirm against the linker documentation.
   612  	.section	".note.GNU-stack","",@progbits
        # Requests an address-significance table (LLVM assembler directive).
   613  	.addrsig