github.com/apache/arrow/go/v14@v14.0.2/internal/utils/_lib/min_max_sse4_amd64.s (about)

     1  	.text
     2  	.intel_syntax noprefix
     3  	.file	"min_max.c"
     4  	.section	.rodata.cst16,"aM",@progbits,16
     5  	.p2align	4                               # -- Begin function int8_max_min_sse4
     6  .LCPI0_0:                               # 16 bytes of 0x80 (-128): initial maxima; also the bias used in the min reduction
     7  	.zero	16,128
     8  .LCPI0_1:                               # 16 bytes of 0x7f (127): initial minima; also the mask used in the max reduction
     9  	.zero	16,127
    10  	.text
    11  	.globl	int8_max_min_sse4
    12  	.p2align	4, 0x90
    13  	.type	int8_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # int8_max_min_sse4: single pass over an array of signed 8-bit values
        # that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (this is the System V
        # AMD64 integer-argument order; presumably the C prototype in min_max.c
        # is (values, len, min_out, max_out) -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (1 byte)
        #   rcx = address that receives the maximum (1 byte)
        #
        # A count <= 0 stores 127 as the minimum and -128 as the maximum.
        #
        # Layout:
        #   * counts below 32 use only the scalar loop .LBB0_11;
        #   * otherwise (count & -32) elements are processed in 32-byte chunks:
        #     two chunks per iteration of .LBB0_7 and one more in .LBB0_9 when
        #     the chunk count is odd;
        #   * .LBB0_10 reduces the vector accumulators to two scalars;
        #   * .LBB0_11 then handles the remaining (count % 32) elements.
        #
        # Vector accumulators: xmm0/xmm2 = minima, xmm1/xmm3 = maxima.
        # Scalars: sil = minimum, r8b = maximum, r9 = count,
        #          r11 = index of the next element for the scalar loop.
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
    14  int8_max_min_sse4:                      # @int8_max_min_sse4
    15  # %bb.0:
    16  	push	rbp
    17  	mov	rbp, rsp
    18  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
    19  	test	esi, esi
    20  	jle	.LBB0_1                         # count <= 0: store the initial values only
    21  # %bb.2:                                # count > 0
    22  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
    23  	cmp	esi, 31
    24  	ja	.LBB0_4                         # 32 or more elements: use the SSE path
    25  # %bb.3:                                # 1..31 elements: scalar loop only
    26  	mov	r8b, -128                       # running maximum
    27  	mov	sil, 127                        # running minimum
    28  	xor	r11d, r11d                      # scalar loop starts at index 0
    29  	jmp	.LBB0_11
    30  .LBB0_1:                                # empty input
    31  	mov	sil, 127
    32  	mov	r8b, -128
    33  	jmp	.LBB0_12
    34  .LBB0_4:                                # vector path set-up
    35  	mov	r11d, r9d
    36  	and	r11d, -32                       # r11 = number of elements covered by whole 32-byte chunks
    37  	lea	rax, [r11 - 32]
    38  	mov	r8, rax
    39  	shr	r8, 5
    40  	add	r8, 1                           # r8 = number of 32-byte chunks
    41  	test	rax, rax
    42  	je	.LBB0_5                         # exactly one chunk: skip the paired loop
    43  # %bb.6:
    44  	mov	r10, r8
    45  	and	r10, -2
    46  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
    47  	movdqa	xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
    48  	movdqa	xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
    49  	xor	eax, eax                        # rax = byte offset into the input
    50  	movdqa	xmm2, xmm0
    51  	movdqa	xmm3, xmm1
    52  	.p2align	4, 0x90
    53  .LBB0_7:                                # =>This Inner Loop Header: Depth=1
    54  	movdqu	xmm4, xmmword ptr [rdi + rax]
    55  	movdqu	xmm5, xmmword ptr [rdi + rax + 16]
    56  	movdqu	xmm6, xmmword ptr [rdi + rax + 32]
    57  	movdqu	xmm7, xmmword ptr [rdi + rax + 48]
    58  	pminsb	xmm0, xmm4
    59  	pminsb	xmm2, xmm5
    60  	pmaxsb	xmm1, xmm4
    61  	pmaxsb	xmm3, xmm5
    62  	pminsb	xmm0, xmm6
    63  	pminsb	xmm2, xmm7
    64  	pmaxsb	xmm1, xmm6
    65  	pmaxsb	xmm3, xmm7
    66  	add	rax, 64                         # two 32-byte chunks consumed
    67  	add	r10, 2
    68  	jne	.LBB0_7
    69  # %bb.8:
    70  	test	r8b, 1
    71  	je	.LBB0_10                        # even chunk count: no single chunk left over
    72  .LBB0_9:                                # one remaining 32-byte chunk
    73  	movdqu	xmm4, xmmword ptr [rdi + rax]
    74  	movdqu	xmm5, xmmword ptr [rdi + rax + 16]
    75  	pmaxsb	xmm3, xmm5
    76  	pmaxsb	xmm1, xmm4
    77  	pminsb	xmm2, xmm5
    78  	pminsb	xmm0, xmm4
    79  .LBB0_10:                               # horizontal reduction of the accumulators
    80  	pminsb	xmm0, xmm2                      # xmm0 = 16 candidate minima
    81  	pmaxsb	xmm1, xmm3                      # xmm1 = 16 candidate maxima
    82  	pxor	xmm1, xmmword ptr [rip + .LCPI0_1] # x ^ 0x7f: the largest signed byte becomes the smallest unsigned byte
    83  	movdqa	xmm2, xmm1
    84  	psrlw	xmm2, 8                         # high byte of each word moved into the low byte
    85  	pminub	xmm2, xmm1                      # low byte of each word = min of that word's two bytes, high byte = 0
    86  	phminposuw	xmm1, xmm2              # word 0 = smallest of the eight words
    87  	movd	r8d, xmm1
    88  	xor	r8b, 127                        # undo the mask: r8b = maximum
    89  	pxor	xmm0, xmmword ptr [rip + .LCPI0_0] # x ^ 0x80: signed byte order becomes unsigned byte order
    90  	movdqa	xmm1, xmm0
    91  	psrlw	xmm1, 8
    92  	pminub	xmm1, xmm0
    93  	phminposuw	xmm0, xmm1
    94  	movd	esi, xmm0
    95  	xor	sil, -128                       # undo the bias: sil = minimum
    96  	cmp	r11, r9
    97  	je	.LBB0_12                        # count was a multiple of 32: no scalar tail
    98  	.p2align	4, 0x90
    99  .LBB0_11:                               # =>This Inner Loop Header: Depth=1
   100  	movzx	eax, byte ptr [rdi + r11]
   101  	cmp	sil, al
   102  	movzx	esi, sil
   103  	cmovg	esi, eax                        # signed: minimum > element -> element is the new minimum
   104  	cmp	r8b, al
   105  	movzx	r8d, r8b
   106  	cmovl	r8d, eax                        # signed: maximum < element -> element is the new maximum
   107  	add	r11, 1
   108  	cmp	r9, r11
   109  	jne	.LBB0_11
   110  .LBB0_12:                               # store results and return
   111  	mov	byte ptr [rcx], r8b             # *rcx = maximum
   112  	mov	byte ptr [rdx], sil             # *rdx = minimum
   113  	mov	rsp, rbp
   114  	pop	rbp
   115  	ret
   116  .LBB0_5:                                # single-chunk case: initialise the accumulators
   117  	movdqa	xmm1, xmmword ptr [rip + .LCPI0_0] # xmm1 = [128,128,128,128,128,128,128,128,128,128,128,128,128,128,128,128]
   118  	movdqa	xmm0, xmmword ptr [rip + .LCPI0_1] # xmm0 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
   119  	xor	eax, eax
   120  	movdqa	xmm2, xmm0
   121  	movdqa	xmm3, xmm1
   122  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   123  	jne	.LBB0_9
   124  	jmp	.LBB0_10
   125  .Lfunc_end0:
   126  	.size	int8_max_min_sse4, .Lfunc_end0-int8_max_min_sse4
   127                                          # -- End function
   128  	.globl	uint8_max_min_sse4              # -- Begin function uint8_max_min_sse4
   129  	.p2align	4, 0x90
   130  	.type	uint8_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # uint8_max_min_sse4: single pass over an array of unsigned 8-bit values
        # that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (1 byte)
        #   rcx = address that receives the maximum (1 byte)
        #
        # A count <= 0 stores 0xff as the minimum and 0 as the maximum.
        #
        # Layout: counts below 32 use only the scalar loop .LBB1_11; otherwise
        # (count & -32) elements go through 32-byte chunks (.LBB1_7 takes two
        # per iteration, .LBB1_9 takes the odd one), .LBB1_10 reduces the
        # vectors, and .LBB1_11 finishes the remaining elements.
        #
        # Vector accumulators: xmm0/xmm2 = minima (start all-ones),
        #                      xmm1/xmm3 = maxima (start zero).
        # Scalars: sil = minimum, al = maximum, r9 = count, r11 = scalar index.
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   131  uint8_max_min_sse4:                     # @uint8_max_min_sse4
   132  # %bb.0:
   133  	push	rbp
   134  	mov	rbp, rsp
   135  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   136  	test	esi, esi
   137  	jle	.LBB1_1                         # count <= 0: store the initial values only
   138  # %bb.2:                                # count > 0
   139  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   140  	cmp	esi, 31
   141  	ja	.LBB1_4                         # 32 or more elements: use the SSE path
   142  # %bb.3:                                # 1..31 elements: scalar loop only
   143  	mov	sil, -1                         # running minimum = 0xff
   144  	xor	r11d, r11d                      # scalar loop starts at index 0
   145  	xor	eax, eax                        # running maximum = 0
   146  	jmp	.LBB1_11
   147  .LBB1_1:                                # empty input
   148  	mov	sil, -1
   149  	xor	eax, eax
   150  	jmp	.LBB1_12
   151  .LBB1_4:                                # vector path set-up
   152  	mov	r11d, r9d
   153  	and	r11d, -32                       # r11 = number of elements covered by whole 32-byte chunks
   154  	lea	rax, [r11 - 32]
   155  	mov	r8, rax
   156  	shr	r8, 5
   157  	add	r8, 1                           # r8 = number of 32-byte chunks
   158  	test	rax, rax
   159  	je	.LBB1_5                         # exactly one chunk: skip the paired loop
   160  # %bb.6:
   161  	mov	r10, r8
   162  	and	r10, -2
   163  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   164  	pxor	xmm1, xmm1                      # maxima start at 0
   165  	pcmpeqd	xmm0, xmm0                      # minima start at 0xff in every byte
   166  	xor	eax, eax                        # rax = byte offset into the input
   167  	pcmpeqd	xmm2, xmm2
   168  	pxor	xmm3, xmm3
   169  	.p2align	4, 0x90
   170  .LBB1_7:                                # =>This Inner Loop Header: Depth=1
   171  	movdqu	xmm4, xmmword ptr [rdi + rax]
   172  	movdqu	xmm5, xmmword ptr [rdi + rax + 16]
   173  	movdqu	xmm6, xmmword ptr [rdi + rax + 32]
   174  	movdqu	xmm7, xmmword ptr [rdi + rax + 48]
   175  	pminub	xmm0, xmm4
   176  	pminub	xmm2, xmm5
   177  	pmaxub	xmm1, xmm4
   178  	pmaxub	xmm3, xmm5
   179  	pminub	xmm0, xmm6
   180  	pminub	xmm2, xmm7
   181  	pmaxub	xmm1, xmm6
   182  	pmaxub	xmm3, xmm7
   183  	add	rax, 64                         # two 32-byte chunks consumed
   184  	add	r10, 2
   185  	jne	.LBB1_7
   186  # %bb.8:
   187  	test	r8b, 1
   188  	je	.LBB1_10                        # even chunk count: no single chunk left over
   189  .LBB1_9:                                # one remaining 32-byte chunk
   190  	movdqu	xmm4, xmmword ptr [rdi + rax]
   191  	movdqu	xmm5, xmmword ptr [rdi + rax + 16]
   192  	pmaxub	xmm3, xmm5
   193  	pmaxub	xmm1, xmm4
   194  	pminub	xmm2, xmm5
   195  	pminub	xmm0, xmm4
   196  .LBB1_10:                               # horizontal reduction of the accumulators
   197  	pminub	xmm0, xmm2                      # xmm0 = 16 candidate minima
   198  	pmaxub	xmm1, xmm3                      # xmm1 = 16 candidate maxima
   199  	pcmpeqd	xmm2, xmm2
   200  	pxor	xmm2, xmm1                      # bitwise NOT of the maxima: largest value becomes smallest
   201  	movdqa	xmm1, xmm2
   202  	psrlw	xmm1, 8                         # high byte of each word moved into the low byte
   203  	pminub	xmm1, xmm2                      # low byte of each word = min of that word's two bytes, high byte = 0
   204  	phminposuw	xmm1, xmm1              # word 0 = smallest of the eight words
   205  	movd	eax, xmm1
   206  	not	al                              # undo the inversion: al = maximum
   207  	movdqa	xmm1, xmm0
   208  	psrlw	xmm1, 8
   209  	pminub	xmm1, xmm0
   210  	phminposuw	xmm0, xmm1
   211  	movd	esi, xmm0                       # sil = minimum
   212  	cmp	r11, r9
   213  	je	.LBB1_12                        # count was a multiple of 32: no scalar tail
   214  	.p2align	4, 0x90
   215  .LBB1_11:                               # =>This Inner Loop Header: Depth=1
   216  	movzx	r8d, byte ptr [rdi + r11]
   217  	cmp	sil, r8b
   218  	movzx	esi, sil
   219  	cmovae	esi, r8d                        # unsigned: minimum >= element -> take the element
   220  	cmp	al, r8b
   221  	movzx	eax, al
   222  	cmovbe	eax, r8d                        # unsigned: maximum <= element -> take the element
   223  	add	r11, 1
   224  	cmp	r9, r11
   225  	jne	.LBB1_11
   226  .LBB1_12:                               # store results and return
   227  	mov	byte ptr [rcx], al              # *rcx = maximum
   228  	mov	byte ptr [rdx], sil             # *rdx = minimum
   229  	mov	rsp, rbp
   230  	pop	rbp
   231  	ret
   232  .LBB1_5:                                # single-chunk case: initialise the accumulators
   233  	pxor	xmm1, xmm1
   234  	pcmpeqd	xmm0, xmm0
   235  	xor	eax, eax
   236  	pcmpeqd	xmm2, xmm2
   237  	pxor	xmm3, xmm3
   238  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   239  	jne	.LBB1_9
   240  	jmp	.LBB1_10
   241  .Lfunc_end1:
   242  	.size	uint8_max_min_sse4, .Lfunc_end1-uint8_max_min_sse4
   243                                          # -- End function
   244  	.section	.rodata.cst16,"aM",@progbits,16
   245  	.p2align	4                               # -- Begin function int16_max_min_sse4
   246  .LCPI2_0:                               # 8 words of 0x8000 (-32768): initial maxima; also the bias used in the min reduction
   247  	.short	32768                           # 0x8000
   248  	.short	32768                           # 0x8000
   249  	.short	32768                           # 0x8000
   250  	.short	32768                           # 0x8000
   251  	.short	32768                           # 0x8000
   252  	.short	32768                           # 0x8000
   253  	.short	32768                           # 0x8000
   254  	.short	32768                           # 0x8000
   255  .LCPI2_1:                               # 8 words of 0x7fff (32767): initial minima; also the mask used in the max reduction
   256  	.short	32767                           # 0x7fff
   257  	.short	32767                           # 0x7fff
   258  	.short	32767                           # 0x7fff
   259  	.short	32767                           # 0x7fff
   260  	.short	32767                           # 0x7fff
   261  	.short	32767                           # 0x7fff
   262  	.short	32767                           # 0x7fff
   263  	.short	32767                           # 0x7fff
   264  	.text
   265  	.globl	int16_max_min_sse4
   266  	.p2align	4, 0x90
   267  	.type	int16_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # int16_max_min_sse4: single pass over an array of signed 16-bit values
        # that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (2 bytes)
        #   rcx = address that receives the maximum (2 bytes)
        #
        # A count <= 0 stores 32767 as the minimum and -32768 as the maximum.
        #
        # Layout: counts below 16 use only the scalar loop .LBB2_11; otherwise
        # (count & -16) elements go through 16-element (32-byte) chunks
        # (.LBB2_7 takes two per iteration, .LBB2_9 takes the odd one),
        # .LBB2_10 reduces the vectors, and .LBB2_11 finishes the rest.
        #
        # Vector accumulators: xmm0/xmm2 = minima, xmm1/xmm3 = maxima.
        # Scalars: si = minimum, r8w = maximum, r9 = count, r11 = scalar index.
        # rax is an element index in the vector path (addresses use 2*rax).
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   268  int16_max_min_sse4:                     # @int16_max_min_sse4
   269  # %bb.0:
   270  	push	rbp
   271  	mov	rbp, rsp
   272  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   273  	test	esi, esi
   274  	jle	.LBB2_1                         # count <= 0: store the initial values only
   275  # %bb.2:                                # count > 0
   276  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   277  	cmp	esi, 15
   278  	ja	.LBB2_4                         # 16 or more elements: use the SSE path
   279  # %bb.3:                                # 1..15 elements: scalar loop only
   280  	mov	r8w, -32768                     # running maximum
   281  	mov	si, 32767                       # running minimum
   282  	xor	r11d, r11d                      # scalar loop starts at index 0
   283  	jmp	.LBB2_11
   284  .LBB2_1:                                # empty input
   285  	mov	si, 32767
   286  	mov	r8w, -32768
   287  	jmp	.LBB2_12
   288  .LBB2_4:                                # vector path set-up
   289  	mov	r11d, r9d
   290  	and	r11d, -16                       # r11 = number of elements covered by whole 16-element chunks
   291  	lea	rax, [r11 - 16]
   292  	mov	r8, rax
   293  	shr	r8, 4
   294  	add	r8, 1                           # r8 = number of 16-element chunks
   295  	test	rax, rax
   296  	je	.LBB2_5                         # exactly one chunk: skip the paired loop
   297  # %bb.6:
   298  	mov	r10, r8
   299  	and	r10, -2
   300  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   301  	movdqa	xmm1, xmmword ptr [rip + .LCPI2_0] # xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
   302  	movdqa	xmm0, xmmword ptr [rip + .LCPI2_1] # xmm0 = [32767,32767,32767,32767,32767,32767,32767,32767]
   303  	xor	eax, eax                        # rax = element index into the input
   304  	movdqa	xmm2, xmm0
   305  	movdqa	xmm3, xmm1
   306  	.p2align	4, 0x90
   307  .LBB2_7:                                # =>This Inner Loop Header: Depth=1
   308  	movdqu	xmm4, xmmword ptr [rdi + 2*rax]
   309  	movdqu	xmm5, xmmword ptr [rdi + 2*rax + 16]
   310  	movdqu	xmm6, xmmword ptr [rdi + 2*rax + 32]
   311  	movdqu	xmm7, xmmword ptr [rdi + 2*rax + 48]
   312  	pminsw	xmm0, xmm4
   313  	pminsw	xmm2, xmm5
   314  	pmaxsw	xmm1, xmm4
   315  	pmaxsw	xmm3, xmm5
   316  	pminsw	xmm0, xmm6
   317  	pminsw	xmm2, xmm7
   318  	pmaxsw	xmm1, xmm6
   319  	pmaxsw	xmm3, xmm7
   320  	add	rax, 32                         # 32 elements (two chunks, 64 bytes) consumed
   321  	add	r10, 2
   322  	jne	.LBB2_7
   323  # %bb.8:
   324  	test	r8b, 1
   325  	je	.LBB2_10                        # even chunk count: no single chunk left over
   326  .LBB2_9:                                # one remaining 16-element chunk
   327  	movdqu	xmm4, xmmword ptr [rdi + 2*rax]
   328  	movdqu	xmm5, xmmword ptr [rdi + 2*rax + 16]
   329  	pmaxsw	xmm3, xmm5
   330  	pmaxsw	xmm1, xmm4
   331  	pminsw	xmm2, xmm5
   332  	pminsw	xmm0, xmm4
   333  .LBB2_10:                               # horizontal reduction of the accumulators
   334  	pminsw	xmm0, xmm2                      # xmm0 = 8 candidate minima
   335  	pmaxsw	xmm1, xmm3                      # xmm1 = 8 candidate maxima
   336  	pxor	xmm1, xmmword ptr [rip + .LCPI2_1] # x ^ 0x7fff: the largest signed word becomes the smallest unsigned word
   337  	phminposuw	xmm1, xmm1              # word 0 = smallest of the eight words
   338  	movd	r8d, xmm1
   339  	xor	r8d, 32767                      # undo the mask: r8w = maximum (only the low 16 bits are used later)
   340  	pxor	xmm0, xmmword ptr [rip + .LCPI2_0] # x ^ 0x8000: signed word order becomes unsigned word order
   341  	phminposuw	xmm0, xmm0
   342  	movd	esi, xmm0
   343  	xor	esi, 32768                      # undo the bias: si = minimum (only the low 16 bits are used later)
   344  	cmp	r11, r9
   345  	je	.LBB2_12                        # count was a multiple of 16: no scalar tail
   346  	.p2align	4, 0x90
   347  .LBB2_11:                               # =>This Inner Loop Header: Depth=1
   348  	movzx	eax, word ptr [rdi + 2*r11]
   349  	cmp	si, ax
   350  	cmovg	esi, eax                        # signed: minimum > element -> element is the new minimum
   351  	cmp	r8w, ax
   352  	cmovl	r8d, eax                        # signed: maximum < element -> element is the new maximum
   353  	add	r11, 1
   354  	cmp	r9, r11
   355  	jne	.LBB2_11
   356  .LBB2_12:                               # store results and return
   357  	mov	word ptr [rcx], r8w             # *rcx = maximum
   358  	mov	word ptr [rdx], si              # *rdx = minimum
   359  	mov	rsp, rbp
   360  	pop	rbp
   361  	ret
   362  .LBB2_5:                                # single-chunk case: initialise the accumulators
   363  	movdqa	xmm1, xmmword ptr [rip + .LCPI2_0] # xmm1 = [32768,32768,32768,32768,32768,32768,32768,32768]
   364  	movdqa	xmm0, xmmword ptr [rip + .LCPI2_1] # xmm0 = [32767,32767,32767,32767,32767,32767,32767,32767]
   365  	xor	eax, eax
   366  	movdqa	xmm2, xmm0
   367  	movdqa	xmm3, xmm1
   368  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   369  	jne	.LBB2_9
   370  	jmp	.LBB2_10
   371  .Lfunc_end2:
   372  	.size	int16_max_min_sse4, .Lfunc_end2-int16_max_min_sse4
   373                                          # -- End function
   374  	.globl	uint16_max_min_sse4             # -- Begin function uint16_max_min_sse4
   375  	.p2align	4, 0x90
   376  	.type	uint16_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # uint16_max_min_sse4: single pass over an array of unsigned 16-bit
        # values that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (2 bytes)
        #   rcx = address that receives the maximum (2 bytes)
        #
        # A count <= 0 stores 0xffff as the minimum and 0 as the maximum.
        #
        # Layout: counts below 16 use only the scalar loop .LBB3_11; otherwise
        # (count & -16) elements go through 16-element (32-byte) chunks
        # (.LBB3_7 takes two per iteration, .LBB3_9 takes the odd one),
        # .LBB3_10 reduces the vectors, and .LBB3_11 finishes the rest.
        #
        # Vector accumulators: xmm0/xmm2 = minima (start all-ones),
        #                      xmm1/xmm3 = maxima (start zero).
        # Scalars: r8w = minimum, si = maximum, r9 = count, r11 = scalar index.
        # Note that the scalar roles of si and r8w are swapped relative to the
        # signed 16-bit routine; the stores at .LBB3_12 reflect that.
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   377  uint16_max_min_sse4:                    # @uint16_max_min_sse4
   378  # %bb.0:
   379  	push	rbp
   380  	mov	rbp, rsp
   381  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   382  	test	esi, esi
   383  	jle	.LBB3_1                         # count <= 0: store the initial values only
   384  # %bb.2:                                # count > 0
   385  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   386  	cmp	esi, 15
   387  	ja	.LBB3_4                         # 16 or more elements: use the SSE path
   388  # %bb.3:                                # 1..15 elements: scalar loop only
   389  	mov	r8w, -1                         # running minimum = 0xffff
   390  	xor	r11d, r11d                      # scalar loop starts at index 0
   391  	xor	esi, esi                        # running maximum = 0
   392  	jmp	.LBB3_11
   393  .LBB3_1:                                # empty input
   394  	mov	r8w, -1
   395  	xor	esi, esi
   396  	jmp	.LBB3_12
   397  .LBB3_4:                                # vector path set-up
   398  	mov	r11d, r9d
   399  	and	r11d, -16                       # r11 = number of elements covered by whole 16-element chunks
   400  	lea	rax, [r11 - 16]
   401  	mov	r8, rax
   402  	shr	r8, 4
   403  	add	r8, 1                           # r8 = number of 16-element chunks
   404  	test	rax, rax
   405  	je	.LBB3_5                         # exactly one chunk: skip the paired loop
   406  # %bb.6:
   407  	mov	r10, r8
   408  	and	r10, -2
   409  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   410  	pxor	xmm1, xmm1                      # maxima start at 0
   411  	pcmpeqd	xmm0, xmm0                      # minima start at 0xffff in every word
   412  	xor	eax, eax                        # rax = element index into the input
   413  	pcmpeqd	xmm2, xmm2
   414  	pxor	xmm3, xmm3
   415  	.p2align	4, 0x90
   416  .LBB3_7:                                # =>This Inner Loop Header: Depth=1
   417  	movdqu	xmm4, xmmword ptr [rdi + 2*rax]
   418  	movdqu	xmm5, xmmword ptr [rdi + 2*rax + 16]
   419  	movdqu	xmm6, xmmword ptr [rdi + 2*rax + 32]
   420  	movdqu	xmm7, xmmword ptr [rdi + 2*rax + 48]
   421  	pminuw	xmm0, xmm4
   422  	pminuw	xmm2, xmm5
   423  	pmaxuw	xmm1, xmm4
   424  	pmaxuw	xmm3, xmm5
   425  	pminuw	xmm0, xmm6
   426  	pminuw	xmm2, xmm7
   427  	pmaxuw	xmm1, xmm6
   428  	pmaxuw	xmm3, xmm7
   429  	add	rax, 32                         # 32 elements (two chunks, 64 bytes) consumed
   430  	add	r10, 2
   431  	jne	.LBB3_7
   432  # %bb.8:
   433  	test	r8b, 1
   434  	je	.LBB3_10                        # even chunk count: no single chunk left over
   435  .LBB3_9:                                # one remaining 16-element chunk
   436  	movdqu	xmm4, xmmword ptr [rdi + 2*rax]
   437  	movdqu	xmm5, xmmword ptr [rdi + 2*rax + 16]
   438  	pmaxuw	xmm3, xmm5
   439  	pmaxuw	xmm1, xmm4
   440  	pminuw	xmm2, xmm5
   441  	pminuw	xmm0, xmm4
   442  .LBB3_10:                               # horizontal reduction of the accumulators
   443  	pminuw	xmm0, xmm2                      # xmm0 = 8 candidate minima
   444  	pmaxuw	xmm1, xmm3                      # xmm1 = 8 candidate maxima
   445  	pcmpeqd	xmm2, xmm2
   446  	pxor	xmm2, xmm1                      # bitwise NOT of the maxima: largest value becomes smallest
   447  	phminposuw	xmm1, xmm2              # word 0 = smallest of the eight inverted words
   448  	movd	esi, xmm1
   449  	not	esi                             # undo the inversion: si = maximum (only the low 16 bits are used later)
   450  	phminposuw	xmm0, xmm0
   451  	movd	r8d, xmm0                       # r8w = minimum (only the low 16 bits are used later)
   452  	cmp	r11, r9
   453  	je	.LBB3_12                        # count was a multiple of 16: no scalar tail
   454  	.p2align	4, 0x90
   455  .LBB3_11:                               # =>This Inner Loop Header: Depth=1
   456  	movzx	eax, word ptr [rdi + 2*r11]
   457  	cmp	r8w, ax
   458  	cmovae	r8d, eax                        # unsigned: minimum >= element -> take the element
   459  	cmp	si, ax
   460  	cmovbe	esi, eax                        # unsigned: maximum <= element -> take the element
   461  	add	r11, 1
   462  	cmp	r9, r11
   463  	jne	.LBB3_11
   464  .LBB3_12:                               # store results and return
   465  	mov	word ptr [rcx], si              # *rcx = maximum
   466  	mov	word ptr [rdx], r8w             # *rdx = minimum
   467  	mov	rsp, rbp
   468  	pop	rbp
   469  	ret
   470  .LBB3_5:                                # single-chunk case: initialise the accumulators
   471  	pxor	xmm1, xmm1
   472  	pcmpeqd	xmm0, xmm0
   473  	xor	eax, eax
   474  	pcmpeqd	xmm2, xmm2
   475  	pxor	xmm3, xmm3
   476  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   477  	jne	.LBB3_9
   478  	jmp	.LBB3_10
   479  .Lfunc_end3:
   480  	.size	uint16_max_min_sse4, .Lfunc_end3-uint16_max_min_sse4
   481                                          # -- End function
   482  	.section	.rodata.cst16,"aM",@progbits,16
   483  	.p2align	4                               # -- Begin function int32_max_min_sse4
   484  .LCPI4_0:                               # 4 dwords of 0x80000000 (-2147483648): initial maxima
   485  	.long	2147483648                      # 0x80000000
   486  	.long	2147483648                      # 0x80000000
   487  	.long	2147483648                      # 0x80000000
   488  	.long	2147483648                      # 0x80000000
   489  .LCPI4_1:                               # 4 dwords of 0x7fffffff (2147483647): initial minima
   490  	.long	2147483647                      # 0x7fffffff
   491  	.long	2147483647                      # 0x7fffffff
   492  	.long	2147483647                      # 0x7fffffff
   493  	.long	2147483647                      # 0x7fffffff
   494  	.text
   495  	.globl	int32_max_min_sse4
   496  	.p2align	4, 0x90
   497  	.type	int32_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # int32_max_min_sse4: single pass over an array of signed 32-bit values
        # that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (4 bytes)
        #   rcx = address that receives the maximum (4 bytes)
        #
        # A count <= 0 stores 2147483647 as the minimum and -2147483648 as
        # the maximum.
        #
        # Layout: counts below 8 use only the scalar loop .LBB4_5; otherwise
        # (count & -8) elements go through 8-element (32-byte) chunks
        # (.LBB4_9 takes two per iteration, .LBB4_11 takes the odd one),
        # .LBB4_12 reduces the vectors, and .LBB4_4/.LBB4_5 finish the rest.
        #
        # Vector accumulators: xmm0/xmm2 = minima, xmm1/xmm3 = maxima.
        # Scalars: r8d = minimum, eax = maximum on exit (esi carries the
        # maximum between scalar iterations), r9 = count, r11 = scalar index.
        # rax is an element index in the vector path (addresses use 4*rax).
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   498  int32_max_min_sse4:                     # @int32_max_min_sse4
   499  # %bb.0:
   500  	push	rbp
   501  	mov	rbp, rsp
   502  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   503  	test	esi, esi
   504  	jle	.LBB4_1                         # count <= 0: store the initial values only
   505  # %bb.2:                                # count > 0
   506  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   507  	cmp	esi, 7
   508  	ja	.LBB4_6                         # 8 or more elements: use the SSE path
   509  # %bb.3:                                # 1..7 elements: scalar loop only
   510  	mov	eax, -2147483648                # running maximum
   511  	mov	r8d, 2147483647                 # running minimum
   512  	xor	r11d, r11d                      # scalar loop starts at index 0
   513  	jmp	.LBB4_4
   514  .LBB4_1:                                # empty input
   515  	mov	r8d, 2147483647
   516  	mov	eax, -2147483648
   517  	jmp	.LBB4_13
   518  .LBB4_6:                                # vector path set-up
   519  	mov	r11d, r9d
   520  	and	r11d, -8                        # r11 = number of elements covered by whole 8-element chunks
   521  	lea	rax, [r11 - 8]
   522  	mov	r8, rax
   523  	shr	r8, 3
   524  	add	r8, 1                           # r8 = number of 8-element chunks
   525  	test	rax, rax
   526  	je	.LBB4_7                         # exactly one chunk: skip the paired loop
   527  # %bb.8:
   528  	mov	r10, r8
   529  	and	r10, -2
   530  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   531  	movdqa	xmm1, xmmword ptr [rip + .LCPI4_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
   532  	movdqa	xmm0, xmmword ptr [rip + .LCPI4_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
   533  	xor	eax, eax                        # rax = element index into the input
   534  	movdqa	xmm2, xmm0
   535  	movdqa	xmm3, xmm1
   536  	.p2align	4, 0x90
   537  .LBB4_9:                                # =>This Inner Loop Header: Depth=1
   538  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   539  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   540  	movdqu	xmm6, xmmword ptr [rdi + 4*rax + 32]
   541  	movdqu	xmm7, xmmword ptr [rdi + 4*rax + 48]
   542  	pminsd	xmm0, xmm4
   543  	pminsd	xmm2, xmm5
   544  	pmaxsd	xmm1, xmm4
   545  	pmaxsd	xmm3, xmm5
   546  	pminsd	xmm0, xmm6
   547  	pminsd	xmm2, xmm7
   548  	pmaxsd	xmm1, xmm6
   549  	pmaxsd	xmm3, xmm7
   550  	add	rax, 16                         # 16 elements (two chunks, 64 bytes) consumed
   551  	add	r10, 2
   552  	jne	.LBB4_9
   553  # %bb.10:
   554  	test	r8b, 1
   555  	je	.LBB4_12                        # even chunk count: no single chunk left over
   556  .LBB4_11:                               # one remaining 8-element chunk
   557  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   558  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   559  	pmaxsd	xmm3, xmm5
   560  	pmaxsd	xmm1, xmm4
   561  	pminsd	xmm2, xmm5
   562  	pminsd	xmm0, xmm4
   563  .LBB4_12:                               # horizontal reduction of the accumulators
   564  	pminsd	xmm0, xmm2                      # xmm0 = 4 candidate minima
   565  	pmaxsd	xmm1, xmm3                      # xmm1 = 4 candidate maxima
   566  	pshufd	xmm2, xmm1, 78                  # xmm2 = xmm1[2,3,0,1]
   567  	pmaxsd	xmm2, xmm1                      # lanes 0 and 1 now cover all four candidates
   568  	pshufd	xmm1, xmm2, 229                 # xmm1 = xmm2[1,1,2,3]
   569  	pmaxsd	xmm1, xmm2                      # lane 0 = overall maximum
   570  	movd	eax, xmm1                       # eax = maximum
   571  	pshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
   572  	pminsd	xmm1, xmm0
   573  	pshufd	xmm0, xmm1, 229                 # xmm0 = xmm1[1,1,2,3]
   574  	pminsd	xmm0, xmm1                      # lane 0 = overall minimum
   575  	movd	r8d, xmm0                       # r8d = minimum
   576  	cmp	r11, r9
   577  	je	.LBB4_13                        # count was a multiple of 8: no scalar tail
   578  .LBB4_4:                                # scalar tail entry
   579  	mov	esi, eax                        # esi = maximum carried into the loop
   580  	.p2align	4, 0x90
   581  .LBB4_5:                                # =>This Inner Loop Header: Depth=1
   582  	mov	eax, dword ptr [rdi + 4*r11]
   583  	cmp	r8d, eax
   584  	cmovg	r8d, eax                        # signed: minimum > element -> element is the new minimum
   585  	cmp	esi, eax
   586  	cmovge	eax, esi                        # signed: eax = larger of the previous maximum and the element
   587  	add	r11, 1
   588  	mov	esi, eax                        # carry the maximum to the next iteration
   589  	cmp	r9, r11
   590  	jne	.LBB4_5
   591  .LBB4_13:                               # store results and return
   592  	mov	dword ptr [rcx], eax            # *rcx = maximum
   593  	mov	dword ptr [rdx], r8d            # *rdx = minimum
   594  	mov	rsp, rbp
   595  	pop	rbp
   596  	ret
   597  .LBB4_7:                                # single-chunk case: initialise the accumulators
   598  	movdqa	xmm1, xmmword ptr [rip + .LCPI4_0] # xmm1 = [2147483648,2147483648,2147483648,2147483648]
   599  	movdqa	xmm0, xmmword ptr [rip + .LCPI4_1] # xmm0 = [2147483647,2147483647,2147483647,2147483647]
   600  	xor	eax, eax
   601  	movdqa	xmm2, xmm0
   602  	movdqa	xmm3, xmm1
   603  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   604  	jne	.LBB4_11
   605  	jmp	.LBB4_12
   606  .Lfunc_end4:
   607  	.size	int32_max_min_sse4, .Lfunc_end4-int32_max_min_sse4
   608                                          # -- End function
   609  	.globl	uint32_max_min_sse4             # -- Begin function uint32_max_min_sse4
   610  	.p2align	4, 0x90
   611  	.type	uint32_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # uint32_max_min_sse4: single pass over an array of unsigned 32-bit
        # values that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (4 bytes)
        #   rcx = address that receives the maximum (4 bytes)
        #
        # A count <= 0 stores 0xffffffff as the minimum and 0 as the maximum.
        #
        # Layout: counts below 8 use only the scalar loop .LBB5_5; otherwise
        # (count & -8) elements go through 8-element (32-byte) chunks
        # (.LBB5_9 takes two per iteration, .LBB5_11 takes the odd one),
        # .LBB5_12 reduces the vectors, and .LBB5_4/.LBB5_5 finish the rest.
        #
        # Vector accumulators: xmm0/xmm2 = minima (start all-ones),
        #                      xmm1/xmm3 = maxima (start zero).
        # Scalars: r8d = minimum, esi = maximum on exit (eax carries the
        # maximum between scalar iterations), r9 = count, r11 = scalar index.
        # rax is an element index in the vector path (addresses use 4*rax).
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   612  uint32_max_min_sse4:                    # @uint32_max_min_sse4
   613  # %bb.0:
   614  	push	rbp
   615  	mov	rbp, rsp
   616  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   617  	test	esi, esi
   618  	jle	.LBB5_1                         # count <= 0: store the initial values only
   619  # %bb.2:                                # count > 0
   620  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   621  	cmp	esi, 7
   622  	ja	.LBB5_6                         # 8 or more elements: use the SSE path
   623  # %bb.3:                                # 1..7 elements: scalar loop only
   624  	xor	r11d, r11d                      # scalar loop starts at index 0
   625  	mov	r8d, -1                         # running minimum = 0xffffffff
   626  	xor	esi, esi                        # running maximum = 0
   627  	jmp	.LBB5_4
   628  .LBB5_1:                                # empty input
   629  	mov	r8d, -1
   630  	xor	esi, esi
   631  	jmp	.LBB5_13
   632  .LBB5_6:                                # vector path set-up
   633  	mov	r11d, r9d
   634  	and	r11d, -8                        # r11 = number of elements covered by whole 8-element chunks
   635  	lea	rax, [r11 - 8]
   636  	mov	r8, rax
   637  	shr	r8, 3
   638  	add	r8, 1                           # r8 = number of 8-element chunks
   639  	test	rax, rax
   640  	je	.LBB5_7                         # exactly one chunk: skip the paired loop
   641  # %bb.8:
   642  	mov	r10, r8
   643  	and	r10, -2
   644  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   645  	pxor	xmm1, xmm1                      # maxima start at 0
   646  	pcmpeqd	xmm0, xmm0                      # minima start at 0xffffffff in every dword
   647  	xor	eax, eax                        # rax = element index into the input
   648  	pcmpeqd	xmm2, xmm2
   649  	pxor	xmm3, xmm3
   650  	.p2align	4, 0x90
   651  .LBB5_9:                                # =>This Inner Loop Header: Depth=1
   652  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   653  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   654  	movdqu	xmm6, xmmword ptr [rdi + 4*rax + 32]
   655  	movdqu	xmm7, xmmword ptr [rdi + 4*rax + 48]
   656  	pminud	xmm0, xmm4
   657  	pminud	xmm2, xmm5
   658  	pmaxud	xmm1, xmm4
   659  	pmaxud	xmm3, xmm5
   660  	pminud	xmm0, xmm6
   661  	pminud	xmm2, xmm7
   662  	pmaxud	xmm1, xmm6
   663  	pmaxud	xmm3, xmm7
   664  	add	rax, 16                         # 16 elements (two chunks, 64 bytes) consumed
   665  	add	r10, 2
   666  	jne	.LBB5_9
   667  # %bb.10:
   668  	test	r8b, 1
   669  	je	.LBB5_12                        # even chunk count: no single chunk left over
   670  .LBB5_11:                               # one remaining 8-element chunk
   671  	movdqu	xmm4, xmmword ptr [rdi + 4*rax]
   672  	movdqu	xmm5, xmmword ptr [rdi + 4*rax + 16]
   673  	pmaxud	xmm3, xmm5
   674  	pmaxud	xmm1, xmm4
   675  	pminud	xmm2, xmm5
   676  	pminud	xmm0, xmm4
   677  .LBB5_12:                               # horizontal reduction of the accumulators
   678  	pminud	xmm0, xmm2                      # xmm0 = 4 candidate minima
   679  	pmaxud	xmm1, xmm3                      # xmm1 = 4 candidate maxima
   680  	pshufd	xmm2, xmm1, 78                  # xmm2 = xmm1[2,3,0,1]
   681  	pmaxud	xmm2, xmm1                      # lanes 0 and 1 now cover all four candidates
   682  	pshufd	xmm1, xmm2, 229                 # xmm1 = xmm2[1,1,2,3]
   683  	pmaxud	xmm1, xmm2                      # lane 0 = overall maximum
   684  	movd	esi, xmm1                       # esi = maximum
   685  	pshufd	xmm1, xmm0, 78                  # xmm1 = xmm0[2,3,0,1]
   686  	pminud	xmm1, xmm0
   687  	pshufd	xmm0, xmm1, 229                 # xmm0 = xmm1[1,1,2,3]
   688  	pminud	xmm0, xmm1                      # lane 0 = overall minimum
   689  	movd	r8d, xmm0                       # r8d = minimum
   690  	cmp	r11, r9
   691  	je	.LBB5_13                        # count was a multiple of 8: no scalar tail
   692  .LBB5_4:                                # scalar tail entry
   693  	mov	eax, esi                        # eax = maximum carried into the loop
   694  	.p2align	4, 0x90
   695  .LBB5_5:                                # =>This Inner Loop Header: Depth=1
   696  	mov	esi, dword ptr [rdi + 4*r11]
   697  	cmp	r8d, esi
   698  	cmovae	r8d, esi                        # unsigned: minimum >= element -> take the element
   699  	cmp	eax, esi
   700  	cmova	esi, eax                        # unsigned: esi = larger of the previous maximum and the element
   701  	add	r11, 1
   702  	mov	eax, esi                        # carry the maximum to the next iteration
   703  	cmp	r9, r11
   704  	jne	.LBB5_5
   705  .LBB5_13:                               # store results and return
   706  	mov	dword ptr [rcx], esi            # *rcx = maximum
   707  	mov	dword ptr [rdx], r8d            # *rdx = minimum
   708  	mov	rsp, rbp
   709  	pop	rbp
   710  	ret
   711  .LBB5_7:                                # single-chunk case: initialise the accumulators
   712  	pxor	xmm1, xmm1
   713  	pcmpeqd	xmm0, xmm0
   714  	xor	eax, eax
   715  	pcmpeqd	xmm2, xmm2
   716  	pxor	xmm3, xmm3
   717  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   718  	jne	.LBB5_11
   719  	jmp	.LBB5_12
   720  .Lfunc_end5:
   721  	.size	uint32_max_min_sse4, .Lfunc_end5-uint32_max_min_sse4
   722                                          # -- End function
   723  	.section	.rodata.cst16,"aM",@progbits,16
   724  	.p2align	4                               # -- Begin function int64_max_min_sse4
   725  .LCPI6_0:                               # 2 qwords of 0x8000000000000000 (smallest signed value): initial maxima
   726  	.quad	-9223372036854775808            # 0x8000000000000000
   727  	.quad	-9223372036854775808            # 0x8000000000000000
   728  .LCPI6_1:                               # 2 qwords of 0x7fffffffffffffff (largest signed value): initial minima
   729  	.quad	9223372036854775807             # 0x7fffffffffffffff
   730  	.quad	9223372036854775807             # 0x7fffffffffffffff
   731  	.text
   732  	.globl	int64_max_min_sse4
   733  	.p2align	4, 0x90
   734  	.type	int64_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # int64_max_min_sse4: single pass over an array of signed 64-bit values
        # that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (8 bytes)
        #   rcx = address that receives the maximum (8 bytes)
        #
        # A count <= 0 stores 0x7fffffffffffffff as the minimum and
        # 0x8000000000000000 as the maximum.
        #
        # Layout: counts below 4 use only the scalar loop .LBB6_5; otherwise
        # (count & -4) elements go through 4-element (32-byte) chunks
        # (.LBB6_9 takes two per iteration, .LBB6_11 takes the odd one),
        # .LBB6_12 reduces the vectors, and .LBB6_4/.LBB6_5 finish the rest.
        #
        # The code uses no packed 64-bit min/max instruction; every min/max is
        # a pcmpgtq into xmm0 followed by blendvpd, which takes its
        # per-lane select mask from xmm0.
        #
        # Accumulator registers at the block boundaries:
        #   top of .LBB6_9:           minima xmm8/xmm2, maxima xmm9/xmm6
        #   entry to .LBB6_11/_12:    minima xmm8/xmm2, maxima xmm3/xmm4
        # Scalars: r8 = minimum, rsi = maximum on exit (rax carries the
        # maximum between scalar iterations), r9 = count, r11 = scalar index.
        # rax is an element index in the vector path (addresses use 8*rax).
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   735  int64_max_min_sse4:                     # @int64_max_min_sse4
   736  # %bb.0:
   737  	push	rbp
   738  	mov	rbp, rsp
   739  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   740  	movabs	r8, 9223372036854775807         # r8 = initial minimum (0x7fffffffffffffff)
   741  	test	esi, esi
   742  	jle	.LBB6_1                         # count <= 0: store the initial values only
   743  # %bb.2:                                # count > 0
   744  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   745  	cmp	esi, 3
   746  	ja	.LBB6_6                         # 4 or more elements: use the SSE path
   747  # %bb.3:                                # 1..3 elements: scalar loop only
   748  	lea	rsi, [r8 + 1]                   # rsi = 0x8000000000000000 = initial maximum
   749  	xor	r11d, r11d                      # scalar loop starts at index 0
   750  	jmp	.LBB6_4
   751  .LBB6_1:                                # empty input
   752  	lea	rsi, [r8 + 1]                   # rsi = 0x8000000000000000 = initial maximum
   753  	jmp	.LBB6_13
   754  .LBB6_6:                                # vector path set-up (r8 is reused as the chunk count from here)
   755  	mov	r11d, r9d
   756  	and	r11d, -4                        # r11 = number of elements covered by whole 4-element chunks
   757  	lea	rax, [r11 - 4]
   758  	mov	r8, rax
   759  	shr	r8, 2
   760  	add	r8, 1                           # r8 = number of 4-element chunks
   761  	test	rax, rax
   762  	je	.LBB6_7                         # exactly one chunk: skip the paired loop
   763  # %bb.8:
   764  	mov	r10, r8
   765  	and	r10, -2
   766  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   767  	movdqa	xmm9, xmmword ptr [rip + .LCPI6_0] # xmm9 = [9223372036854775808,9223372036854775808]
   768  	movdqa	xmm8, xmmword ptr [rip + .LCPI6_1] # xmm8 = [9223372036854775807,9223372036854775807]
   769  	xor	eax, eax                        # rax = element index into the input
   770  	movdqa	xmm2, xmm8
   771  	movdqa	xmm6, xmm9
   772  	.p2align	4, 0x90
   773  .LBB6_9:                                # =>This Inner Loop Header: Depth=1
   774  	movdqu	xmm7, xmmword ptr [rdi + 8*rax] # v0
   775  	movdqa	xmm0, xmm7
   776  	pcmpgtq	xmm0, xmm8                      # mask: v0 > min0
   777  	movdqa	xmm4, xmm7
   778  	blendvpd	xmm4, xmm8, xmm0        # xmm4 = min(v0, min0)
   779  	movdqu	xmm1, xmmword ptr [rdi + 8*rax + 16] # v1
   780  	movdqa	xmm0, xmm1
   781  	pcmpgtq	xmm0, xmm2                      # mask: v1 > min1
   782  	movdqa	xmm5, xmm1
   783  	blendvpd	xmm5, xmm2, xmm0        # xmm5 = min(v1, min1)
   784  	movdqa	xmm0, xmm9
   785  	pcmpgtq	xmm0, xmm7                      # mask: max0 > v0
   786  	blendvpd	xmm7, xmm9, xmm0        # xmm7 = max(v0, max0)
   787  	movdqa	xmm0, xmm6
   788  	pcmpgtq	xmm0, xmm1                      # mask: max1 > v1
   789  	blendvpd	xmm1, xmm6, xmm0        # xmm1 = max(v1, max1)
   790  	movdqu	xmm3, xmmword ptr [rdi + 8*rax + 32] # v2
   791  	movdqa	xmm0, xmm3
   792  	pcmpgtq	xmm0, xmm4
   793  	movdqa	xmm8, xmm3
   794  	blendvpd	xmm8, xmm4, xmm0        # xmm8 = min(v2, xmm4): new min0
   795  	movdqu	xmm4, xmmword ptr [rdi + 8*rax + 48] # v3
   796  	movdqa	xmm0, xmm4
   797  	pcmpgtq	xmm0, xmm5
   798  	movdqa	xmm2, xmm4
   799  	blendvpd	xmm2, xmm5, xmm0        # xmm2 = min(v3, xmm5): new min1
   800  	movapd	xmm0, xmm7
   801  	pcmpgtq	xmm0, xmm3
   802  	blendvpd	xmm3, xmm7, xmm0        # xmm3 = max(v2, xmm7): new max0
   803  	movapd	xmm0, xmm1
   804  	pcmpgtq	xmm0, xmm4
   805  	blendvpd	xmm4, xmm1, xmm0        # xmm4 = max(v3, xmm1): new max1
   806  	add	rax, 8                          # 8 elements (two chunks, 64 bytes) consumed
   807  	movapd	xmm9, xmm3                      # max0 back into its loop register
   808  	movapd	xmm6, xmm4                      # max1 back into its loop register
   809  	add	r10, 2
   810  	jne	.LBB6_9
   811  # %bb.10:
   812  	test	r8b, 1
   813  	je	.LBB6_12                        # even chunk count: no single chunk left over
   814  .LBB6_11:                               # one remaining 4-element chunk
   815  	movdqu	xmm1, xmmword ptr [rdi + 8*rax + 16] # v1
   816  	movapd	xmm0, xmm4
   817  	pcmpgtq	xmm0, xmm1                      # mask: max1 > v1
   818  	movdqa	xmm5, xmm1
   819  	blendvpd	xmm5, xmm4, xmm0        # xmm5 = max(v1, max1)
   820  	movdqu	xmm4, xmmword ptr [rdi + 8*rax] # v0
   821  	movapd	xmm0, xmm3
   822  	pcmpgtq	xmm0, xmm4                      # mask: max0 > v0
   823  	movdqa	xmm6, xmm4
   824  	blendvpd	xmm6, xmm3, xmm0        # xmm6 = max(v0, max0)
   825  	movdqa	xmm0, xmm1
   826  	pcmpgtq	xmm0, xmm2                      # mask: v1 > min1
   827  	blendvpd	xmm1, xmm2, xmm0        # xmm1 = min(v1, min1)
   828  	movdqa	xmm0, xmm4
   829  	pcmpgtq	xmm0, xmm8                      # mask: v0 > min0
   830  	blendvpd	xmm4, xmm8, xmm0        # xmm4 = min(v0, min0)
   831  	movapd	xmm8, xmm4                      # min0
   832  	movapd	xmm2, xmm1                      # min1
   833  	movapd	xmm3, xmm6                      # max0
   834  	movapd	xmm4, xmm5                      # max1
   835  .LBB6_12:                               # horizontal reduction of the accumulators
   836  	movapd	xmm0, xmm3
   837  	pcmpgtq	xmm0, xmm4
   838  	blendvpd	xmm4, xmm3, xmm0        # xmm4 = max(max0, max1)
   839  	pshufd	xmm1, xmm4, 78                  # xmm1 = xmm4[2,3,0,1]
   840  	movdqa	xmm0, xmm4
   841  	pcmpgtq	xmm0, xmm1
   842  	blendvpd	xmm1, xmm4, xmm0        # low qword = larger of the two halves
   843  	movq	rsi, xmm1                       # rsi = maximum
   844  	movdqa	xmm0, xmm2
   845  	pcmpgtq	xmm0, xmm8
   846  	blendvpd	xmm2, xmm8, xmm0        # xmm2 = min(min0, min1)
   847  	pshufd	xmm1, xmm2, 78                  # xmm1 = xmm2[2,3,0,1]
   848  	movdqa	xmm0, xmm1
   849  	pcmpgtq	xmm0, xmm2
   850  	blendvpd	xmm1, xmm2, xmm0        # low qword = smaller of the two halves
   851  	movq	r8, xmm1                        # r8 = minimum
   852  	cmp	r11, r9
   853  	je	.LBB6_13                        # count was a multiple of 4: no scalar tail
   854  .LBB6_4:                                # scalar tail entry
   855  	mov	rax, rsi                        # rax = maximum carried into the loop
   856  	.p2align	4, 0x90
   857  .LBB6_5:                                # =>This Inner Loop Header: Depth=1
   858  	mov	rsi, qword ptr [rdi + 8*r11]
   859  	cmp	r8, rsi
   860  	cmovg	r8, rsi                         # signed: minimum > element -> element is the new minimum
   861  	cmp	rax, rsi
   862  	cmovge	rsi, rax                        # signed: rsi = larger of the previous maximum and the element
   863  	add	r11, 1
   864  	mov	rax, rsi                        # carry the maximum to the next iteration
   865  	cmp	r9, r11
   866  	jne	.LBB6_5
   867  .LBB6_13:                               # store results and return
   868  	mov	qword ptr [rcx], rsi            # *rcx = maximum
   869  	mov	qword ptr [rdx], r8             # *rdx = minimum
   870  	mov	rsp, rbp
   871  	pop	rbp
   872  	ret
   873  .LBB6_7:                                # single-chunk case: initialise the registers .LBB6_11 expects
   874  	movapd	xmm3, xmmword ptr [rip + .LCPI6_0] # xmm3 = [9223372036854775808,9223372036854775808]
   875  	movdqa	xmm8, xmmword ptr [rip + .LCPI6_1] # xmm8 = [9223372036854775807,9223372036854775807]
   876  	xor	eax, eax
   877  	movdqa	xmm2, xmm8
   878  	movapd	xmm4, xmm3
   879  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
   880  	jne	.LBB6_11
   881  	jmp	.LBB6_12
   882  .Lfunc_end6:
   883  	.size	int64_max_min_sse4, .Lfunc_end6-int64_max_min_sse4
   884                                          # -- End function
   885  	.section	.rodata.cst16,"aM",@progbits,16
   886  	.p2align	4                               # -- Begin function uint64_max_min_sse4
   887  .LCPI7_0:                               # 2 qwords with only the top bit set: XOR bias applied before each signed compare
   888  	.quad	-9223372036854775808            # 0x8000000000000000
   889  	.quad	-9223372036854775808            # 0x8000000000000000
   890  	.text
   891  	.globl	uint64_max_min_sse4
   892  	.p2align	4, 0x90
   893  	.type	uint64_max_min_sse4,@function
        # ----------------------------------------------------------------------
        # uint64_max_min_sse4: single pass over an array of unsigned 64-bit
        # values that stores both the minimum and the maximum.
        #
        # Registers on entry, as used by the code below (System V AMD64
        # integer-argument order; presumably (values, len, min_out, max_out)
        # in min_max.c -- confirm against that file):
        #   rdi = address of the first element (read with unaligned loads)
        #   esi = element count, tested as a signed 32-bit value
        #   rdx = address that receives the minimum (8 bytes)
        #   rcx = address that receives the maximum (8 bytes)
        #
        # A count <= 0 stores 0xffffffffffffffff as the minimum and 0 as the
        # maximum.
        #
        # Layout: counts below 4 use only the scalar loop .LBB7_5; otherwise
        # (count & -4) elements go through 4-element (32-byte) chunks
        # (.LBB7_9 takes two per iteration, .LBB7_11 takes the odd one),
        # .LBB7_12 reduces the vectors, and .LBB7_4/.LBB7_5 finish the rest.
        #
        # Comparison technique: both operands are XORed with
        # 0x8000000000000000 (.LCPI7_0) so that the signed pcmpgtq yields the
        # unsigned ordering; blendvpd then selects lanes using the mask in
        # xmm0. The biased copies are only used for comparing -- the
        # accumulators themselves hold unbiased values.
        #
        # Accumulator registers at the block boundaries:
        #   top of .LBB7_9:           minima xmm10/xmm11, maxima xmm9/xmm12
        #   entry to .LBB7_11/_12:    minima xmm10/xmm11, maxima xmm13/xmm6
        # Scalars: r8 = minimum, rax = maximum on exit (rsi carries the
        # maximum between scalar iterations), r9 = count, r11 = scalar index.
        # rax is an element index in the vector path (addresses use 8*rax).
        # No stack memory is accessed; rbp is saved and restored.
        # ----------------------------------------------------------------------
   894  uint64_max_min_sse4:                    # @uint64_max_min_sse4
   895  # %bb.0:
   896  	push	rbp
   897  	mov	rbp, rsp
   898  	and	rsp, -8                         # round rsp down to a multiple of 8; rsp is reloaded from rbp before ret
   899  	test	esi, esi
   900  	jle	.LBB7_1                         # count <= 0: store the initial values only
   901  # %bb.2:                                # count > 0
   902  	mov	r9d, esi                        # r9 = count, zero-extended to 64 bits
   903  	cmp	esi, 3
   904  	ja	.LBB7_6                         # 4 or more elements: use the SSE path
   905  # %bb.3:                                # 1..3 elements: scalar loop only
   906  	mov	r8, -1                          # running minimum = 0xffffffffffffffff
   907  	xor	r11d, r11d                      # scalar loop starts at index 0
   908  	xor	eax, eax                        # running maximum = 0
   909  	jmp	.LBB7_4
   910  .LBB7_1:                                # empty input
   911  	mov	r8, -1
   912  	xor	eax, eax
   913  	jmp	.LBB7_13
   914  .LBB7_6:                                # vector path set-up
   915  	mov	r11d, r9d
   916  	and	r11d, -4                        # r11 = number of elements covered by whole 4-element chunks
   917  	lea	rax, [r11 - 4]
   918  	mov	r8, rax
   919  	shr	r8, 2
   920  	add	r8, 1                           # r8 = number of 4-element chunks
   921  	test	rax, rax
   922  	je	.LBB7_7                         # exactly one chunk: skip the paired loop
   923  # %bb.8:
   924  	mov	r10, r8
   925  	and	r10, -2
   926  	neg	r10                             # r10 = -(chunk count rounded down to even); counts up to 0 in steps of 2
   927  	pxor	xmm9, xmm9                      # max0 = 0
   928  	pcmpeqd	xmm10, xmm10                    # min0 = all-ones
   929  	xor	eax, eax                        # rax = element index into the input
   930  	movdqa	xmm8, xmmword ptr [rip + .LCPI7_0] # xmm8 = [9223372036854775808,9223372036854775808]
   931  	pcmpeqd	xmm11, xmm11                    # min1 = all-ones
   932  	pxor	xmm12, xmm12                    # max1 = 0
   933  	.p2align	4, 0x90
   934  .LBB7_9:                                # =>This Inner Loop Header: Depth=1
   935  	movdqa	xmm2, xmm10
   936  	pxor	xmm2, xmm8                      # biased min0
   937  	movdqu	xmm4, xmmword ptr [rdi + 8*rax] # v0
   938  	movdqu	xmm5, xmmword ptr [rdi + 8*rax + 16] # v1
   939  	movdqu	xmm13, xmmword ptr [rdi + 8*rax + 32] # v2
   940  	movdqa	xmm0, xmm4
   941  	pxor	xmm0, xmm8                      # biased v0
   942  	movdqa	xmm1, xmm9
   943  	pxor	xmm1, xmm8                      # biased max0
   944  	pcmpgtq	xmm1, xmm0                      # xmm1 = mask: max0 > v0 (unsigned)
   945  	pcmpgtq	xmm0, xmm2                      # xmm0 = mask: v0 > min0 (unsigned)
   946  	movdqa	xmm3, xmm4
   947  	blendvpd	xmm3, xmm10, xmm0       # xmm3 = min(v0, min0)
   948  	movdqu	xmm6, xmmword ptr [rdi + 8*rax + 48] # v3
   949  	movdqa	xmm7, xmm11
   950  	pxor	xmm7, xmm8                      # biased min1
   951  	movdqa	xmm0, xmm5
   952  	pxor	xmm0, xmm8                      # biased v1
   953  	movdqa	xmm2, xmm12
   954  	pxor	xmm2, xmm8                      # biased max1
   955  	pcmpgtq	xmm2, xmm0                      # xmm2 = mask: max1 > v1 (unsigned)
   956  	pcmpgtq	xmm0, xmm7                      # xmm0 = mask: v1 > min1 (unsigned)
   957  	movdqa	xmm7, xmm5
   958  	blendvpd	xmm7, xmm11, xmm0       # xmm7 = min(v1, min1)
   959  	movdqa	xmm0, xmm1
   960  	blendvpd	xmm4, xmm9, xmm0        # xmm4 = max(v0, max0)
   961  	movdqa	xmm0, xmm2
   962  	blendvpd	xmm5, xmm12, xmm0       # xmm5 = max(v1, max1)
   963  	movapd	xmm2, xmm3
   964  	xorpd	xmm2, xmm8                      # biased xmm3 (current min0)
   965  	movdqa	xmm0, xmm13
   966  	pxor	xmm0, xmm8                      # biased v2
   967  	movapd	xmm1, xmm4
   968  	xorpd	xmm1, xmm8                      # biased xmm4 (current max0)
   969  	pcmpgtq	xmm1, xmm0                      # xmm1 = mask: max0 > v2 (unsigned)
   970  	pcmpgtq	xmm0, xmm2                      # xmm0 = mask: v2 > min0 (unsigned)
   971  	movdqa	xmm10, xmm13
   972  	blendvpd	xmm10, xmm3, xmm0       # xmm10 = min(v2, xmm3): new min0
   973  	movapd	xmm3, xmm7
   974  	xorpd	xmm3, xmm8                      # biased xmm7 (current min1)
   975  	movdqa	xmm0, xmm6
   976  	pxor	xmm0, xmm8                      # biased v3
   977  	movapd	xmm2, xmm5
   978  	xorpd	xmm2, xmm8                      # biased xmm5 (current max1)
   979  	pcmpgtq	xmm2, xmm0                      # xmm2 = mask: max1 > v3 (unsigned)
   980  	pcmpgtq	xmm0, xmm3                      # xmm0 = mask: v3 > min1 (unsigned)
   981  	movdqa	xmm11, xmm6
   982  	blendvpd	xmm11, xmm7, xmm0       # xmm11 = min(v3, xmm7): new min1
   983  	movdqa	xmm0, xmm1
   984  	blendvpd	xmm13, xmm4, xmm0       # xmm13 = max(v2, xmm4): new max0
   985  	movdqa	xmm0, xmm2
   986  	blendvpd	xmm6, xmm5, xmm0        # xmm6 = max(v3, xmm5): new max1
   987  	add	rax, 8                          # 8 elements (two chunks, 64 bytes) consumed
   988  	movapd	xmm9, xmm13                     # max0 back into its loop register
   989  	movapd	xmm12, xmm6                     # max1 back into its loop register
   990  	add	r10, 2
   991  	jne	.LBB7_9
   992  # %bb.10:
   993  	test	r8b, 1
   994  	je	.LBB7_12                        # even chunk count: no single chunk left over
   995  .LBB7_11:                               # one remaining 4-element chunk
   996  	movupd	xmm4, xmmword ptr [rdi + 8*rax] # v0
   997  	movupd	xmm3, xmmword ptr [rdi + 8*rax + 16] # v1
   998  	movapd	xmm5, xmmword ptr [rip + .LCPI7_0] # xmm5 = [9223372036854775808,9223372036854775808]
   999  	movapd	xmm0, xmm6
  1000  	xorpd	xmm0, xmm5                      # biased max1
  1001  	movapd	xmm1, xmm3
  1002  	xorpd	xmm1, xmm5                      # biased v1
  1003  	pcmpgtq	xmm0, xmm1                      # mask: max1 > v1 (unsigned)
  1004  	movapd	xmm7, xmm3
  1005  	blendvpd	xmm7, xmm6, xmm0        # xmm7 = max(v1, max1)
  1006  	movapd	xmm0, xmm13
  1007  	xorpd	xmm0, xmm5                      # biased max0
  1008  	movapd	xmm2, xmm4
  1009  	xorpd	xmm2, xmm5                      # biased v0
  1010  	pcmpgtq	xmm0, xmm2                      # mask: max0 > v0 (unsigned)
  1011  	movapd	xmm6, xmm4
  1012  	blendvpd	xmm6, xmm13, xmm0       # xmm6 = max(v0, max0)
  1013  	movapd	xmm0, xmm11
  1014  	xorpd	xmm0, xmm5                      # biased min1
  1015  	pcmpgtq	xmm1, xmm0                      # mask: v1 > min1 (unsigned)
  1016  	movdqa	xmm0, xmm1
  1017  	blendvpd	xmm3, xmm11, xmm0       # xmm3 = min(v1, min1)
  1018  	xorpd	xmm5, xmm10                     # xmm5 = biased min0 (the bias register is consumed here)
  1019  	pcmpgtq	xmm2, xmm5                      # mask: v0 > min0 (unsigned)
  1020  	movdqa	xmm0, xmm2
  1021  	blendvpd	xmm4, xmm10, xmm0       # xmm4 = min(v0, min0)
  1022  	movapd	xmm10, xmm4                     # min0
  1023  	movapd	xmm11, xmm3                     # min1
  1024  	movapd	xmm13, xmm6                     # max0
  1025  	movapd	xmm6, xmm7                      # max1
  1026  .LBB7_12:                               # horizontal reduction of the accumulators
  1027  	movapd	xmm1, xmmword ptr [rip + .LCPI7_0] # xmm1 = [9223372036854775808,9223372036854775808]
  1028  	movapd	xmm2, xmm6
  1029  	xorpd	xmm2, xmm1                      # biased max1
  1030  	movapd	xmm0, xmm13
  1031  	xorpd	xmm0, xmm1                      # biased max0
  1032  	pcmpgtq	xmm0, xmm2                      # mask: max0 > max1 (unsigned)
  1033  	blendvpd	xmm6, xmm13, xmm0       # xmm6 = max(max0, max1)
  1034  	pshufd	xmm2, xmm6, 78                  # xmm2 = xmm6[2,3,0,1]
  1035  	movapd	xmm0, xmm6
  1036  	xorpd	xmm0, xmm1
  1037  	movdqa	xmm3, xmm2
  1038  	pxor	xmm3, xmm1
  1039  	pcmpgtq	xmm0, xmm3                      # mask: xmm6 > its half-swapped copy (unsigned)
  1040  	blendvpd	xmm2, xmm6, xmm0        # low qword = larger of the two halves
  1041  	movq	rax, xmm2                       # rax = maximum
  1042  	movdqa	xmm2, xmm10
  1043  	pxor	xmm2, xmm1                      # biased min0
  1044  	movdqa	xmm0, xmm11
  1045  	pxor	xmm0, xmm1                      # biased min1
  1046  	pcmpgtq	xmm0, xmm2                      # mask: min1 > min0 (unsigned)
  1047  	blendvpd	xmm11, xmm10, xmm0      # xmm11 = min(min0, min1)
  1048  	pshufd	xmm2, xmm11, 78                 # xmm2 = xmm11[2,3,0,1]
  1049  	movdqa	xmm0, xmm11
  1050  	pxor	xmm0, xmm1
  1051  	pxor	xmm1, xmm2                      # xmm1 = biased half-swapped copy (the bias register is consumed here)
  1052  	pcmpgtq	xmm1, xmm0                      # mask: half-swapped copy > xmm11 (unsigned)
  1053  	movdqa	xmm0, xmm1
  1054  	blendvpd	xmm2, xmm11, xmm0       # low qword = smaller of the two halves
  1055  	movq	r8, xmm2                        # r8 = minimum
  1056  	cmp	r11, r9
  1057  	je	.LBB7_13                        # count was a multiple of 4: no scalar tail
  1058  .LBB7_4:                                # scalar tail entry
  1059  	mov	rsi, rax                        # rsi = maximum carried into the loop
  1060  	.p2align	4, 0x90
  1061  .LBB7_5:                                # =>This Inner Loop Header: Depth=1
  1062  	mov	rax, qword ptr [rdi + 8*r11]
  1063  	cmp	r8, rax
  1064  	cmovae	r8, rax                         # unsigned: minimum >= element -> take the element
  1065  	cmp	rsi, rax
  1066  	cmova	rax, rsi                        # unsigned: rax = larger of the previous maximum and the element
  1067  	add	r11, 1
  1068  	mov	rsi, rax                        # carry the maximum to the next iteration
  1069  	cmp	r9, r11
  1070  	jne	.LBB7_5
  1071  .LBB7_13:                               # store results and return
  1072  	mov	qword ptr [rcx], rax            # *rcx = maximum
  1073  	mov	qword ptr [rdx], r8             # *rdx = minimum
  1074  	mov	rsp, rbp
  1075  	pop	rbp
  1076  	ret
  1077  .LBB7_7:                                # single-chunk case: initialise the registers .LBB7_11 expects
  1078  	xorpd	xmm13, xmm13                    # max0 = 0
  1079  	pcmpeqd	xmm10, xmm10                    # min0 = all-ones
  1080  	xor	eax, eax
  1081  	pcmpeqd	xmm11, xmm11                    # min1 = all-ones
  1082  	xorpd	xmm6, xmm6                      # max1 = 0
  1083  	test	r8b, 1                          # r8 is 1 on this path (rax was 0), so the jne below is taken
  1084  	jne	.LBB7_11
  1085  	jmp	.LBB7_12
  1086  .Lfunc_end7:
  1087  	.size	uint64_max_min_sse4, .Lfunc_end7-uint64_max_min_sse4
  1088                                          # -- End function
  1089  	.ident	"Debian clang version 11.0.1-2"
  1090  	.section	".note.GNU-stack","",@progbits
  1091  	.addrsig