# Source: gitee.com/quant1x/gox@v1.7.6/num/asm/_avx2/find.s
#
# Syntax: AT&T / GAS (op src, dst).  Register usage matches the System V
# AMD64 convention for the signatures below (rdi, xmm0, rsi -> rax).
#
# NOTE(review): the function labels are the demangled C++ signatures exactly
# as they appear in this listing.  An assembler would need the mangled
# symbols instead (presumably _Z8Find_F64Pddm / _Z8Find_F32Pffm) - confirm
# against whatever tool consumes this file.
#
# Equality semantics (both functions, both code paths): IEEE-754 ordered
# equality, i.e. the C expression `x[i] == value`:
#   * a NaN element never matches, and a NaN search value is never found;
#   * +0.0 and -0.0 compare equal.
# The vector loop and the scalar tail use the same rule, so the result does
# not depend on where an element sits relative to a chunk boundary.

#-----------------------------------------------------------------------
# unsigned long Find_F64(double *x, double value, unsigned long n)
#
# In:    rdi  = x      (base of the array, n elements of 8 bytes)
#        xmm0 = value  (search key)
#        rsi  = n      (element count, may be 0)
# Out:   rax  = index of the first element equal to value, or n if none
# Clobb: rax, rcx, rdx, ymm1-ymm4, flags
# Stack: none (leaf, no calls)
# Align: none required; all memory operands are unaligned-safe VEX forms
#-----------------------------------------------------------------------
Find_F64(double*, double, unsigned long): # @Find_F64(double*, double, unsigned long)
        movq    %rsi, %rcx
        andq    $-8, %rcx                       # rcx = n rounded down to a multiple of 8
        je      .LBB0_1                         # fewer than 8 elements: scalar loop only
        vbroadcastsd %xmm0, %ymm1               # ymm1 = value in all 4 lanes
        xorl    %eax, %eax                      # rax = current index
.LBB0_7:                                        # =>This Inner Loop Header: Depth=1
        # Invariant: rax < rcx, so the 8 doubles at x[rax .. rax+7] are in range.
        vcmpeqpd (%rdi,%rax,8), %ymm1, %ymm2    # lanes 0-3: all-ones where x[i] == value (ordered)
        vcmpeqpd 32(%rdi,%rax,8), %ymm1, %ymm3  # lanes 4-7
        vorpd   %ymm2, %ymm3, %ymm4
        vptest  %ymm4, %ymm4                    # ZF = 0 iff any lane matched
        jne     .LBB0_8
        addq    $8, %rax
        cmpq    %rcx, %rax
        jb      .LBB0_7
        cmpq    %rsi, %rax
        jb      .LBB0_3                         # 1-7 leftover elements
.LBB0_9:                                        # return rax (match index, or n)
        vzeroupper
        retq
.LBB0_1:
        xorl    %eax, %eax
        cmpq    %rsi, %rax
        jae     .LBB0_9                         # n == 0: return 0
.LBB0_3:                                        # =>This Inner Loop Header: Depth=1
        # vucomisd sets ZF=PF=CF=1 when either operand is NaN (unordered), so
        # ZF alone is not "equal"; a real match is ZF=1 with PF=0.
        vucomisd (%rdi,%rax,8), %xmm0
        jne     .LBB0_4                         # ordered and different
        jnp     .LBB0_9                         # ordered and equal: found at rax
.LBB0_4:                                        # no match at this index (different or NaN)
        addq    $1, %rax
        cmpq    %rax, %rsi
        jne     .LBB0_3
        vzeroupper                              # not found: rax == rsi == n here
        retq
.LBB0_8:                                        # match somewhere in x[rax .. rax+7]
        vmovmskpd %ymm3, %ecx                   # 4-bit mask for lanes 4-7
        shll    $4, %ecx
        vmovmskpd %ymm2, %edx                   # 4-bit mask for lanes 0-3
        orl     %ecx, %edx                      # edx = 8-bit match mask, never zero here
        bsfl    %edx, %ecx                      # ecx = lowest matching lane
        addq    %rcx, %rax
        vzeroupper
        retq

#-----------------------------------------------------------------------
# unsigned long Find_F32(float *x, float value, unsigned long n)
#
# In:    rdi  = x      (base of the array, n elements of 4 bytes)
#        xmm0 = value  (search key)
#        rsi  = n      (element count, may be 0)
# Out:   rax  = index of the first element equal to value, or n if none
# Clobb: rax, rcx, rdx, ymm1-ymm4, flags
# Stack: none (leaf, no calls)
# Align: none required; all memory operands are unaligned-safe VEX forms
#-----------------------------------------------------------------------
Find_F32(float*, float, unsigned long): # @Find_F32(float*, float, unsigned long)
        movq    %rsi, %rcx
        andq    $-16, %rcx                      # rcx = n rounded down to a multiple of 16
        je      .LBB1_1                         # fewer than 16 elements: scalar loop only
        vbroadcastss %xmm0, %ymm1               # ymm1 = value in all 8 lanes
        xorl    %eax, %eax                      # rax = current index
.LBB1_7:                                        # =>This Inner Loop Header: Depth=1
        # Invariant: rax < rcx, so the 16 floats at x[rax .. rax+15] are in range.
        vcmpeqps (%rdi,%rax,4), %ymm1, %ymm2    # lanes 0-7: all-ones where x[i] == value (ordered)
        vcmpeqps 32(%rdi,%rax,4), %ymm1, %ymm3  # lanes 8-15
        vorps   %ymm2, %ymm3, %ymm4
        vptest  %ymm4, %ymm4                    # ZF = 0 iff any lane matched
        jne     .LBB1_8
        addq    $16, %rax
        cmpq    %rcx, %rax
        jb      .LBB1_7
        cmpq    %rsi, %rax
        jb      .LBB1_3                         # 1-15 leftover elements
.LBB1_9:                                        # return rax (match index, or n)
        vzeroupper
        retq
.LBB1_1:
        xorl    %eax, %eax
        cmpq    %rsi, %rax
        jae     .LBB1_9                         # n == 0: return 0
.LBB1_3:                                        # =>This Inner Loop Header: Depth=1
        # vucomiss sets ZF=PF=CF=1 when either operand is NaN (unordered), so
        # ZF alone is not "equal"; a real match is ZF=1 with PF=0.
        vucomiss (%rdi,%rax,4), %xmm0
        jne     .LBB1_4                         # ordered and different
        jnp     .LBB1_9                         # ordered and equal: found at rax
.LBB1_4:                                        # no match at this index (different or NaN)
        addq    $1, %rax
        cmpq    %rax, %rsi
        jne     .LBB1_3
        vzeroupper                              # not found: rax == rsi == n here
        retq
.LBB1_8:                                        # match somewhere in x[rax .. rax+15]
        vmovmskps %ymm3, %ecx                   # 8-bit mask for lanes 8-15
        shll    $8, %ecx
        vmovmskps %ymm2, %edx                   # 8-bit mask for lanes 0-7
        orl     %ecx, %edx                      # edx = 16-bit match mask, never zero here
        bsfl    %edx, %ecx                      # ecx = lowest matching lane
        addq    %rcx, %rax
        vzeroupper
        retq