gitee.com/quant1x/gox@v1.7.6/num/asm/_avx2/find.s (about)

     1  Find_F64(double*, double, unsigned long):                        # @Find_F64(double*, double, unsigned long)
        # Linear search for the first array element equal to the needle.
        #   In:    %rdi = array base, %xmm0 = needle, %rsi = element count
        #   Out:   %rax = index of the first match, or the element count if none
        #   Uses:  %rcx, %rdx, %ymm1-%ymm4, flags
        # Equality is IEEE-754 ordered-equal in BOTH the 8-wide vector body and the
        # scalar tail: a NaN (element or needle) never matches, and +0.0 matches
        # -0.0, so the result does not depend on where in the array a value sits.
     2          movq    %rsi, %rcx
     3          andq    $-8, %rcx                       # rcx = count rounded down to a multiple of 8
     4          je      .LBB0_1                         # fewer than 8 elements: scalar loop only
     5          vbroadcastsd    %xmm0, %ymm1            # ymm1 = needle replicated into 4 lanes
     6          xorl    %eax, %eax                      # rax = index of the current 8-element group
     7  .LBB0_7:                                # =>This Inner Loop Header: Depth=1
     8          vcmpeqpd        (%rdi,%rax,8), %ymm1, %ymm2     # elements 0-3 of the group; false on NaN
     9          vcmpeqpd        32(%rdi,%rax,8), %ymm1, %ymm3   # elements 4-7 of the group
    10          vorpd   %ymm2, %ymm3, %ymm4
    11          vptest  %ymm4, %ymm4                    # ZF is cleared when any lane matched
    12          jne     .LBB0_8
    13          addq    $8, %rax
    14          cmpq    %rcx, %rax
    15          jb      .LBB0_7
    16          cmpq    %rsi, %rax                      # leftover elements (count not a multiple of 8)?
    17          jb      .LBB0_3
    18  .LBB0_9:                                # common exit: rax already holds the result
    19          vzeroupper
    20          retq
    21  .LBB0_1:
    22          xorl    %eax, %eax
    23          cmpq    %rsi, %rax
    24          jae     .LBB0_9                         # count == 0: return 0
    25  .LBB0_3:                                # =>This Inner Loop Header: Depth=1
    26          vucomisd        (%rdi,%rax,8), %xmm0
                jp      .LBB0_4                         # unordered (NaN involved) also sets ZF: not a match
    27          je      .LBB0_9                         # ordered and equal: rax is the index
        .LBB0_4:
    28          addq    $1, %rax
    29          cmpq    %rax, %rsi
    30          jne     .LBB0_3
    31          movq    %rsi, %rax                      # no match: return the element count
    32          vzeroupper
    33          retq
    34  .LBB0_8:                                # a lane in the current group matched
    35          vmovmskpd       %ymm3, %ecx             # 4-bit mask for elements 4-7
    36          shll    $4, %ecx
    37          vmovmskpd       %ymm2, %edx             # 4-bit mask for elements 0-3
    38          orl     %ecx, %edx                      # edx bit i set <=> element i of the group matched
    39          bsfl    %edx, %ecx                      # lowest set bit = first match (mask is nonzero here)
    40          addq    %rcx, %rax                      # group base index + offset within the group
    41          vzeroupper
    42          retq
    43  Find_F32(float*, float, unsigned long):                        # @Find_F32(float*, float, unsigned long)
        # Linear search for the first array element equal to the needle.
        #   In:    %rdi = array base, %xmm0 = needle, %rsi = element count
        #   Out:   %rax = index of the first match, or the element count if none
        #   Uses:  %rcx, %rdx, %ymm1-%ymm4, flags
        # Equality is IEEE-754 ordered-equal in BOTH the 16-wide vector body and the
        # scalar tail: a NaN (element or needle) never matches, and +0.0 matches
        # -0.0, so the result does not depend on where in the array a value sits.
    44          movq    %rsi, %rcx
    45          andq    $-16, %rcx                      # rcx = count rounded down to a multiple of 16
    46          je      .LBB1_1                         # fewer than 16 elements: scalar loop only
    47          vbroadcastss    %xmm0, %ymm1            # ymm1 = needle replicated into 8 lanes
    48          xorl    %eax, %eax                      # rax = index of the current 16-element group
    49  .LBB1_7:                                # =>This Inner Loop Header: Depth=1
    50          vcmpeqps        (%rdi,%rax,4), %ymm1, %ymm2     # elements 0-7 of the group; false on NaN
    51          vcmpeqps        32(%rdi,%rax,4), %ymm1, %ymm3   # elements 8-15 of the group
    52          vorps   %ymm2, %ymm3, %ymm4
    53          vptest  %ymm4, %ymm4                    # ZF is cleared when any lane matched
    54          jne     .LBB1_8
    55          addq    $16, %rax
    56          cmpq    %rcx, %rax
    57          jb      .LBB1_7
    58          cmpq    %rsi, %rax                      # leftover elements (count not a multiple of 16)?
    59          jb      .LBB1_3
    60  .LBB1_9:                                # common exit: rax already holds the result
    61          vzeroupper
    62          retq
    63  .LBB1_1:
    64          xorl    %eax, %eax
    65          cmpq    %rsi, %rax
    66          jae     .LBB1_9                         # count == 0: return 0
    67  .LBB1_3:                                # =>This Inner Loop Header: Depth=1
    68          vucomiss        (%rdi,%rax,4), %xmm0
                jp      .LBB1_4                         # unordered (NaN involved) also sets ZF: not a match
    69          je      .LBB1_9                         # ordered and equal: rax is the index
        .LBB1_4:
    70          addq    $1, %rax
    71          cmpq    %rax, %rsi
    72          jne     .LBB1_3
    73          movq    %rsi, %rax                      # no match: return the element count
    74          vzeroupper
    75          retq
    76  .LBB1_8:                                # a lane in the current group matched
    77          vmovmskps       %ymm3, %ecx             # 8-bit mask for elements 8-15
    78          shll    $8, %ecx
    79          vmovmskps       %ymm2, %edx             # 8-bit mask for elements 0-7
    80          orl     %ecx, %edx                      # edx bit i set <=> element i of the group matched
    81          bsfl    %edx, %ecx                      # lowest set bit = first match (mask is nonzero here)
    82          addq    %rcx, %rax                      # group base index + offset within the group
    83          vzeroupper
    84          retq