gitee.com/quant1x/num@v0.3.2/asm/src/floats_sve2.c (about)

     1  // Copyright 2022 gorse Project Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  // http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  #include <arm_sve.h>
    16  #include <stdint.h>
    17  
    18  void svmul_const_add_to(float *a, float *b, float *c, long n)
    19  {
    20      for (long i = 0; i < n; i += svcntw())
    21      {
    22          svbool_t pg = svwhilelt_b32(i, n);
    23          svfloat32_t a_seg = svld1(pg, a + i);
    24          svfloat32_t c_seg = svld1(pg, c + i);
    25          svst1(pg, c + i, svmla_x(pg, c_seg, a_seg, *b));
    26      }
    27  }
    28  
    29  void svmul_const_to(float *a, float *b, float *c, long n)
    30  {
    31      for (long i = 0; i < n; i += svcntw())
    32      {
    33          svbool_t pg = svwhilelt_b32(i, n);
    34          svfloat32_t a_seg = svld1(pg, a + i);
    35          svst1(pg, c + i, svmul_x(pg, a_seg, *b));
    36      }
    37  }
    38  
    39  void svmul_const(float *a, float *b, long n)
    40  {
    41      for (long i = 0; i < n; i += svcntw())
    42      {
    43          svbool_t pg = svwhilelt_b32(i, n);
    44          svfloat32_t a_seg = svld1(pg, a + i);
    45          svst1(pg, a + i, svmul_x(pg, a_seg, *b));
    46      }
    47  }
    48  
    49  void svmul_to(float *a, float *b, float *c, long n)
    50  {
    51      for (long i = 0; i < n; i += svcntw())
    52      {
    53          svbool_t pg = svwhilelt_b32(i, n);
    54          svfloat32_t a_seg = svld1(pg, a + i);
    55          svfloat32_t b_seg = svld1(pg, b + i);
    56          svst1(pg, c + i, svmul_x(pg, a_seg, b_seg));
    57      }
    58  }