gitee.com/quant1x/num@v0.3.2/asm/c2goasm/test/cpp/MaddMemcpy.cpp (about)

     1  #include <stdio.h>
     2  #include <string.h>
     3  #include <immintrin.h>
     4  
     5  void MaddMemcpy(float* arg1, float* arg2, float* arg3, int size1, int size2, float* result) {
     6      memcpy(arg2, arg1, size1);
     7      memcpy(arg3, arg1, size2);
     8      __m256 vec1 = _mm256_load_ps(arg1);
     9      __m256 vec2 = _mm256_load_ps(arg2);
    10      __m256 vec3 = _mm256_load_ps(arg3);
    11      __m256 res  = _mm256_fmadd_ps(vec1, vec2, vec3);
    12      _mm256_storeu_ps(result, res);
    13  }