modernc.org/ccgo/v3@v3.16.14/lib/testdata/gcc-9.1.0/gcc/testsuite/gcc.c-torture/execute/20060420-1.c (about)

     1  extern void abort (void);
     2  
     3  typedef float v4flt __attribute__ ((vector_size (16)));
     4  
     5  void __attribute__ ((noinline)) foo (float *dst, float **src, int a, int n)
     6  {
     7    int i, j;
     8    int z = sizeof (v4flt) / sizeof (float);
     9    unsigned m = sizeof (v4flt) - 1;
    10  
    11    for (j = 0; j < n && (((unsigned long) dst + j) & m); ++j)
    12      {
    13        float t = src[0][j];
    14        for (i = 1; i < a; ++i)
    15  	t += src[i][j];
    16        dst[j] = t;
    17      }
    18  
    19    for (; j < (n - (4 * z - 1)); j += 4 * z)
    20      {
    21        v4flt t0 = *(v4flt *) (src[0] + j + 0 * z);
    22        v4flt t1 = *(v4flt *) (src[0] + j + 1 * z);
    23        v4flt t2 = *(v4flt *) (src[0] + j + 2 * z);
    24        v4flt t3 = *(v4flt *) (src[0] + j + 3 * z);
    25        for (i = 1; i < a; ++i)
    26  	{
    27  	  t0 += *(v4flt *) (src[i] + j + 0 * z);
    28  	  t1 += *(v4flt *) (src[i] + j + 1 * z);
    29  	  t2 += *(v4flt *) (src[i] + j + 2 * z);
    30  	  t3 += *(v4flt *) (src[i] + j + 3 * z);
    31  	}
    32        *(v4flt *) (dst + j + 0 * z) = t0;
    33        *(v4flt *) (dst + j + 1 * z) = t1;
    34        *(v4flt *) (dst + j + 2 * z) = t2;
    35        *(v4flt *) (dst + j + 3 * z) = t3;
    36      }
    37    for (; j < n; ++j)
    38      {
    39        float t = src[0][j];
    40        for (i = 1; i < a; ++i)
    41  	t += src[i][j];
    42        dst[j] = t;
    43      }
    44  }
    45  
    46  float buffer[64];
    47  
    48  int
    49  main (void)
    50  {
    51    int i;
    52    float *dst, *src[2];
    53    char *cptr;
    54  
    55    cptr = (char *)buffer;
    56    cptr += (-(long int) buffer & (16 * sizeof (float) - 1));
    57    dst = (float *)cptr;
    58    src[0] = dst + 16;
    59    src[1] = dst + 32;
    60    for (i = 0; i < 16; ++i)
    61      {
    62        src[0][i] = (float) i + 11 * (float) i;
    63        src[1][i] = (float) i + 12 * (float) i;
    64      }
    65    foo (dst, src, 2, 16);
    66    for (i = 0; i < 16; ++i)
    67      {
    68        float e = (float) i + 11 * (float) i + (float) i + 12 * (float) i;
    69        if (dst[i] != e)
    70  	abort ();
    71      }
    72    return 0;
    73  }