gonum.org/v1/gonum@v0.14.0/internal/asm/f32/gemv_test.go (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package f32_test
     6  
     7  import (
     8  	"fmt"
     9  	"testing"
    10  
    11  	. "gonum.org/v1/gonum/internal/asm/f32"
    12  	"gonum.org/v1/gonum/internal/math32"
    13  )
    14  
    15  type SgemvCase struct {
    16  	m int
    17  	n int
    18  	A []float32
    19  	x []float32
    20  	y []float32
    21  
    22  	NoTrans []SgemvSubcase
    23  	Trans   []SgemvSubcase
    24  }
    25  
    26  type SgemvSubcase struct {
    27  	alpha     float32
    28  	beta      float32
    29  	want      []float32
    30  	wantRevX  []float32
    31  	wantRevY  []float32
    32  	wantRevXY []float32
    33  }
    34  
// SgemvCases is the table of inputs and expected outputs driven by TestGemv.
//
// Each entry gives an m×n matrix A stored as a flat slice, the vectors x and
// y, and expected results for four (alpha, beta) pairs — (0,0), (0,1), (1,0)
// and (8,-6) — for both the NoTrans and Trans operations. For Trans subcases
// sgemvcomp swaps x and y before the call, so the expected vectors there have
// the length of x.
//
// The 7x7 entry puts a NaN into A; its expected vectors contain NaN in the
// affected element for every (alpha, beta) pair, including alpha == 0.
//
// NOTE(review): the parenthesised sizes in the trailing comments (for example
// "(2x2, 1x2)") look like the kernel block shapes each entry is meant to
// exercise; confirm against the assembly implementation.
var SgemvCases = []SgemvCase{
	{ // 1x1
		m: 1,
		n: 1,
		A: []float32{4.1},
		x: []float32{2.2},
		y: []float32{6.8},

		NoTrans: []SgemvSubcase{ // (1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0},
				wantRevX:  []float32{0},
				wantRevY:  []float32{0},
				wantRevXY: []float32{0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{6.8},
				wantRevX:  []float32{6.8},
				wantRevY:  []float32{6.8},
				wantRevXY: []float32{6.8},
			},
			{alpha: 1, beta: 0,
				want:      []float32{9.02},
				wantRevX:  []float32{9.02},
				wantRevY:  []float32{9.02},
				wantRevXY: []float32{9.02},
			},
			{alpha: 8, beta: -6,
				want:      []float32{31.36},
				wantRevX:  []float32{31.36},
				wantRevY:  []float32{31.36},
				wantRevXY: []float32{31.36},
			},
		},

		Trans: []SgemvSubcase{ // (1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0},
				wantRevX:  []float32{0},
				wantRevY:  []float32{0},
				wantRevXY: []float32{0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{2.2},
				wantRevX:  []float32{2.2},
				wantRevY:  []float32{2.2},
				wantRevXY: []float32{2.2},
			},
			{alpha: 1, beta: 0,
				want:      []float32{27.88},
				wantRevX:  []float32{27.88},
				wantRevY:  []float32{27.88},
				wantRevXY: []float32{27.88},
			},
			{alpha: 8, beta: -6,
				want:      []float32{209.84},
				wantRevX:  []float32{209.84},
				wantRevY:  []float32{209.84},
				wantRevXY: []float32{209.84},
			},
		},
	},

	{ // 3x2
		m: 3,
		n: 2,
		A: []float32{
			4.67, 2.75,
			0.48, 1.21,
			2.28, 2.82,
		},
		x: []float32{3.38, 3},
		y: []float32{2.8, 1.71, 2.64},

		NoTrans: []SgemvSubcase{ // (2x2, 1x2)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0},
				wantRevX:  []float32{0, 0, 0},
				wantRevY:  []float32{0, 0, 0},
				wantRevXY: []float32{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{2.8, 1.71, 2.64},
				wantRevX:  []float32{2.8, 1.71, 2.64},
				wantRevY:  []float32{2.8, 1.71, 2.64},
				wantRevXY: []float32{2.8, 1.71, 2.64},
			},
			{alpha: 1, beta: 0,
				want:      []float32{24.0346, 5.2524, 16.1664},
				wantRevX:  []float32{23.305, 5.5298, 16.3716},
				wantRevY:  []float32{16.1664, 5.2524, 24.0346},
				wantRevXY: []float32{16.3716, 5.5298, 23.305},
			},
			{alpha: 8, beta: -6,
				want:      []float32{175.4768, 31.7592, 113.4912},
				wantRevX:  []float32{169.64, 33.9784, 115.1328},
				wantRevY:  []float32{112.5312, 31.7592, 176.4368},
				wantRevXY: []float32{114.1728, 33.9784, 170.6},
			},
		},

		Trans: []SgemvSubcase{ // (2x2)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0},
				wantRevX:  []float32{0, 0},
				wantRevY:  []float32{0, 0},
				wantRevXY: []float32{0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{3.38, 3},
				wantRevX:  []float32{3.38, 3},
				wantRevY:  []float32{3.38, 3},
				wantRevXY: []float32{3.38, 3},
			},
			{alpha: 1, beta: 0,
				want:      []float32{19.916, 17.2139},
				wantRevX:  []float32{19.5336, 17.2251},
				wantRevY:  []float32{17.2139, 19.916},
				wantRevXY: []float32{17.2251, 19.5336},
			},
			{alpha: 8, beta: -6,
				want:      []float32{139.048, 119.7112},
				wantRevX:  []float32{135.9888, 119.8008},
				wantRevY:  []float32{117.4312, 141.328},
				wantRevXY: []float32{117.5208, 138.2688},
			},
		},
	},

	{ // 3x3
		m: 3,
		n: 3,
		A: []float32{
			4.38, 4.4, 4.26,
			4.18, 0.56, 2.57,
			2.59, 2.07, 0.46,
		},
		x: []float32{4.82, 1.82, 1.12},
		y: []float32{0.24, 1.41, 3.45},

		NoTrans: []SgemvSubcase{ // (2x2, 2x1, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0},
				wantRevX:  []float32{0, 0, 0},
				wantRevY:  []float32{0, 0, 0},
				wantRevXY: []float32{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{0.24, 1.41, 3.45},
				wantRevX:  []float32{0.24, 1.41, 3.45},
				wantRevY:  []float32{0.24, 1.41, 3.45},
				wantRevXY: []float32{0.24, 1.41, 3.45},
			},
			{alpha: 1, beta: 0,
				want:      []float32{33.8908, 24.0452, 16.7664},
				wantRevX:  []float32{33.4468, 18.0882, 8.8854},
				wantRevY:  []float32{16.7664, 24.0452, 33.8908},
				wantRevXY: []float32{8.8854, 18.0882, 33.4468},
			},
			{alpha: 8, beta: -6,
				want:      []float32{269.6864, 183.9016, 113.4312},
				wantRevX:  []float32{266.1344, 136.2456, 50.3832},
				wantRevY:  []float32{132.6912, 183.9016, 250.4264},
				wantRevXY: []float32{69.6432, 136.2456, 246.8744},
			},
		},

		Trans: []SgemvSubcase{ // (2x2, 1x2, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0},
				wantRevX:  []float32{0, 0, 0},
				wantRevY:  []float32{0, 0, 0},
				wantRevXY: []float32{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{4.82, 1.82, 1.12},
				wantRevX:  []float32{4.82, 1.82, 1.12},
				wantRevY:  []float32{4.82, 1.82, 1.12},
				wantRevXY: []float32{4.82, 1.82, 1.12},
			},
			{alpha: 1, beta: 0,
				want:      []float32{15.8805, 8.9871, 6.2331},
				wantRevX:  []float32{21.6264, 16.4664, 18.4311},
				wantRevY:  []float32{6.2331, 8.9871, 15.8805},
				wantRevXY: []float32{18.4311, 16.4664, 21.6264},
			},
			{alpha: 8, beta: -6,
				want:      []float32{98.124, 60.9768, 43.1448},
				wantRevX:  []float32{144.0912, 120.8112, 140.7288},
				wantRevY:  []float32{20.9448, 60.9768, 120.324},
				wantRevXY: []float32{118.5288, 120.8112, 166.2912},
			},
		},
	},

	{ // 5x3
		m: 5,
		n: 3,
		A: []float32{
			4.1, 6.2, 8.1,
			9.6, 3.5, 9.1,
			10, 7, 3,
			1, 1, 2,
			9, 2, 5,
		},
		x: []float32{1, 2, 3},
		y: []float32{7, 8, 9, 10, 11},

		NoTrans: []SgemvSubcase{ //(4x2, 4x1, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, 0, 0},
				wantRevX:  []float32{0, 0, 0, 0, 0},
				wantRevY:  []float32{0, 0, 0, 0, 0},
				wantRevXY: []float32{0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{7, 8, 9, 10, 11},
				wantRevX:  []float32{7, 8, 9, 10, 11},
				wantRevY:  []float32{7, 8, 9, 10, 11},
				wantRevXY: []float32{7, 8, 9, 10, 11},
			},
			{alpha: 1, beta: 0,
				want:      []float32{40.8, 43.9, 33, 9, 28},
				wantRevX:  []float32{32.8, 44.9, 47, 7, 36},
				wantRevY:  []float32{28, 9, 33, 43.9, 40.8},
				wantRevXY: []float32{36, 7, 47, 44.9, 32.8},
			},
			{alpha: 8, beta: -6,
				want:      []float32{284.4, 303.2, 210, 12, 158},
				wantRevX:  []float32{220.4, 311.2, 322, -4, 222},
				wantRevY:  []float32{182, 24, 210, 291.2, 260.4},
				wantRevXY: []float32{246, 8, 322, 299.2, 196.4},
			},
		},

		Trans: []SgemvSubcase{ //( 2x4, 1x4, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0},
				wantRevX:  []float32{0, 0, 0},
				wantRevY:  []float32{0, 0, 0},
				wantRevXY: []float32{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{1, 2, 3},
				wantRevX:  []float32{1, 2, 3},
				wantRevY:  []float32{1, 2, 3},
				wantRevXY: []float32{1, 2, 3},
			},
			{alpha: 1, beta: 0,
				want:      []float32{304.5, 166.4, 231.5},
				wantRevX:  []float32{302.1, 188.2, 258.1},
				wantRevY:  []float32{231.5, 166.4, 304.5},
				wantRevXY: []float32{258.1, 188.2, 302.1},
			},
			{alpha: 8, beta: -6,
				want:      []float32{2430, 1319.2, 1834},
				wantRevX:  []float32{2410.8, 1493.6, 2046.8},
				wantRevY:  []float32{1846, 1319.2, 2418},
				wantRevXY: []float32{2058.8, 1493.6, 2398.8},
			},
		},
	},

	{ // 3x5
		m: 3,
		n: 5,
		A: []float32{
			1.4, 2.34, 3.96, 0.96, 2.3,
			3.43, 0.62, 1.09, 0.2, 3.56,
			1.15, 0.58, 3.8, 1.16, 0.01,
		},
		x: []float32{2.34, 2.82, 4.73, 0.22, 3.91},
		y: []float32{2.46, 2.22, 4.75},

		NoTrans: []SgemvSubcase{ // (2x4, 2x1, 1x4, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0},
				wantRevX:  []float32{0, 0, 0},
				wantRevY:  []float32{0, 0, 0},
				wantRevXY: []float32{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{2.46, 2.22, 4.75},
				wantRevX:  []float32{2.46, 2.22, 4.75},
				wantRevY:  []float32{2.46, 2.22, 4.75},
				wantRevXY: []float32{2.46, 2.22, 4.75},
			},
			{alpha: 1, beta: 0,
				want:      []float32{37.8098, 28.8939, 22.5949},
				wantRevX:  []float32{32.8088, 27.5978, 25.8927},
				wantRevY:  []float32{22.5949, 28.8939, 37.8098},
				wantRevXY: []float32{25.8927, 27.5978, 32.8088},
			},
			{alpha: 8, beta: -6,
				want:      []float32{287.7184, 217.8312, 152.2592},
				wantRevX:  []float32{247.7104, 207.4624, 178.6416},
				wantRevY:  []float32{165.9992, 217.8312, 273.9784},
				wantRevXY: []float32{192.3816, 207.4624, 233.9704},
			},
		},

		Trans: []SgemvSubcase{ // (4x2, 1x2, 4x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, 0, 0},
				wantRevX:  []float32{0, 0, 0, 0, 0},
				wantRevY:  []float32{0, 0, 0, 0, 0},
				wantRevXY: []float32{0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevX:  []float32{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevY:  []float32{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevXY: []float32{2.34, 2.82, 4.73, 0.22, 3.91},
			},
			{alpha: 1, beta: 0,
				want:      []float32{16.5211, 9.8878, 30.2114, 8.3156, 13.6087},
				wantRevX:  []float32{17.0936, 13.9182, 30.5778, 7.8576, 18.8528},
				wantRevY:  []float32{13.6087, 8.3156, 30.2114, 9.8878, 16.5211},
				wantRevXY: []float32{18.8528, 7.8576, 30.5778, 13.9182, 17.0936},
			},
			{alpha: 8, beta: -6,
				want:      []float32{118.1288, 62.1824, 213.3112, 65.2048, 85.4096},
				wantRevX:  []float32{122.7088, 94.4256, 216.2424, 61.5408, 127.3624},
				wantRevY:  []float32{94.8296, 49.6048, 213.3112, 77.7824, 108.7088},
				wantRevXY: []float32{136.7824, 45.9408, 216.2424, 110.0256, 113.2888},
			},
		},
	},

	{ // 7x7 & nan test
		m: 7,
		n: 7,
		A: []float32{
			0.9, 2.6, 0.5, 1.8, 2.3, 0.6, 0.2,
			1.6, 0.6, 1.3, 2.1, 1.4, 0.4, 0.8,
			2.9, 0.9, 2.3, 2.5, 1.4, 1.8, 1.6,
			2.6, 2.8, 2.1, 0.3, nan, 2.2, 1.3,
			0.2, 2.2, 1.8, 1.8, 2.1, 1.3, 1.4,
			1.7, 1.4, 2.3, 2., 1., 0., 1.4,
			2.1, 1.9, 0.8, 2.9, 1.3, 0.3, 1.3,
		},
		x: []float32{0.4, 2.8, 3.5, 0.3, 0.6, 2.5, 3.1},
		y: []float32{3.2, 4.4, 5., 4.3, 4.1, 1.4, 0.2},

		NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, nan, 0, 0, 0},
				wantRevX:  []float32{0, 0, 0, nan, 0, 0, 0},
				wantRevY:  []float32{0, 0, 0, nan, 0, 0, 0},
				wantRevXY: []float32{0, 0, 0, nan, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevX:  []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevY:  []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevXY: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
			},
			{alpha: 1, beta: 0,
				want:      []float32{13.43, 11.82, 22.78, nan, 21.93, 18.19, 15.39},
				wantRevX:  []float32{19.94, 14.21, 23.95, nan, 19.29, 14.81, 18.52},
				wantRevY:  []float32{15.39, 18.19, 21.93, nan, 22.78, 11.82, 13.43},
				wantRevXY: []float32{18.52, 14.81, 19.29, nan, 23.95, 14.21, 19.94},
			},
			{alpha: 8, beta: -6,
				want:      []float32{88.24, 68.16, 152.24, nan, 150.84, 137.12, 121.92},
				wantRevX:  []float32{140.32, 87.28, 161.6, nan, 129.72, 110.08, 146.96},
				wantRevY:  []float32{103.92, 119.12, 145.44, nan, 157.64, 86.16, 106.24},
				wantRevXY: []float32{128.96, 92.08, 124.32, nan, 167., 105.28, 158.32},
			},
		},

		Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, 0, nan, 0, 0},
				wantRevX:  []float32{0, 0, 0, 0, nan, 0, 0},
				wantRevY:  []float32{0, 0, nan, 0, 0, 0, 0},
				wantRevXY: []float32{0, 0, nan, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1},
				wantRevX:  []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1},
				wantRevY:  []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1},
				wantRevXY: []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1},
			},
			{alpha: 1, beta: 0,
				want:      []float32{39.22, 38.86, 38.61, 39.55, nan, 27.53, 25.71},
				wantRevX:  []float32{40.69, 40.33, 42.06, 41.92, nan, 24.98, 30.63},
				wantRevY:  []float32{25.71, 27.53, nan, 39.55, 38.61, 38.86, 39.22},
				wantRevXY: []float32{30.63, 24.98, nan, 41.92, 42.06, 40.33, 40.69},
			},
			{alpha: 8, beta: -6,
				want:      []float32{311.36, 294.08, 287.88, 314.6, nan, 205.24, 187.08},
				wantRevX:  []float32{323.12, 305.84, 315.48, 333.56, nan, 184.84, 226.44},
				wantRevY:  []float32{203.28, 203.44, nan, 314.6, 305.28, 295.88, 295.16},
				wantRevXY: []float32{242.64, 183.04, nan, 333.56, 332.88, 307.64, 306.92},
			},
		},
	},
	{ // 11x11
		m: 11,
		n: 11,
		A: []float32{
			0.4, 3., 2.5, 2., 0.4, 2., 2., 1., 0.1, 0.3, 2.,
			1.7, 0.7, 2.6, 1.6, 0.5, 2.4, 3., 0.9, 0.1, 2.8, 1.3,
			1.1, 2.2, 1.5, 0.8, 2.9, 0.4, 0.5, 1.7, 0.8, 2.6, 0.7,
			2.2, 1.7, 0.8, 2.9, 0.7, 0.7, 1.7, 1.8, 1.9, 2.4, 1.9,
			0.3, 0.5, 1.6, 1.5, 1.5, 2.4, 1.7, 1.2, 1.9, 2.8, 1.2,
			1.4, 2.2, 1.7, 1.4, 2.7, 1.4, 0.9, 1.8, 0.5, 1.2, 1.9,
			0.8, 2.3, 1.7, 1.3, 2., 2.8, 2.6, 0.4, 2.5, 1.3, 0.5,
			2.4, 2.8, 1.1, 0.2, 0.4, 2.8, 0.5, 0.5, 0., 2.8, 1.9,
			2.3, 1.8, 2.3, 1.7, 1.1, 0.1, 1.4, 1.2, 1.9, 0.5, 0.6,
			0.6, 2.4, 1.2, 0.3, 1.4, 1.3, 2.5, 2.6, 0., 1.3, 2.6,
			0.7, 1.5, 0.2, 1.4, 1.1, 1.8, 0.2, 1., 1., 0.6, 1.2,
		},
		x: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
		y: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},

		NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevX:  []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevY:  []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevX:  []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevY:  []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevXY: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
			},
			{alpha: 1, beta: 0,
				want:      []float32{32.71, 38.93, 33.55, 45.46, 39.24, 38.41, 46.23, 25.78, 37.33, 37.42, 24.63},
				wantRevX:  []float32{39.82, 43.78, 37.73, 41.19, 40.17, 44.41, 42.75, 28.14, 35.6, 41.25, 23.9},
				wantRevY:  []float32{24.63, 37.42, 37.33, 25.78, 46.23, 38.41, 39.24, 45.46, 33.55, 38.93, 32.71},
				wantRevXY: []float32{23.9, 41.25, 35.6, 28.14, 42.75, 44.41, 40.17, 41.19, 37.73, 43.78, 39.82},
			},
			{alpha: 8, beta: -6,
				want:      []float32{238.88, 291.04, 258.8, 334.88, 288.12, 304.28, 357.84, 191.24, 289.64, 282.56, 173.64},
				wantRevX:  []float32{295.76, 329.84, 292.24, 300.72, 295.56, 352.28, 330., 210.12, 275.8, 313.2, 167.8},
				wantRevY:  []float32{174.24, 278.96, 289.04, 177.44, 344.04, 304.28, 301.92, 348.68, 259.4, 294.64, 238.28},
				wantRevXY: []float32{168.4, 309.6, 275.2, 196.32, 316.2, 352.28, 309.36, 314.52, 292.84, 333.44, 295.16},
			},
		},

		Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevX:  []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevY:  []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevX:  []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevY:  []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevXY: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
			},
			{alpha: 1, beta: 0,
				want:      []float32{37.07, 55.58, 46.05, 47.34, 33.88, 54.19, 50.85, 39.31, 31.29, 55.31, 46.98},
				wantRevX:  []float32{38.11, 63.38, 46.44, 40.04, 34.63, 59.27, 50.13, 35.45, 28.26, 51.64, 46.22},
				wantRevY:  []float32{46.98, 55.31, 31.29, 39.31, 50.85, 54.19, 33.88, 47.34, 46.05, 55.58, 37.07},
				wantRevXY: []float32{46.22, 51.64, 28.26, 35.45, 50.13, 59.27, 34.63, 40.04, 46.44, 63.38, 38.11},
			},
			{alpha: 8, beta: -6,
				want:      []float32{281.56, 437.44, 363.6, 361.32, 250.64, 422.72, 379.2, 294.68, 227.52, 437.08, 369.24},
				wantRevX:  []float32{289.88, 499.84, 366.72, 302.92, 256.64, 463.36, 373.44, 263.8, 203.28, 407.72, 363.16},
				wantRevY:  []float32{360.84, 435.28, 245.52, 297.08, 386.4, 422.72, 243.44, 358.92, 345.6, 439.24, 289.96},
				wantRevXY: []float32{354.76, 405.92, 221.28, 266.2, 380.64, 463.36, 249.44, 300.52, 348.72, 501.64, 298.28},
			},
		},
	},
}
   507  
   508  func TestGemv(t *testing.T) {
   509  	for _, test := range SgemvCases {
   510  		t.Run(fmt.Sprintf("(%vx%v)", test.m, test.n), func(tt *testing.T) {
   511  			for i, cas := range test.NoTrans {
   512  				tt.Run(fmt.Sprintf("NoTrans case %v", i), func(st *testing.T) {
   513  					sgemvcomp(st, test, false, cas, i)
   514  				})
   515  			}
   516  			for i, cas := range test.Trans {
   517  				tt.Run(fmt.Sprintf("Trans case %v", i), func(st *testing.T) {
   518  					sgemvcomp(st, test, true, cas, i)
   519  				})
   520  			}
   521  		})
   522  	}
   523  }
   524  
   525  func sgemvcomp(t *testing.T, test SgemvCase, trans bool, cas SgemvSubcase, i int) {
   526  	const (
   527  		tol = 1e-6
   528  
   529  		xGdVal, yGdVal, aGdVal = 0.5, 1.5, 10
   530  		gdLn                   = 4
   531  	)
   532  	if trans {
   533  		test.x, test.y = test.y, test.x
   534  	}
   535  	prefix := fmt.Sprintf("Test (%vx%v) t:%v (a:%v,b:%v)", test.m, test.n, trans, cas.alpha, cas.beta)
   536  	xg, yg := guardVector(test.x, xGdVal, gdLn), guardVector(test.y, yGdVal, gdLn)
   537  	x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn]
   538  	ag := guardVector(test.A, aGdVal, gdLn)
   539  	a := ag[gdLn : len(ag)-gdLn]
   540  
   541  	lda := uintptr(test.n)
   542  	if trans {
   543  		GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1)
   544  	} else {
   545  		GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1)
   546  	}
   547  	for i := range cas.want {
   548  		if !sameApprox(y[i], cas.want[i], tol) {
   549  			t.Errorf(msgVal, prefix, i, y[i], cas.want[i])
   550  		}
   551  	}
   552  
   553  	if !isValidGuard(xg, xGdVal, gdLn) {
   554  		t.Errorf(msgGuard, prefix, "x", xg[:gdLn], xg[len(xg)-gdLn:])
   555  	}
   556  	if !isValidGuard(yg, yGdVal, gdLn) {
   557  		t.Errorf(msgGuard, prefix, "y", yg[:gdLn], yg[len(yg)-gdLn:])
   558  	}
   559  	if !isValidGuard(ag, aGdVal, gdLn) {
   560  		t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:])
   561  	}
   562  	if !equalStrided(test.x, x, 1) {
   563  		t.Errorf(msgReadOnly, prefix, "x")
   564  	}
   565  	if !equalStrided(test.A, a, 1) {
   566  		t.Errorf(msgReadOnly, prefix, "a")
   567  	}
   568  
   569  	for _, inc := range newIncSet(-1, 1, 2, 3, 90) {
   570  		incPrefix := fmt.Sprintf("%s inc(x:%v, y:%v)", prefix, inc.x, inc.y)
   571  		want, incY := cas.want, inc.y
   572  		switch {
   573  		case inc.x < 0 && inc.y < 0:
   574  			want = cas.wantRevXY
   575  			incY = -inc.y
   576  		case inc.x < 0:
   577  			want = cas.wantRevX
   578  		case inc.y < 0:
   579  			want = cas.wantRevY
   580  			incY = -inc.y
   581  		}
   582  		xg, yg := guardIncVector(test.x, xGdVal, inc.x, gdLn), guardIncVector(test.y, yGdVal, inc.y, gdLn)
   583  		x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn]
   584  		ag := guardVector(test.A, aGdVal, gdLn)
   585  		a := ag[gdLn : len(ag)-gdLn]
   586  
   587  		if trans {
   588  			GemvT(uintptr(test.m), uintptr(test.n), cas.alpha,
   589  				a, lda, x, uintptr(inc.x),
   590  				cas.beta, y, uintptr(inc.y))
   591  		} else {
   592  			GemvN(uintptr(test.m), uintptr(test.n), cas.alpha,
   593  				a, lda, x, uintptr(inc.x),
   594  				cas.beta, y, uintptr(inc.y))
   595  		}
   596  		for i := range want {
   597  			if !sameApprox(y[i*incY], want[i], tol) {
   598  				t.Errorf(msgVal, incPrefix, i, y[i*incY], want[i])
   599  				t.Error(y[i*incY] - want[i])
   600  			}
   601  		}
   602  
   603  		checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn)
   604  		checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn)
   605  		if !isValidGuard(ag, aGdVal, gdLn) {
   606  			t.Errorf(msgGuard, incPrefix, "a", ag[:gdLn], ag[len(ag)-gdLn:])
   607  		}
   608  		if !equalStrided(test.x, x, inc.x) {
   609  			t.Errorf(msgReadOnly, incPrefix, "x")
   610  		}
   611  		if !equalStrided(test.A, a, 1) {
   612  			t.Errorf(msgReadOnly, incPrefix, "a")
   613  		}
   614  	}
   615  }
   616  
   617  // equalStrided returns true if the strided vector x contains elements of the
   618  // dense vector ref at indices i*inc, false otherwise.
   619  func equalStrided(ref, x []float32, inc int) bool {
   620  	if inc < 0 {
   621  		inc = -inc
   622  	}
   623  	for i, v := range ref {
   624  		if !scalarSame(x[i*inc], v) {
   625  			return false
   626  		}
   627  	}
   628  	return true
   629  }
   630  
   631  func scalarSame(a, b float32) bool {
   632  	return a == b || (math32.IsNaN(a) && math32.IsNaN(b))
   633  }