github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/internal/asm/f64/gemv_test.go (about)

     1  // Copyright ©2017 The Gonum Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package f64_test
     6  
     7  import (
     8  	"fmt"
     9  	"testing"
    10  
    11  	. "github.com/jingcheng-WU/gonum/internal/asm/f64"
    12  )
    13  
    14  type DgemvCase struct {
    15  	m int
    16  	n int
    17  	A []float64
    18  	x []float64
    19  	y []float64
    20  
    21  	NoTrans []DgemvSubcase
    22  	Trans   []DgemvSubcase
    23  }
    24  
    25  type DgemvSubcase struct {
    26  	alpha     float64
    27  	beta      float64
    28  	want      []float64
    29  	wantRevX  []float64
    30  	wantRevY  []float64
    31  	wantRevXY []float64
    32  }
    33  
// DgemvCases is the fixture table consumed by TestGemv.
//
// Each entry gives an m-by-n matrix A (written one matrix row per source
// line), an x vector of length n, a y vector of length m, and the expected
// outputs for the (alpha, beta) pairs (0,0), (0,1), (1,0) and (8,-6), once
// for the non-transposed kernel and once for the transposed kernel.
//
// The 7x7 entry places a NaN in A at row 3, column 4 (zero-based) and
// expects that NaN to show up in the matching output element for every
// (alpha, beta) pair, including alpha == 0. The identifier nan is declared
// elsewhere in the package.
//
// NOTE(review): the parenthesized lists trailing the NoTrans/Trans lines
// presumably name the kernel tile shapes each fixture is meant to exercise;
// confirm against the assembly implementation.
var DgemvCases = []DgemvCase{
	{ // 1x1
		m: 1,
		n: 1,
		A: []float64{4.1},
		x: []float64{2.2},
		y: []float64{6.8},

		NoTrans: []DgemvSubcase{ // (1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0},
				wantRevX:  []float64{0},
				wantRevY:  []float64{0},
				wantRevXY: []float64{0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{6.8},
				wantRevX:  []float64{6.8},
				wantRevY:  []float64{6.8},
				wantRevXY: []float64{6.8},
			},
			{alpha: 1, beta: 0,
				want:      []float64{9.02},
				wantRevX:  []float64{9.02},
				wantRevY:  []float64{9.02},
				wantRevXY: []float64{9.02},
			},
			{alpha: 8, beta: -6,
				want:      []float64{31.36},
				wantRevX:  []float64{31.36},
				wantRevY:  []float64{31.36},
				wantRevXY: []float64{31.36},
			},
		},

		Trans: []DgemvSubcase{ // (1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0},
				wantRevX:  []float64{0},
				wantRevY:  []float64{0},
				wantRevXY: []float64{0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{2.2},
				wantRevX:  []float64{2.2},
				wantRevY:  []float64{2.2},
				wantRevXY: []float64{2.2},
			},
			{alpha: 1, beta: 0,
				want:      []float64{27.88},
				wantRevX:  []float64{27.88},
				wantRevY:  []float64{27.88},
				wantRevXY: []float64{27.88},
			},
			{alpha: 8, beta: -6,
				want:      []float64{209.84},
				wantRevX:  []float64{209.84},
				wantRevY:  []float64{209.84},
				wantRevXY: []float64{209.84},
			},
		},
	},

	{ // 3x2
		m: 3,
		n: 2,
		A: []float64{
			4.67, 2.75,
			0.48, 1.21,
			2.28, 2.82,
		},
		x: []float64{3.38, 3},
		y: []float64{2.8, 1.71, 2.64},

		NoTrans: []DgemvSubcase{ // (2x2, 1x2)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0},
				wantRevX:  []float64{0, 0, 0},
				wantRevY:  []float64{0, 0, 0},
				wantRevXY: []float64{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{2.8, 1.71, 2.64},
				wantRevX:  []float64{2.8, 1.71, 2.64},
				wantRevY:  []float64{2.8, 1.71, 2.64},
				wantRevXY: []float64{2.8, 1.71, 2.64},
			},
			{alpha: 1, beta: 0,
				want:      []float64{24.0346, 5.2524, 16.1664},
				wantRevX:  []float64{23.305, 5.5298, 16.3716},
				wantRevY:  []float64{16.1664, 5.2524, 24.0346},
				wantRevXY: []float64{16.3716, 5.5298, 23.305},
			},
			{alpha: 8, beta: -6,
				want:      []float64{175.4768, 31.7592, 113.4912},
				wantRevX:  []float64{169.64, 33.9784, 115.1328},
				wantRevY:  []float64{112.5312, 31.7592, 176.4368},
				wantRevXY: []float64{114.1728, 33.9784, 170.6},
			},
		},

		Trans: []DgemvSubcase{ // (2x2)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0},
				wantRevX:  []float64{0, 0},
				wantRevY:  []float64{0, 0},
				wantRevXY: []float64{0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{3.38, 3},
				wantRevX:  []float64{3.38, 3},
				wantRevY:  []float64{3.38, 3},
				wantRevXY: []float64{3.38, 3},
			},
			{alpha: 1, beta: 0,
				want:      []float64{19.916, 17.2139},
				wantRevX:  []float64{19.5336, 17.2251},
				wantRevY:  []float64{17.2139, 19.916},
				wantRevXY: []float64{17.2251, 19.5336},
			},
			{alpha: 8, beta: -6,
				want:      []float64{139.048, 119.7112},
				wantRevX:  []float64{135.9888, 119.8008},
				wantRevY:  []float64{117.4312, 141.328},
				wantRevXY: []float64{117.5208, 138.2688},
			},
		},
	},

	{ // 3x3
		m: 3,
		n: 3,
		A: []float64{
			4.38, 4.4, 4.26,
			4.18, 0.56, 2.57,
			2.59, 2.07, 0.46,
		},
		x: []float64{4.82, 1.82, 1.12},
		y: []float64{0.24, 1.41, 3.45},

		NoTrans: []DgemvSubcase{ // (2x2, 2x1, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0},
				wantRevX:  []float64{0, 0, 0},
				wantRevY:  []float64{0, 0, 0},
				wantRevXY: []float64{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{0.24, 1.41, 3.45},
				wantRevX:  []float64{0.24, 1.41, 3.45},
				wantRevY:  []float64{0.24, 1.41, 3.45},
				wantRevXY: []float64{0.24, 1.41, 3.45},
			},
			{alpha: 1, beta: 0,
				want:      []float64{33.8908, 24.0452, 16.7664},
				wantRevX:  []float64{33.4468, 18.0882, 8.8854},
				wantRevY:  []float64{16.7664, 24.0452, 33.8908},
				wantRevXY: []float64{8.8854, 18.0882, 33.4468},
			},
			{alpha: 8, beta: -6,
				want:      []float64{269.6864, 183.9016, 113.4312},
				wantRevX:  []float64{266.1344, 136.2456, 50.3832},
				wantRevY:  []float64{132.6912, 183.9016, 250.4264},
				wantRevXY: []float64{69.6432, 136.2456, 246.8744},
			},
		},

		Trans: []DgemvSubcase{ // (2x2, 1x2, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0},
				wantRevX:  []float64{0, 0, 0},
				wantRevY:  []float64{0, 0, 0},
				wantRevXY: []float64{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{4.82, 1.82, 1.12},
				wantRevX:  []float64{4.82, 1.82, 1.12},
				wantRevY:  []float64{4.82, 1.82, 1.12},
				wantRevXY: []float64{4.82, 1.82, 1.12},
			},
			{alpha: 1, beta: 0,
				want:      []float64{15.8805, 8.9871, 6.2331},
				wantRevX:  []float64{21.6264, 16.4664, 18.4311},
				wantRevY:  []float64{6.2331, 8.9871, 15.8805},
				wantRevXY: []float64{18.4311, 16.4664, 21.6264},
			},
			{alpha: 8, beta: -6,
				want:      []float64{98.124, 60.9768, 43.1448},
				wantRevX:  []float64{144.0912, 120.8112, 140.7288},
				wantRevY:  []float64{20.9448, 60.9768, 120.324},
				wantRevXY: []float64{118.5288, 120.8112, 166.2912},
			},
		},
	},

	{ // 5x3
		m: 5,
		n: 3,
		A: []float64{
			4.1, 6.2, 8.1,
			9.6, 3.5, 9.1,
			10, 7, 3,
			1, 1, 2,
			9, 2, 5,
		},
		x: []float64{1, 2, 3},
		y: []float64{7, 8, 9, 10, 11},

		NoTrans: []DgemvSubcase{ // (4x2, 4x1, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, 0, 0},
				wantRevX:  []float64{0, 0, 0, 0, 0},
				wantRevY:  []float64{0, 0, 0, 0, 0},
				wantRevXY: []float64{0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{7, 8, 9, 10, 11},
				wantRevX:  []float64{7, 8, 9, 10, 11},
				wantRevY:  []float64{7, 8, 9, 10, 11},
				wantRevXY: []float64{7, 8, 9, 10, 11},
			},
			{alpha: 1, beta: 0,
				want:      []float64{40.8, 43.9, 33, 9, 28},
				wantRevX:  []float64{32.8, 44.9, 47, 7, 36},
				wantRevY:  []float64{28, 9, 33, 43.9, 40.8},
				wantRevXY: []float64{36, 7, 47, 44.9, 32.8},
			},
			{alpha: 8, beta: -6,
				want:      []float64{284.4, 303.2, 210, 12, 158},
				wantRevX:  []float64{220.4, 311.2, 322, -4, 222},
				wantRevY:  []float64{182, 24, 210, 291.2, 260.4},
				wantRevXY: []float64{246, 8, 322, 299.2, 196.4},
			},
		},

		Trans: []DgemvSubcase{ // (2x4, 1x4, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0},
				wantRevX:  []float64{0, 0, 0},
				wantRevY:  []float64{0, 0, 0},
				wantRevXY: []float64{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{1, 2, 3},
				wantRevX:  []float64{1, 2, 3},
				wantRevY:  []float64{1, 2, 3},
				wantRevXY: []float64{1, 2, 3},
			},
			{alpha: 1, beta: 0,
				want:      []float64{304.5, 166.4, 231.5},
				wantRevX:  []float64{302.1, 188.2, 258.1},
				wantRevY:  []float64{231.5, 166.4, 304.5},
				wantRevXY: []float64{258.1, 188.2, 302.1},
			},
			{alpha: 8, beta: -6,
				want:      []float64{2430, 1319.2, 1834},
				wantRevX:  []float64{2410.8, 1493.6, 2046.8},
				wantRevY:  []float64{1846, 1319.2, 2418},
				wantRevXY: []float64{2058.8, 1493.6, 2398.8},
			},
		},
	},

	{ // 3x5
		m: 3,
		n: 5,
		A: []float64{
			1.4, 2.34, 3.96, 0.96, 2.3,
			3.43, 0.62, 1.09, 0.2, 3.56,
			1.15, 0.58, 3.8, 1.16, 0.01,
		},
		x: []float64{2.34, 2.82, 4.73, 0.22, 3.91},
		y: []float64{2.46, 2.22, 4.75},

		NoTrans: []DgemvSubcase{ // (2x4, 2x1, 1x4, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0},
				wantRevX:  []float64{0, 0, 0},
				wantRevY:  []float64{0, 0, 0},
				wantRevXY: []float64{0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{2.46, 2.22, 4.75},
				wantRevX:  []float64{2.46, 2.22, 4.75},
				wantRevY:  []float64{2.46, 2.22, 4.75},
				wantRevXY: []float64{2.46, 2.22, 4.75},
			},
			{alpha: 1, beta: 0,
				want:      []float64{37.8098, 28.8939, 22.5949},
				wantRevX:  []float64{32.8088, 27.5978, 25.8927},
				wantRevY:  []float64{22.5949, 28.8939, 37.8098},
				wantRevXY: []float64{25.8927, 27.5978, 32.8088},
			},
			{alpha: 8, beta: -6,
				want:      []float64{287.7184, 217.8312, 152.2592},
				wantRevX:  []float64{247.7104, 207.4624, 178.6416},
				wantRevY:  []float64{165.9992, 217.8312, 273.9784},
				wantRevXY: []float64{192.3816, 207.4624, 233.9704},
			},
		},

		Trans: []DgemvSubcase{ // (4x2, 1x2, 4x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, 0, 0},
				wantRevX:  []float64{0, 0, 0, 0, 0},
				wantRevY:  []float64{0, 0, 0, 0, 0},
				wantRevXY: []float64{0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevX:  []float64{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevY:  []float64{2.34, 2.82, 4.73, 0.22, 3.91},
				wantRevXY: []float64{2.34, 2.82, 4.73, 0.22, 3.91},
			},
			{alpha: 1, beta: 0,
				want:      []float64{16.5211, 9.8878, 30.2114, 8.3156, 13.6087},
				wantRevX:  []float64{17.0936, 13.9182, 30.5778, 7.8576, 18.8528},
				wantRevY:  []float64{13.6087, 8.3156, 30.2114, 9.8878, 16.5211},
				wantRevXY: []float64{18.8528, 7.8576, 30.5778, 13.9182, 17.0936},
			},
			{alpha: 8, beta: -6,
				want:      []float64{118.1288, 62.1824, 213.3112, 65.2048, 85.4096},
				wantRevX:  []float64{122.7088, 94.4256, 216.2424, 61.5408, 127.3624},
				wantRevY:  []float64{94.8296, 49.6048, 213.3112, 77.7824, 108.7088},
				wantRevXY: []float64{136.7824, 45.9408, 216.2424, 110.0256, 113.2888},
			},
		},
	},

	{ // 7x7 & nan test
		m: 7,
		n: 7,
		A: []float64{
			0.9, 2.6, 0.5, 1.8, 2.3, 0.6, 0.2,
			1.6, 0.6, 1.3, 2.1, 1.4, 0.4, 0.8,
			2.9, 0.9, 2.3, 2.5, 1.4, 1.8, 1.6,
			2.6, 2.8, 2.1, 0.3, nan, 2.2, 1.3,
			0.2, 2.2, 1.8, 1.8, 2.1, 1.3, 1.4,
			1.7, 1.4, 2.3, 2., 1., 0., 1.4,
			2.1, 1.9, 0.8, 2.9, 1.3, 0.3, 1.3,
		},
		x: []float64{0.4, 2.8, 3.5, 0.3, 0.6, 2.5, 3.1},
		y: []float64{3.2, 4.4, 5., 4.3, 4.1, 1.4, 0.2},

		NoTrans: []DgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, nan, 0, 0, 0},
				wantRevX:  []float64{0, 0, 0, nan, 0, 0, 0},
				wantRevY:  []float64{0, 0, 0, nan, 0, 0, 0},
				wantRevXY: []float64{0, 0, 0, nan, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevX:  []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevY:  []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
				wantRevXY: []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2},
			},
			{alpha: 1, beta: 0,
				want:      []float64{13.43, 11.82, 22.78, nan, 21.93, 18.19, 15.39},
				wantRevX:  []float64{19.94, 14.21, 23.95, nan, 19.29, 14.81, 18.52},
				wantRevY:  []float64{15.39, 18.19, 21.93, nan, 22.78, 11.82, 13.43},
				wantRevXY: []float64{18.52, 14.81, 19.29, nan, 23.95, 14.21, 19.94},
			},
			{alpha: 8, beta: -6,
				want:      []float64{88.24, 68.16, 152.24, nan, 150.84, 137.12, 121.92},
				wantRevX:  []float64{140.32, 87.28, 161.6, nan, 129.72, 110.08, 146.96},
				wantRevY:  []float64{103.92, 119.12, 145.44, nan, 157.64, 86.16, 106.24},
				wantRevXY: []float64{128.96, 92.08, 124.32, nan, 167., 105.28, 158.32},
			},
		},

		Trans: []DgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, 0, nan, 0, 0},
				wantRevX:  []float64{0, 0, 0, 0, nan, 0, 0},
				wantRevY:  []float64{0, 0, nan, 0, 0, 0, 0},
				wantRevXY: []float64{0, 0, nan, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1},
				wantRevX:  []float64{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1},
				wantRevY:  []float64{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1},
				wantRevXY: []float64{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1},
			},
			{alpha: 1, beta: 0,
				want:      []float64{39.22, 38.86, 38.61, 39.55, nan, 27.53, 25.71},
				wantRevX:  []float64{40.69, 40.33, 42.06, 41.92, nan, 24.98, 30.63},
				wantRevY:  []float64{25.71, 27.53, nan, 39.55, 38.61, 38.86, 39.22},
				wantRevXY: []float64{30.63, 24.98, nan, 41.92, 42.06, 40.33, 40.69},
			},
			{alpha: 8, beta: -6,
				want:      []float64{311.36, 294.08, 287.88, 314.6, nan, 205.24, 187.08},
				wantRevX:  []float64{323.12, 305.84, 315.48, 333.56, nan, 184.84, 226.44},
				wantRevY:  []float64{203.28, 203.44, nan, 314.6, 305.28, 295.88, 295.16},
				wantRevXY: []float64{242.64, 183.04, nan, 333.56, 332.88, 307.64, 306.92},
			},
		},
	},
	{ // 11x11
		m: 11,
		n: 11,
		A: []float64{
			0.4, 3., 2.5, 2., 0.4, 2., 2., 1., 0.1, 0.3, 2.,
			1.7, 0.7, 2.6, 1.6, 0.5, 2.4, 3., 0.9, 0.1, 2.8, 1.3,
			1.1, 2.2, 1.5, 0.8, 2.9, 0.4, 0.5, 1.7, 0.8, 2.6, 0.7,
			2.2, 1.7, 0.8, 2.9, 0.7, 0.7, 1.7, 1.8, 1.9, 2.4, 1.9,
			0.3, 0.5, 1.6, 1.5, 1.5, 2.4, 1.7, 1.2, 1.9, 2.8, 1.2,
			1.4, 2.2, 1.7, 1.4, 2.7, 1.4, 0.9, 1.8, 0.5, 1.2, 1.9,
			0.8, 2.3, 1.7, 1.3, 2., 2.8, 2.6, 0.4, 2.5, 1.3, 0.5,
			2.4, 2.8, 1.1, 0.2, 0.4, 2.8, 0.5, 0.5, 0., 2.8, 1.9,
			2.3, 1.8, 2.3, 1.7, 1.1, 0.1, 1.4, 1.2, 1.9, 0.5, 0.6,
			0.6, 2.4, 1.2, 0.3, 1.4, 1.3, 2.5, 2.6, 0., 1.3, 2.6,
			0.7, 1.5, 0.2, 1.4, 1.1, 1.8, 0.2, 1., 1., 0.6, 1.2,
		},
		x: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
		y: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},

		NoTrans: []DgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevX:  []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevY:  []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevXY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevX:  []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevY:  []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
				wantRevXY: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9},
			},
			{alpha: 1, beta: 0,
				want:      []float64{32.71, 38.93, 33.55, 45.46, 39.24, 38.41, 46.23, 25.78, 37.33, 37.42, 24.63},
				wantRevX:  []float64{39.82, 43.78, 37.73, 41.19, 40.17, 44.41, 42.75, 28.14, 35.6, 41.25, 23.9},
				wantRevY:  []float64{24.63, 37.42, 37.33, 25.78, 46.23, 38.41, 39.24, 45.46, 33.55, 38.93, 32.71},
				wantRevXY: []float64{23.9, 41.25, 35.6, 28.14, 42.75, 44.41, 40.17, 41.19, 37.73, 43.78, 39.82},
			},
			{alpha: 8, beta: -6,
				want:      []float64{238.88, 291.04, 258.8, 334.88, 288.12, 304.28, 357.84, 191.24, 289.64, 282.56, 173.64},
				wantRevX:  []float64{295.76, 329.84, 292.24, 300.72, 295.56, 352.28, 330., 210.12, 275.8, 313.2, 167.8},
				wantRevY:  []float64{174.24, 278.96, 289.04, 177.44, 344.04, 304.28, 301.92, 348.68, 259.4, 294.64, 238.28},
				wantRevXY: []float64{168.4, 309.6, 275.2, 196.32, 316.2, 352.28, 309.36, 314.52, 292.84, 333.44, 295.16},
			},
		},

		Trans: []DgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1)
			{alpha: 0, beta: 0,
				want:      []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevX:  []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevY:  []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
				wantRevXY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0},
			},
			{alpha: 0, beta: 1,
				want:      []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevX:  []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevY:  []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
				wantRevXY: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1},
			},
			{alpha: 1, beta: 0,
				want:      []float64{37.07, 55.58, 46.05, 47.34, 33.88, 54.19, 50.85, 39.31, 31.29, 55.31, 46.98},
				wantRevX:  []float64{38.11, 63.38, 46.44, 40.04, 34.63, 59.27, 50.13, 35.45, 28.26, 51.64, 46.22},
				wantRevY:  []float64{46.98, 55.31, 31.29, 39.31, 50.85, 54.19, 33.88, 47.34, 46.05, 55.58, 37.07},
				wantRevXY: []float64{46.22, 51.64, 28.26, 35.45, 50.13, 59.27, 34.63, 40.04, 46.44, 63.38, 38.11},
			},
			{alpha: 8, beta: -6,
				want:      []float64{281.56, 437.44, 363.6, 361.32, 250.64, 422.72, 379.2, 294.68, 227.52, 437.08, 369.24},
				wantRevX:  []float64{289.88, 499.84, 366.72, 302.92, 256.64, 463.36, 373.44, 263.8, 203.28, 407.72, 363.16},
				wantRevY:  []float64{360.84, 435.28, 245.52, 297.08, 386.4, 422.72, 243.44, 358.92, 345.6, 439.24, 289.96},
				wantRevXY: []float64{354.76, 405.92, 221.28, 266.2, 380.64, 463.36, 249.44, 300.52, 348.72, 501.64, 298.28},
			},
		},
	},
}
   506  
   507  func TestGemv(t *testing.T) {
   508  	for _, test := range DgemvCases {
   509  		t.Run(fmt.Sprintf("(%vx%v)", test.m, test.n), func(tt *testing.T) {
   510  			for i, cas := range test.NoTrans {
   511  				tt.Run(fmt.Sprintf("NoTrans case %v", i), func(st *testing.T) {
   512  					dgemvcomp(st, test, false, cas, i)
   513  				})
   514  			}
   515  			for i, cas := range test.Trans {
   516  				tt.Run(fmt.Sprintf("Trans case %v", i), func(st *testing.T) {
   517  					dgemvcomp(st, test, true, cas, i)
   518  				})
   519  			}
   520  		})
   521  	}
   522  }
   523  
   524  func dgemvcomp(t *testing.T, test DgemvCase, trans bool, cas DgemvSubcase, i int) {
   525  	const (
   526  		tol = 1e-15
   527  
   528  		xGdVal, yGdVal, aGdVal = 0.5, 1.5, 10
   529  		gdLn                   = 4
   530  	)
   531  	if trans {
   532  		test.x, test.y = test.y, test.x
   533  	}
   534  	prefix := fmt.Sprintf("Test (%vx%v) t:%v (a:%v,b:%v)", test.m, test.n, trans, cas.alpha, cas.beta)
   535  	xg, yg := guardVector(test.x, xGdVal, gdLn), guardVector(test.y, yGdVal, gdLn)
   536  	x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn]
   537  	ag := guardVector(test.A, aGdVal, gdLn)
   538  	a := ag[gdLn : len(ag)-gdLn]
   539  
   540  	lda := uintptr(test.n)
   541  	if trans {
   542  		GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1)
   543  	} else {
   544  		GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1)
   545  	}
   546  	for i := range cas.want {
   547  		if !sameApprox(y[i], cas.want[i], tol) {
   548  			t.Errorf(msgVal, prefix, i, y[i], cas.want[i])
   549  		}
   550  	}
   551  
   552  	if !isValidGuard(xg, xGdVal, gdLn) {
   553  		t.Errorf(msgGuard, prefix, "x", xg[:gdLn], xg[len(xg)-gdLn:])
   554  	}
   555  	if !isValidGuard(yg, yGdVal, gdLn) {
   556  		t.Errorf(msgGuard, prefix, "y", yg[:gdLn], yg[len(yg)-gdLn:])
   557  	}
   558  	if !isValidGuard(ag, aGdVal, gdLn) {
   559  		t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:])
   560  	}
   561  	if !equalStrided(test.x, x, 1) {
   562  		t.Errorf(msgReadOnly, prefix, "x")
   563  	}
   564  	if !equalStrided(test.A, a, 1) {
   565  		t.Errorf(msgReadOnly, prefix, "a")
   566  	}
   567  
   568  	for _, inc := range newIncSet(-1, 1, 2, 3, 90) {
   569  		incPrefix := fmt.Sprintf("%s inc(x:%v, y:%v)", prefix, inc.x, inc.y)
   570  		want, incY := cas.want, inc.y
   571  		switch {
   572  		case inc.x < 0 && inc.y < 0:
   573  			want = cas.wantRevXY
   574  			incY = -inc.y
   575  		case inc.x < 0:
   576  			want = cas.wantRevX
   577  		case inc.y < 0:
   578  			want = cas.wantRevY
   579  			incY = -inc.y
   580  		}
   581  		xg, yg := guardIncVector(test.x, xGdVal, inc.x, gdLn), guardIncVector(test.y, yGdVal, inc.y, gdLn)
   582  		x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn]
   583  		ag := guardVector(test.A, aGdVal, gdLn)
   584  		a := ag[gdLn : len(ag)-gdLn]
   585  
   586  		if trans {
   587  			GemvT(uintptr(test.m), uintptr(test.n), cas.alpha,
   588  				a, lda, x, uintptr(inc.x),
   589  				cas.beta, y, uintptr(inc.y))
   590  		} else {
   591  			GemvN(uintptr(test.m), uintptr(test.n), cas.alpha,
   592  				a, lda, x, uintptr(inc.x),
   593  				cas.beta, y, uintptr(inc.y))
   594  		}
   595  		for i := range want {
   596  			if !sameApprox(y[i*incY], want[i], tol) {
   597  				t.Errorf(msgVal, incPrefix, i, y[i*incY], want[i])
   598  				t.Error(y[i*incY] - want[i])
   599  			}
   600  		}
   601  
   602  		checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn)
   603  		checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn)
   604  		if !isValidGuard(ag, aGdVal, gdLn) {
   605  			t.Errorf(msgGuard, incPrefix, "a", ag[:gdLn], ag[len(ag)-gdLn:])
   606  		}
   607  		if !equalStrided(test.x, x, inc.x) {
   608  			t.Errorf(msgReadOnly, incPrefix, "x")
   609  		}
   610  		if !equalStrided(test.A, a, 1) {
   611  			t.Errorf(msgReadOnly, incPrefix, "a")
   612  		}
   613  	}
   614  }