github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/internal/asm/f64/gemv_test.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package f64_test 6 7 import ( 8 "fmt" 9 "testing" 10 11 . "github.com/jingcheng-WU/gonum/internal/asm/f64" 12 ) 13 14 type DgemvCase struct { 15 m int 16 n int 17 A []float64 18 x []float64 19 y []float64 20 21 NoTrans []DgemvSubcase 22 Trans []DgemvSubcase 23 } 24 25 type DgemvSubcase struct { 26 alpha float64 27 beta float64 28 want []float64 29 wantRevX []float64 30 wantRevY []float64 31 wantRevXY []float64 32 } 33 34 var DgemvCases = []DgemvCase{ 35 { // 1x1 36 m: 1, 37 n: 1, 38 A: []float64{4.1}, 39 x: []float64{2.2}, 40 y: []float64{6.8}, 41 42 NoTrans: []DgemvSubcase{ // (1x1) 43 {alpha: 0, beta: 0, 44 want: []float64{0}, 45 wantRevX: []float64{0}, 46 wantRevY: []float64{0}, 47 wantRevXY: []float64{0}, 48 }, 49 {alpha: 0, beta: 1, 50 want: []float64{6.8}, 51 wantRevX: []float64{6.8}, 52 wantRevY: []float64{6.8}, 53 wantRevXY: []float64{6.8}, 54 }, 55 {alpha: 1, beta: 0, 56 want: []float64{9.02}, 57 wantRevX: []float64{9.02}, 58 wantRevY: []float64{9.02}, 59 wantRevXY: []float64{9.02}, 60 }, 61 {alpha: 8, beta: -6, 62 want: []float64{31.36}, 63 wantRevX: []float64{31.36}, 64 wantRevY: []float64{31.36}, 65 wantRevXY: []float64{31.36}, 66 }, 67 }, 68 69 Trans: []DgemvSubcase{ // (1x1) 70 {alpha: 0, beta: 0, 71 want: []float64{0}, 72 wantRevX: []float64{0}, 73 wantRevY: []float64{0}, 74 wantRevXY: []float64{0}, 75 }, 76 {alpha: 0, beta: 1, 77 want: []float64{2.2}, 78 wantRevX: []float64{2.2}, 79 wantRevY: []float64{2.2}, 80 wantRevXY: []float64{2.2}, 81 }, 82 {alpha: 1, beta: 0, 83 want: []float64{27.88}, 84 wantRevX: []float64{27.88}, 85 wantRevY: []float64{27.88}, 86 wantRevXY: []float64{27.88}, 87 }, 88 {alpha: 8, beta: -6, 89 want: []float64{209.84}, 90 wantRevX: []float64{209.84}, 91 wantRevY: []float64{209.84}, 92 wantRevXY: []float64{209.84}, 93 }, 94 }, 95 }, 96 97 { // 3x2 98 m: 3, 99 n: 2, 100 A: []float64{ 101 4.67, 2.75, 102 0.48, 1.21, 103 2.28, 2.82, 104 }, 105 x: []float64{3.38, 3}, 106 y: []float64{2.8, 1.71, 2.64}, 107 108 NoTrans: []DgemvSubcase{ // (2x2, 1x2) 109 {alpha: 0, beta: 0, 110 want: []float64{0, 0, 0}, 111 wantRevX: []float64{0, 0, 0}, 112 wantRevY: []float64{0, 0, 0}, 113 wantRevXY: []float64{0, 0, 0}, 114 }, 115 {alpha: 0, beta: 1, 116 want: []float64{2.8, 1.71, 2.64}, 117 wantRevX: []float64{2.8, 1.71, 2.64}, 118 wantRevY: []float64{2.8, 1.71, 2.64}, 119 wantRevXY: []float64{2.8, 1.71, 2.64}, 120 }, 121 {alpha: 1, beta: 0, 122 want: []float64{24.0346, 5.2524, 16.1664}, 123 wantRevX: []float64{23.305, 5.5298, 16.3716}, 124 wantRevY: []float64{16.1664, 5.2524, 24.0346}, 125 wantRevXY: []float64{16.3716, 5.5298, 23.305}, 126 }, 127 {alpha: 8, beta: -6, 128 want: []float64{175.4768, 31.7592, 113.4912}, 129 wantRevX: []float64{169.64, 33.9784, 115.1328}, 130 wantRevY: []float64{112.5312, 31.7592, 176.4368}, 131 wantRevXY: []float64{114.1728, 33.9784, 170.6}, 132 }, 133 }, 134 135 Trans: []DgemvSubcase{ // (2x2) 136 {alpha: 0, beta: 0, 137 want: []float64{0, 0}, 138 wantRevX: []float64{0, 0}, 139 wantRevY: []float64{0, 0}, 140 wantRevXY: []float64{0, 0}, 141 }, 142 {alpha: 0, beta: 1, 143 want: []float64{3.38, 3}, 144 wantRevX: []float64{3.38, 3}, 145 wantRevY: []float64{3.38, 3}, 146 wantRevXY: []float64{3.38, 3}, 147 }, 148 {alpha: 1, beta: 0, 149 want: []float64{19.916, 17.2139}, 150 wantRevX: []float64{19.5336, 17.2251}, 151 wantRevY: []float64{17.2139, 19.916}, 152 wantRevXY: []float64{17.2251, 19.5336}, 153 }, 154 {alpha: 8, beta: -6, 155 want: []float64{139.048, 119.7112}, 156 wantRevX: []float64{135.9888, 119.8008}, 157 wantRevY: []float64{117.4312, 141.328}, 158 wantRevXY: []float64{117.5208, 138.2688}, 159 }, 160 }, 161 }, 162 163 { // 3x3 164 m: 3, 165 n: 3, 166 A: []float64{ 167 4.38, 4.4, 4.26, 168 4.18, 0.56, 2.57, 169 2.59, 2.07, 0.46, 170 }, 171 x: []float64{4.82, 1.82, 1.12}, 172 y: []float64{0.24, 1.41, 3.45}, 173 174 NoTrans: []DgemvSubcase{ // (2x2, 2x1, 1x2, 1x1) 175 {alpha: 0, beta: 0, 176 want: []float64{0, 0, 0}, 177 wantRevX: []float64{0, 0, 0}, 178 wantRevY: []float64{0, 0, 0}, 179 wantRevXY: []float64{0, 0, 0}, 180 }, 181 {alpha: 0, beta: 1, 182 want: []float64{0.24, 1.41, 3.45}, 183 wantRevX: []float64{0.24, 1.41, 3.45}, 184 wantRevY: []float64{0.24, 1.41, 3.45}, 185 wantRevXY: []float64{0.24, 1.41, 3.45}, 186 }, 187 {alpha: 1, beta: 0, 188 want: []float64{33.8908, 24.0452, 16.7664}, 189 wantRevX: []float64{33.4468, 18.0882, 8.8854}, 190 wantRevY: []float64{16.7664, 24.0452, 33.8908}, 191 wantRevXY: []float64{8.8854, 18.0882, 33.4468}, 192 }, 193 {alpha: 8, beta: -6, 194 want: []float64{269.6864, 183.9016, 113.4312}, 195 wantRevX: []float64{266.1344, 136.2456, 50.3832}, 196 wantRevY: []float64{132.6912, 183.9016, 250.4264}, 197 wantRevXY: []float64{69.6432, 136.2456, 246.8744}, 198 }, 199 }, 200 201 Trans: []DgemvSubcase{ // (2x2, 1x2, 2x1, 1x1) 202 {alpha: 0, beta: 0, 203 want: []float64{0, 0, 0}, 204 wantRevX: []float64{0, 0, 0}, 205 wantRevY: []float64{0, 0, 0}, 206 wantRevXY: []float64{0, 0, 0}, 207 }, 208 {alpha: 0, beta: 1, 209 want: []float64{4.82, 1.82, 1.12}, 210 wantRevX: []float64{4.82, 1.82, 1.12}, 211 wantRevY: []float64{4.82, 1.82, 1.12}, 212 wantRevXY: []float64{4.82, 1.82, 1.12}, 213 }, 214 {alpha: 1, beta: 0, 215 want: []float64{15.8805, 8.9871, 6.2331}, 216 wantRevX: []float64{21.6264, 16.4664, 18.4311}, 217 wantRevY: []float64{6.2331, 8.9871, 15.8805}, 218 wantRevXY: []float64{18.4311, 16.4664, 21.6264}, 219 }, 220 {alpha: 8, beta: -6, 221 want: []float64{98.124, 60.9768, 43.1448}, 222 wantRevX: []float64{144.0912, 120.8112, 140.7288}, 223 wantRevY: []float64{20.9448, 60.9768, 120.324}, 224 wantRevXY: []float64{118.5288, 120.8112, 166.2912}, 225 }, 226 }, 227 }, 228 229 { // 5x3 230 m: 5, 231 n: 3, 232 A: []float64{ 233 4.1, 6.2, 8.1, 234 9.6, 3.5, 9.1, 235 10, 7, 3, 236 1, 1, 2, 237 9, 2, 5, 238 }, 239 x: []float64{1, 2, 3}, 240 y: []float64{7, 8, 9, 10, 11}, 241 242 NoTrans: []DgemvSubcase{ //(4x2, 4x1, 1x2, 1x1) 243 {alpha: 0, beta: 0, 244 want: []float64{0, 0, 0, 0, 0}, 245 wantRevX: []float64{0, 0, 0, 0, 0}, 246 wantRevY: []float64{0, 0, 0, 0, 0}, 247 wantRevXY: []float64{0, 0, 0, 0, 0}, 248 }, 249 {alpha: 0, beta: 1, 250 want: []float64{7, 8, 9, 10, 11}, 251 wantRevX: []float64{7, 8, 9, 10, 11}, 252 wantRevY: []float64{7, 8, 9, 10, 11}, 253 wantRevXY: []float64{7, 8, 9, 10, 11}, 254 }, 255 {alpha: 1, beta: 0, 256 want: []float64{40.8, 43.9, 33, 9, 28}, 257 wantRevX: []float64{32.8, 44.9, 47, 7, 36}, 258 wantRevY: []float64{28, 9, 33, 43.9, 40.8}, 259 wantRevXY: []float64{36, 7, 47, 44.9, 32.8}, 260 }, 261 {alpha: 8, beta: -6, 262 want: []float64{284.4, 303.2, 210, 12, 158}, 263 wantRevX: []float64{220.4, 311.2, 322, -4, 222}, 264 wantRevY: []float64{182, 24, 210, 291.2, 260.4}, 265 wantRevXY: []float64{246, 8, 322, 299.2, 196.4}, 266 }, 267 }, 268 269 Trans: []DgemvSubcase{ //( 2x4, 1x4, 2x1, 1x1) 270 {alpha: 0, beta: 0, 271 want: []float64{0, 0, 0}, 272 wantRevX: []float64{0, 0, 0}, 273 wantRevY: []float64{0, 0, 0}, 274 wantRevXY: []float64{0, 0, 0}, 275 }, 276 {alpha: 0, beta: 1, 277 want: []float64{1, 2, 3}, 278 wantRevX: []float64{1, 2, 3}, 279 wantRevY: []float64{1, 2, 3}, 280 wantRevXY: []float64{1, 2, 3}, 281 }, 282 {alpha: 1, beta: 0, 283 want: []float64{304.5, 166.4, 231.5}, 284 wantRevX: []float64{302.1, 188.2, 258.1}, 285 wantRevY: []float64{231.5, 166.4, 304.5}, 286 wantRevXY: []float64{258.1, 188.2, 302.1}, 287 }, 288 {alpha: 8, beta: -6, 289 want: []float64{2430, 1319.2, 1834}, 290 wantRevX: []float64{2410.8, 1493.6, 2046.8}, 291 wantRevY: []float64{1846, 1319.2, 2418}, 292 wantRevXY: []float64{2058.8, 1493.6, 2398.8}, 293 }, 294 }, 295 }, 296 297 { // 3x5 298 m: 3, 299 n: 5, 300 A: []float64{ 301 1.4, 2.34, 3.96, 0.96, 2.3, 302 3.43, 0.62, 1.09, 0.2, 3.56, 303 1.15, 0.58, 3.8, 1.16, 0.01, 304 }, 305 x: []float64{2.34, 2.82, 4.73, 0.22, 3.91}, 306 y: []float64{2.46, 2.22, 4.75}, 307 308 NoTrans: []DgemvSubcase{ // (2x4, 2x1, 1x4, 1x1) 309 {alpha: 0, beta: 0, 310 want: []float64{0, 0, 0}, 311 wantRevX: []float64{0, 0, 0}, 312 wantRevY: []float64{0, 0, 0}, 313 wantRevXY: []float64{0, 0, 0}, 314 }, 315 {alpha: 0, beta: 1, 316 want: []float64{2.46, 2.22, 4.75}, 317 wantRevX: []float64{2.46, 2.22, 4.75}, 318 wantRevY: []float64{2.46, 2.22, 4.75}, 319 wantRevXY: []float64{2.46, 2.22, 4.75}, 320 }, 321 {alpha: 1, beta: 0, 322 want: []float64{37.8098, 28.8939, 22.5949}, 323 wantRevX: []float64{32.8088, 27.5978, 25.8927}, 324 wantRevY: []float64{22.5949, 28.8939, 37.8098}, 325 wantRevXY: []float64{25.8927, 27.5978, 32.8088}, 326 }, 327 {alpha: 8, beta: -6, 328 want: []float64{287.7184, 217.8312, 152.2592}, 329 wantRevX: []float64{247.7104, 207.4624, 178.6416}, 330 wantRevY: []float64{165.9992, 217.8312, 273.9784}, 331 wantRevXY: []float64{192.3816, 207.4624, 233.9704}, 332 }, 333 }, 334 335 Trans: []DgemvSubcase{ // (4x2, 1x2, 4x1, 1x1) 336 {alpha: 0, beta: 0, 337 want: []float64{0, 0, 0, 0, 0}, 338 wantRevX: []float64{0, 0, 0, 0, 0}, 339 wantRevY: []float64{0, 0, 0, 0, 0}, 340 wantRevXY: []float64{0, 0, 0, 0, 0}, 341 }, 342 {alpha: 0, beta: 1, 343 want: []float64{2.34, 2.82, 4.73, 0.22, 3.91}, 344 wantRevX: []float64{2.34, 2.82, 4.73, 0.22, 3.91}, 345 wantRevY: []float64{2.34, 2.82, 4.73, 0.22, 3.91}, 346 wantRevXY: []float64{2.34, 2.82, 4.73, 0.22, 3.91}, 347 }, 348 {alpha: 1, beta: 0, 349 want: []float64{16.5211, 9.8878, 30.2114, 8.3156, 13.6087}, 350 wantRevX: []float64{17.0936, 13.9182, 30.5778, 7.8576, 18.8528}, 351 wantRevY: []float64{13.6087, 8.3156, 30.2114, 9.8878, 16.5211}, 352 wantRevXY: []float64{18.8528, 7.8576, 30.5778, 13.9182, 17.0936}, 353 }, 354 {alpha: 8, beta: -6, 355 want: []float64{118.1288, 62.1824, 213.3112, 65.2048, 85.4096}, 356 wantRevX: []float64{122.7088, 94.4256, 216.2424, 61.5408, 127.3624}, 357 wantRevY: []float64{94.8296, 49.6048, 213.3112, 77.7824, 108.7088}, 358 wantRevXY: []float64{136.7824, 45.9408, 216.2424, 110.0256, 113.2888}, 359 }, 360 }, 361 }, 362 363 { // 7x7 & nan test 364 m: 7, 365 n: 7, 366 A: []float64{ 367 0.9, 2.6, 0.5, 1.8, 2.3, 0.6, 0.2, 368 1.6, 0.6, 1.3, 2.1, 1.4, 0.4, 0.8, 369 2.9, 0.9, 2.3, 2.5, 1.4, 1.8, 1.6, 370 2.6, 2.8, 2.1, 0.3, nan, 2.2, 1.3, 371 0.2, 2.2, 1.8, 1.8, 2.1, 1.3, 1.4, 372 1.7, 1.4, 2.3, 2., 1., 0., 1.4, 373 2.1, 1.9, 0.8, 2.9, 1.3, 0.3, 1.3, 374 }, 375 x: []float64{0.4, 2.8, 3.5, 0.3, 0.6, 2.5, 3.1}, 376 y: []float64{3.2, 4.4, 5., 4.3, 4.1, 1.4, 0.2}, 377 378 NoTrans: []DgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) 379 {alpha: 0, beta: 0, 380 want: []float64{0, 0, 0, nan, 0, 0, 0}, 381 wantRevX: []float64{0, 0, 0, nan, 0, 0, 0}, 382 wantRevY: []float64{0, 0, 0, nan, 0, 0, 0}, 383 wantRevXY: []float64{0, 0, 0, nan, 0, 0, 0}, 384 }, 385 {alpha: 0, beta: 1, 386 want: []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 387 wantRevX: []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 388 wantRevY: []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 389 wantRevXY: []float64{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 390 }, 391 {alpha: 1, beta: 0, 392 want: []float64{13.43, 11.82, 22.78, nan, 21.93, 18.19, 15.39}, 393 wantRevX: []float64{19.94, 14.21, 23.95, nan, 19.29, 14.81, 18.52}, 394 wantRevY: []float64{15.39, 18.19, 21.93, nan, 22.78, 11.82, 13.43}, 395 wantRevXY: []float64{18.52, 14.81, 19.29, nan, 23.95, 14.21, 19.94}, 396 }, 397 {alpha: 8, beta: -6, 398 want: []float64{88.24, 68.16, 152.24, nan, 150.84, 137.12, 121.92}, 399 wantRevX: []float64{140.32, 87.28, 161.6, nan, 129.72, 110.08, 146.96}, 400 wantRevY: []float64{103.92, 119.12, 145.44, nan, 157.64, 86.16, 106.24}, 401 wantRevXY: []float64{128.96, 92.08, 124.32, nan, 167., 105.28, 158.32}, 402 }, 403 }, 404 405 Trans: []DgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) 406 {alpha: 0, beta: 0, 407 want: []float64{0, 0, 0, 0, nan, 0, 0}, 408 wantRevX: []float64{0, 0, 0, 0, nan, 0, 0}, 409 wantRevY: []float64{0, 0, nan, 0, 0, 0, 0}, 410 wantRevXY: []float64{0, 0, nan, 0, 0, 0, 0}, 411 }, 412 {alpha: 0, beta: 1, 413 want: []float64{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, 414 wantRevX: []float64{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, 415 wantRevY: []float64{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, 416 wantRevXY: []float64{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, 417 }, 418 {alpha: 1, beta: 0, 419 want: []float64{39.22, 38.86, 38.61, 39.55, nan, 27.53, 25.71}, 420 wantRevX: []float64{40.69, 40.33, 42.06, 41.92, nan, 24.98, 30.63}, 421 wantRevY: []float64{25.71, 27.53, nan, 39.55, 38.61, 38.86, 39.22}, 422 wantRevXY: []float64{30.63, 24.98, nan, 41.92, 42.06, 40.33, 40.69}, 423 }, 424 {alpha: 8, beta: -6, 425 want: []float64{311.36, 294.08, 287.88, 314.6, nan, 205.24, 187.08}, 426 wantRevX: []float64{323.12, 305.84, 315.48, 333.56, nan, 184.84, 226.44}, 427 wantRevY: []float64{203.28, 203.44, nan, 314.6, 305.28, 295.88, 295.16}, 428 wantRevXY: []float64{242.64, 183.04, nan, 333.56, 332.88, 307.64, 306.92}, 429 }, 430 }, 431 }, 432 { // 11x11 433 m: 11, 434 n: 11, 435 A: []float64{ 436 0.4, 3., 2.5, 2., 0.4, 2., 2., 1., 0.1, 0.3, 2., 437 1.7, 0.7, 2.6, 1.6, 0.5, 2.4, 3., 0.9, 0.1, 2.8, 1.3, 438 1.1, 2.2, 1.5, 0.8, 2.9, 0.4, 0.5, 1.7, 0.8, 2.6, 0.7, 439 2.2, 1.7, 0.8, 2.9, 0.7, 0.7, 1.7, 1.8, 1.9, 2.4, 1.9, 440 0.3, 0.5, 1.6, 1.5, 1.5, 2.4, 1.7, 1.2, 1.9, 2.8, 1.2, 441 1.4, 2.2, 1.7, 1.4, 2.7, 1.4, 0.9, 1.8, 0.5, 1.2, 1.9, 442 0.8, 2.3, 1.7, 1.3, 2., 2.8, 2.6, 0.4, 2.5, 1.3, 0.5, 443 2.4, 2.8, 1.1, 0.2, 0.4, 2.8, 0.5, 0.5, 0., 2.8, 1.9, 444 2.3, 1.8, 2.3, 1.7, 1.1, 0.1, 1.4, 1.2, 1.9, 0.5, 0.6, 445 0.6, 2.4, 1.2, 0.3, 1.4, 1.3, 2.5, 2.6, 0., 1.3, 2.6, 446 0.7, 1.5, 0.2, 1.4, 1.1, 1.8, 0.2, 1., 1., 0.6, 1.2, 447 }, 448 x: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 449 y: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 450 451 NoTrans: []DgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) 452 {alpha: 0, beta: 0, 453 want: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 454 wantRevX: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 455 wantRevY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 456 wantRevXY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 457 }, 458 {alpha: 0, beta: 1, 459 want: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 460 wantRevX: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 461 wantRevY: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 462 wantRevXY: []float64{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 463 }, 464 {alpha: 1, beta: 0, 465 want: []float64{32.71, 38.93, 33.55, 45.46, 39.24, 38.41, 46.23, 25.78, 37.33, 37.42, 24.63}, 466 wantRevX: []float64{39.82, 43.78, 37.73, 41.19, 40.17, 44.41, 42.75, 28.14, 35.6, 41.25, 23.9}, 467 wantRevY: []float64{24.63, 37.42, 37.33, 25.78, 46.23, 38.41, 39.24, 45.46, 33.55, 38.93, 32.71}, 468 wantRevXY: []float64{23.9, 41.25, 35.6, 28.14, 42.75, 44.41, 40.17, 41.19, 37.73, 43.78, 39.82}, 469 }, 470 {alpha: 8, beta: -6, 471 want: []float64{238.88, 291.04, 258.8, 334.88, 288.12, 304.28, 357.84, 191.24, 289.64, 282.56, 173.64}, 472 wantRevX: []float64{295.76, 329.84, 292.24, 300.72, 295.56, 352.28, 330., 210.12, 275.8, 313.2, 167.8}, 473 wantRevY: []float64{174.24, 278.96, 289.04, 177.44, 344.04, 304.28, 301.92, 348.68, 259.4, 294.64, 238.28}, 474 wantRevXY: []float64{168.4, 309.6, 275.2, 196.32, 316.2, 352.28, 309.36, 314.52, 292.84, 333.44, 295.16}, 475 }, 476 }, 477 478 Trans: []DgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) 479 {alpha: 0, beta: 0, 480 want: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 481 wantRevX: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 482 wantRevY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 483 wantRevXY: []float64{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 484 }, 485 {alpha: 0, beta: 1, 486 want: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 487 wantRevX: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 488 wantRevY: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 489 wantRevXY: []float64{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 490 }, 491 {alpha: 1, beta: 0, 492 want: []float64{37.07, 55.58, 46.05, 47.34, 33.88, 54.19, 50.85, 39.31, 31.29, 55.31, 46.98}, 493 wantRevX: []float64{38.11, 63.38, 46.44, 40.04, 34.63, 59.27, 50.13, 35.45, 28.26, 51.64, 46.22}, 494 wantRevY: []float64{46.98, 55.31, 31.29, 39.31, 50.85, 54.19, 33.88, 47.34, 46.05, 55.58, 37.07}, 495 wantRevXY: []float64{46.22, 51.64, 28.26, 35.45, 50.13, 59.27, 34.63, 40.04, 46.44, 63.38, 38.11}, 496 }, 497 {alpha: 8, beta: -6, 498 want: []float64{281.56, 437.44, 363.6, 361.32, 250.64, 422.72, 379.2, 294.68, 227.52, 437.08, 369.24}, 499 wantRevX: []float64{289.88, 499.84, 366.72, 302.92, 256.64, 463.36, 373.44, 263.8, 203.28, 407.72, 363.16}, 500 wantRevY: []float64{360.84, 435.28, 245.52, 297.08, 386.4, 422.72, 243.44, 358.92, 345.6, 439.24, 289.96}, 501 wantRevXY: []float64{354.76, 405.92, 221.28, 266.2, 380.64, 463.36, 249.44, 300.52, 348.72, 501.64, 298.28}, 502 }, 503 }, 504 }, 505 } 506 507 func TestGemv(t *testing.T) { 508 for _, test := range DgemvCases { 509 t.Run(fmt.Sprintf("(%vx%v)", test.m, test.n), func(tt *testing.T) { 510 for i, cas := range test.NoTrans { 511 tt.Run(fmt.Sprintf("NoTrans case %v", i), func(st *testing.T) { 512 dgemvcomp(st, test, false, cas, i) 513 }) 514 } 515 for i, cas := range test.Trans { 516 tt.Run(fmt.Sprintf("Trans case %v", i), func(st *testing.T) { 517 dgemvcomp(st, test, true, cas, i) 518 }) 519 } 520 }) 521 } 522 } 523 524 func dgemvcomp(t *testing.T, test DgemvCase, trans bool, cas DgemvSubcase, i int) { 525 const ( 526 tol = 1e-15 527 528 xGdVal, yGdVal, aGdVal = 0.5, 1.5, 10 529 gdLn = 4 530 ) 531 if trans { 532 test.x, test.y = test.y, test.x 533 } 534 prefix := fmt.Sprintf("Test (%vx%v) t:%v (a:%v,b:%v)", test.m, test.n, trans, cas.alpha, cas.beta) 535 xg, yg := guardVector(test.x, xGdVal, gdLn), guardVector(test.y, yGdVal, gdLn) 536 x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] 537 ag := guardVector(test.A, aGdVal, gdLn) 538 a := ag[gdLn : len(ag)-gdLn] 539 540 lda := uintptr(test.n) 541 if trans { 542 GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) 543 } else { 544 GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) 545 } 546 for i := range cas.want { 547 if !sameApprox(y[i], cas.want[i], tol) { 548 t.Errorf(msgVal, prefix, i, y[i], cas.want[i]) 549 } 550 } 551 552 if !isValidGuard(xg, xGdVal, gdLn) { 553 t.Errorf(msgGuard, prefix, "x", xg[:gdLn], xg[len(xg)-gdLn:]) 554 } 555 if !isValidGuard(yg, yGdVal, gdLn) { 556 t.Errorf(msgGuard, prefix, "y", yg[:gdLn], yg[len(yg)-gdLn:]) 557 } 558 if !isValidGuard(ag, aGdVal, gdLn) { 559 t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) 560 } 561 if !equalStrided(test.x, x, 1) { 562 t.Errorf(msgReadOnly, prefix, "x") 563 } 564 if !equalStrided(test.A, a, 1) { 565 t.Errorf(msgReadOnly, prefix, "a") 566 } 567 568 for _, inc := range newIncSet(-1, 1, 2, 3, 90) { 569 incPrefix := fmt.Sprintf("%s inc(x:%v, y:%v)", prefix, inc.x, inc.y) 570 want, incY := cas.want, inc.y 571 switch { 572 case inc.x < 0 && inc.y < 0: 573 want = cas.wantRevXY 574 incY = -inc.y 575 case inc.x < 0: 576 want = cas.wantRevX 577 case inc.y < 0: 578 want = cas.wantRevY 579 incY = -inc.y 580 } 581 xg, yg := guardIncVector(test.x, xGdVal, inc.x, gdLn), guardIncVector(test.y, yGdVal, inc.y, gdLn) 582 x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] 583 ag := guardVector(test.A, aGdVal, gdLn) 584 a := ag[gdLn : len(ag)-gdLn] 585 586 if trans { 587 GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, 588 a, lda, x, uintptr(inc.x), 589 cas.beta, y, uintptr(inc.y)) 590 } else { 591 GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, 592 a, lda, x, uintptr(inc.x), 593 cas.beta, y, uintptr(inc.y)) 594 } 595 for i := range want { 596 if !sameApprox(y[i*incY], want[i], tol) { 597 t.Errorf(msgVal, incPrefix, i, y[i*incY], want[i]) 598 t.Error(y[i*incY] - want[i]) 599 } 600 } 601 602 checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn) 603 checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn) 604 if !isValidGuard(ag, aGdVal, gdLn) { 605 t.Errorf(msgGuard, incPrefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) 606 } 607 if !equalStrided(test.x, x, inc.x) { 608 t.Errorf(msgReadOnly, incPrefix, "x") 609 } 610 if !equalStrided(test.A, a, 1) { 611 t.Errorf(msgReadOnly, incPrefix, "a") 612 } 613 } 614 }