gonum.org/v1/gonum@v0.14.0/internal/asm/f32/gemv_test.go (about) 1 // Copyright ©2017 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package f32_test 6 7 import ( 8 "fmt" 9 "testing" 10 11 . "gonum.org/v1/gonum/internal/asm/f32" 12 "gonum.org/v1/gonum/internal/math32" 13 ) 14 15 type SgemvCase struct { 16 m int 17 n int 18 A []float32 19 x []float32 20 y []float32 21 22 NoTrans []SgemvSubcase 23 Trans []SgemvSubcase 24 } 25 26 type SgemvSubcase struct { 27 alpha float32 28 beta float32 29 want []float32 30 wantRevX []float32 31 wantRevY []float32 32 wantRevXY []float32 33 } 34 35 var SgemvCases = []SgemvCase{ 36 { // 1x1 37 m: 1, 38 n: 1, 39 A: []float32{4.1}, 40 x: []float32{2.2}, 41 y: []float32{6.8}, 42 43 NoTrans: []SgemvSubcase{ // (1x1) 44 {alpha: 0, beta: 0, 45 want: []float32{0}, 46 wantRevX: []float32{0}, 47 wantRevY: []float32{0}, 48 wantRevXY: []float32{0}, 49 }, 50 {alpha: 0, beta: 1, 51 want: []float32{6.8}, 52 wantRevX: []float32{6.8}, 53 wantRevY: []float32{6.8}, 54 wantRevXY: []float32{6.8}, 55 }, 56 {alpha: 1, beta: 0, 57 want: []float32{9.02}, 58 wantRevX: []float32{9.02}, 59 wantRevY: []float32{9.02}, 60 wantRevXY: []float32{9.02}, 61 }, 62 {alpha: 8, beta: -6, 63 want: []float32{31.36}, 64 wantRevX: []float32{31.36}, 65 wantRevY: []float32{31.36}, 66 wantRevXY: []float32{31.36}, 67 }, 68 }, 69 70 Trans: []SgemvSubcase{ // (1x1) 71 {alpha: 0, beta: 0, 72 want: []float32{0}, 73 wantRevX: []float32{0}, 74 wantRevY: []float32{0}, 75 wantRevXY: []float32{0}, 76 }, 77 {alpha: 0, beta: 1, 78 want: []float32{2.2}, 79 wantRevX: []float32{2.2}, 80 wantRevY: []float32{2.2}, 81 wantRevXY: []float32{2.2}, 82 }, 83 {alpha: 1, beta: 0, 84 want: []float32{27.88}, 85 wantRevX: []float32{27.88}, 86 wantRevY: []float32{27.88}, 87 wantRevXY: []float32{27.88}, 88 }, 89 {alpha: 8, beta: -6, 90 want: []float32{209.84}, 91 wantRevX: []float32{209.84}, 92 wantRevY: []float32{209.84}, 93 wantRevXY: []float32{209.84}, 94 }, 95 }, 96 }, 97 98 { // 3x2 99 m: 3, 100 n: 2, 101 A: []float32{ 102 4.67, 2.75, 103 0.48, 1.21, 104 2.28, 2.82, 105 }, 106 x: []float32{3.38, 3}, 107 y: []float32{2.8, 1.71, 2.64}, 108 109 NoTrans: []SgemvSubcase{ // (2x2, 1x2) 110 {alpha: 0, beta: 0, 111 want: []float32{0, 0, 0}, 112 wantRevX: []float32{0, 0, 0}, 113 wantRevY: []float32{0, 0, 0}, 114 wantRevXY: []float32{0, 0, 0}, 115 }, 116 {alpha: 0, beta: 1, 117 want: []float32{2.8, 1.71, 2.64}, 118 wantRevX: []float32{2.8, 1.71, 2.64}, 119 wantRevY: []float32{2.8, 1.71, 2.64}, 120 wantRevXY: []float32{2.8, 1.71, 2.64}, 121 }, 122 {alpha: 1, beta: 0, 123 want: []float32{24.0346, 5.2524, 16.1664}, 124 wantRevX: []float32{23.305, 5.5298, 16.3716}, 125 wantRevY: []float32{16.1664, 5.2524, 24.0346}, 126 wantRevXY: []float32{16.3716, 5.5298, 23.305}, 127 }, 128 {alpha: 8, beta: -6, 129 want: []float32{175.4768, 31.7592, 113.4912}, 130 wantRevX: []float32{169.64, 33.9784, 115.1328}, 131 wantRevY: []float32{112.5312, 31.7592, 176.4368}, 132 wantRevXY: []float32{114.1728, 33.9784, 170.6}, 133 }, 134 }, 135 136 Trans: []SgemvSubcase{ // (2x2) 137 {alpha: 0, beta: 0, 138 want: []float32{0, 0}, 139 wantRevX: []float32{0, 0}, 140 wantRevY: []float32{0, 0}, 141 wantRevXY: []float32{0, 0}, 142 }, 143 {alpha: 0, beta: 1, 144 want: []float32{3.38, 3}, 145 wantRevX: []float32{3.38, 3}, 146 wantRevY: []float32{3.38, 3}, 147 wantRevXY: []float32{3.38, 3}, 148 }, 149 {alpha: 1, beta: 0, 150 want: []float32{19.916, 17.2139}, 151 wantRevX: []float32{19.5336, 17.2251}, 152 wantRevY: []float32{17.2139, 19.916}, 153 wantRevXY: []float32{17.2251, 19.5336}, 154 }, 155 {alpha: 8, beta: -6, 156 want: []float32{139.048, 119.7112}, 157 wantRevX: []float32{135.9888, 119.8008}, 158 wantRevY: []float32{117.4312, 141.328}, 159 wantRevXY: []float32{117.5208, 138.2688}, 160 }, 161 }, 162 }, 163 164 { // 3x3 165 m: 3, 166 n: 3, 167 A: []float32{ 168 4.38, 4.4, 4.26, 169 4.18, 0.56, 2.57, 170 2.59, 2.07, 0.46, 171 }, 172 x: []float32{4.82, 1.82, 1.12}, 173 y: []float32{0.24, 1.41, 3.45}, 174 175 NoTrans: []SgemvSubcase{ // (2x2, 2x1, 1x2, 1x1) 176 {alpha: 0, beta: 0, 177 want: []float32{0, 0, 0}, 178 wantRevX: []float32{0, 0, 0}, 179 wantRevY: []float32{0, 0, 0}, 180 wantRevXY: []float32{0, 0, 0}, 181 }, 182 {alpha: 0, beta: 1, 183 want: []float32{0.24, 1.41, 3.45}, 184 wantRevX: []float32{0.24, 1.41, 3.45}, 185 wantRevY: []float32{0.24, 1.41, 3.45}, 186 wantRevXY: []float32{0.24, 1.41, 3.45}, 187 }, 188 {alpha: 1, beta: 0, 189 want: []float32{33.8908, 24.0452, 16.7664}, 190 wantRevX: []float32{33.4468, 18.0882, 8.8854}, 191 wantRevY: []float32{16.7664, 24.0452, 33.8908}, 192 wantRevXY: []float32{8.8854, 18.0882, 33.4468}, 193 }, 194 {alpha: 8, beta: -6, 195 want: []float32{269.6864, 183.9016, 113.4312}, 196 wantRevX: []float32{266.1344, 136.2456, 50.3832}, 197 wantRevY: []float32{132.6912, 183.9016, 250.4264}, 198 wantRevXY: []float32{69.6432, 136.2456, 246.8744}, 199 }, 200 }, 201 202 Trans: []SgemvSubcase{ // (2x2, 1x2, 2x1, 1x1) 203 {alpha: 0, beta: 0, 204 want: []float32{0, 0, 0}, 205 wantRevX: []float32{0, 0, 0}, 206 wantRevY: []float32{0, 0, 0}, 207 wantRevXY: []float32{0, 0, 0}, 208 }, 209 {alpha: 0, beta: 1, 210 want: []float32{4.82, 1.82, 1.12}, 211 wantRevX: []float32{4.82, 1.82, 1.12}, 212 wantRevY: []float32{4.82, 1.82, 1.12}, 213 wantRevXY: []float32{4.82, 1.82, 1.12}, 214 }, 215 {alpha: 1, beta: 0, 216 want: []float32{15.8805, 8.9871, 6.2331}, 217 wantRevX: []float32{21.6264, 16.4664, 18.4311}, 218 wantRevY: []float32{6.2331, 8.9871, 15.8805}, 219 wantRevXY: []float32{18.4311, 16.4664, 21.6264}, 220 }, 221 {alpha: 8, beta: -6, 222 want: []float32{98.124, 60.9768, 43.1448}, 223 wantRevX: []float32{144.0912, 120.8112, 140.7288}, 224 wantRevY: []float32{20.9448, 60.9768, 120.324}, 225 wantRevXY: []float32{118.5288, 120.8112, 166.2912}, 226 }, 227 }, 228 }, 229 230 { // 5x3 231 m: 5, 232 n: 3, 233 A: []float32{ 234 4.1, 6.2, 8.1, 235 9.6, 3.5, 9.1, 236 10, 7, 3, 237 1, 1, 2, 238 9, 2, 5, 239 }, 240 x: []float32{1, 2, 3}, 241 y: []float32{7, 8, 9, 10, 11}, 242 243 NoTrans: []SgemvSubcase{ //(4x2, 4x1, 1x2, 1x1) 244 {alpha: 0, beta: 0, 245 want: []float32{0, 0, 0, 0, 0}, 246 wantRevX: []float32{0, 0, 0, 0, 0}, 247 wantRevY: []float32{0, 0, 0, 0, 0}, 248 wantRevXY: []float32{0, 0, 0, 0, 0}, 249 }, 250 {alpha: 0, beta: 1, 251 want: []float32{7, 8, 9, 10, 11}, 252 wantRevX: []float32{7, 8, 9, 10, 11}, 253 wantRevY: []float32{7, 8, 9, 10, 11}, 254 wantRevXY: []float32{7, 8, 9, 10, 11}, 255 }, 256 {alpha: 1, beta: 0, 257 want: []float32{40.8, 43.9, 33, 9, 28}, 258 wantRevX: []float32{32.8, 44.9, 47, 7, 36}, 259 wantRevY: []float32{28, 9, 33, 43.9, 40.8}, 260 wantRevXY: []float32{36, 7, 47, 44.9, 32.8}, 261 }, 262 {alpha: 8, beta: -6, 263 want: []float32{284.4, 303.2, 210, 12, 158}, 264 wantRevX: []float32{220.4, 311.2, 322, -4, 222}, 265 wantRevY: []float32{182, 24, 210, 291.2, 260.4}, 266 wantRevXY: []float32{246, 8, 322, 299.2, 196.4}, 267 }, 268 }, 269 270 Trans: []SgemvSubcase{ //( 2x4, 1x4, 2x1, 1x1) 271 {alpha: 0, beta: 0, 272 want: []float32{0, 0, 0}, 273 wantRevX: []float32{0, 0, 0}, 274 wantRevY: []float32{0, 0, 0}, 275 wantRevXY: []float32{0, 0, 0}, 276 }, 277 {alpha: 0, beta: 1, 278 want: []float32{1, 2, 3}, 279 wantRevX: []float32{1, 2, 3}, 280 wantRevY: []float32{1, 2, 3}, 281 wantRevXY: []float32{1, 2, 3}, 282 }, 283 {alpha: 1, beta: 0, 284 want: []float32{304.5, 166.4, 231.5}, 285 wantRevX: []float32{302.1, 188.2, 258.1}, 286 wantRevY: []float32{231.5, 166.4, 304.5}, 287 wantRevXY: []float32{258.1, 188.2, 302.1}, 288 }, 289 {alpha: 8, beta: -6, 290 want: []float32{2430, 1319.2, 1834}, 291 wantRevX: []float32{2410.8, 1493.6, 2046.8}, 292 wantRevY: []float32{1846, 1319.2, 2418}, 293 wantRevXY: []float32{2058.8, 1493.6, 2398.8}, 294 }, 295 }, 296 }, 297 298 { // 3x5 299 m: 3, 300 n: 5, 301 A: []float32{ 302 1.4, 2.34, 3.96, 0.96, 2.3, 303 3.43, 0.62, 1.09, 0.2, 3.56, 304 1.15, 0.58, 3.8, 1.16, 0.01, 305 }, 306 x: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, 307 y: []float32{2.46, 2.22, 4.75}, 308 309 NoTrans: []SgemvSubcase{ // (2x4, 2x1, 1x4, 1x1) 310 {alpha: 0, beta: 0, 311 want: []float32{0, 0, 0}, 312 wantRevX: []float32{0, 0, 0}, 313 wantRevY: []float32{0, 0, 0}, 314 wantRevXY: []float32{0, 0, 0}, 315 }, 316 {alpha: 0, beta: 1, 317 want: []float32{2.46, 2.22, 4.75}, 318 wantRevX: []float32{2.46, 2.22, 4.75}, 319 wantRevY: []float32{2.46, 2.22, 4.75}, 320 wantRevXY: []float32{2.46, 2.22, 4.75}, 321 }, 322 {alpha: 1, beta: 0, 323 want: []float32{37.8098, 28.8939, 22.5949}, 324 wantRevX: []float32{32.8088, 27.5978, 25.8927}, 325 wantRevY: []float32{22.5949, 28.8939, 37.8098}, 326 wantRevXY: []float32{25.8927, 27.5978, 32.8088}, 327 }, 328 {alpha: 8, beta: -6, 329 want: []float32{287.7184, 217.8312, 152.2592}, 330 wantRevX: []float32{247.7104, 207.4624, 178.6416}, 331 wantRevY: []float32{165.9992, 217.8312, 273.9784}, 332 wantRevXY: []float32{192.3816, 207.4624, 233.9704}, 333 }, 334 }, 335 336 Trans: []SgemvSubcase{ // (4x2, 1x2, 4x1, 1x1) 337 {alpha: 0, beta: 0, 338 want: []float32{0, 0, 0, 0, 0}, 339 wantRevX: []float32{0, 0, 0, 0, 0}, 340 wantRevY: []float32{0, 0, 0, 0, 0}, 341 wantRevXY: []float32{0, 0, 0, 0, 0}, 342 }, 343 {alpha: 0, beta: 1, 344 want: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, 345 wantRevX: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, 346 wantRevY: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, 347 wantRevXY: []float32{2.34, 2.82, 4.73, 0.22, 3.91}, 348 }, 349 {alpha: 1, beta: 0, 350 want: []float32{16.5211, 9.8878, 30.2114, 8.3156, 13.6087}, 351 wantRevX: []float32{17.0936, 13.9182, 30.5778, 7.8576, 18.8528}, 352 wantRevY: []float32{13.6087, 8.3156, 30.2114, 9.8878, 16.5211}, 353 wantRevXY: []float32{18.8528, 7.8576, 30.5778, 13.9182, 17.0936}, 354 }, 355 {alpha: 8, beta: -6, 356 want: []float32{118.1288, 62.1824, 213.3112, 65.2048, 85.4096}, 357 wantRevX: []float32{122.7088, 94.4256, 216.2424, 61.5408, 127.3624}, 358 wantRevY: []float32{94.8296, 49.6048, 213.3112, 77.7824, 108.7088}, 359 wantRevXY: []float32{136.7824, 45.9408, 216.2424, 110.0256, 113.2888}, 360 }, 361 }, 362 }, 363 364 { // 7x7 & nan test 365 m: 7, 366 n: 7, 367 A: []float32{ 368 0.9, 2.6, 0.5, 1.8, 2.3, 0.6, 0.2, 369 1.6, 0.6, 1.3, 2.1, 1.4, 0.4, 0.8, 370 2.9, 0.9, 2.3, 2.5, 1.4, 1.8, 1.6, 371 2.6, 2.8, 2.1, 0.3, nan, 2.2, 1.3, 372 0.2, 2.2, 1.8, 1.8, 2.1, 1.3, 1.4, 373 1.7, 1.4, 2.3, 2., 1., 0., 1.4, 374 2.1, 1.9, 0.8, 2.9, 1.3, 0.3, 1.3, 375 }, 376 x: []float32{0.4, 2.8, 3.5, 0.3, 0.6, 2.5, 3.1}, 377 y: []float32{3.2, 4.4, 5., 4.3, 4.1, 1.4, 0.2}, 378 379 NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) 380 {alpha: 0, beta: 0, 381 want: []float32{0, 0, 0, nan, 0, 0, 0}, 382 wantRevX: []float32{0, 0, 0, nan, 0, 0, 0}, 383 wantRevY: []float32{0, 0, 0, nan, 0, 0, 0}, 384 wantRevXY: []float32{0, 0, 0, nan, 0, 0, 0}, 385 }, 386 {alpha: 0, beta: 1, 387 want: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 388 wantRevX: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 389 wantRevY: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 390 wantRevXY: []float32{3.2, 4.4, 5., nan, 4.1, 1.4, 0.2}, 391 }, 392 {alpha: 1, beta: 0, 393 want: []float32{13.43, 11.82, 22.78, nan, 21.93, 18.19, 15.39}, 394 wantRevX: []float32{19.94, 14.21, 23.95, nan, 19.29, 14.81, 18.52}, 395 wantRevY: []float32{15.39, 18.19, 21.93, nan, 22.78, 11.82, 13.43}, 396 wantRevXY: []float32{18.52, 14.81, 19.29, nan, 23.95, 14.21, 19.94}, 397 }, 398 {alpha: 8, beta: -6, 399 want: []float32{88.24, 68.16, 152.24, nan, 150.84, 137.12, 121.92}, 400 wantRevX: []float32{140.32, 87.28, 161.6, nan, 129.72, 110.08, 146.96}, 401 wantRevY: []float32{103.92, 119.12, 145.44, nan, 157.64, 86.16, 106.24}, 402 wantRevXY: []float32{128.96, 92.08, 124.32, nan, 167., 105.28, 158.32}, 403 }, 404 }, 405 406 Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) 407 {alpha: 0, beta: 0, 408 want: []float32{0, 0, 0, 0, nan, 0, 0}, 409 wantRevX: []float32{0, 0, 0, 0, nan, 0, 0}, 410 wantRevY: []float32{0, 0, nan, 0, 0, 0, 0}, 411 wantRevXY: []float32{0, 0, nan, 0, 0, 0, 0}, 412 }, 413 {alpha: 0, beta: 1, 414 want: []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, 415 wantRevX: []float32{0.4, 2.8, 3.5, 0.3, nan, 2.5, 3.1}, 416 wantRevY: []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, 417 wantRevXY: []float32{0.4, 2.8, nan, 0.3, 0.6, 2.5, 3.1}, 418 }, 419 {alpha: 1, beta: 0, 420 want: []float32{39.22, 38.86, 38.61, 39.55, nan, 27.53, 25.71}, 421 wantRevX: []float32{40.69, 40.33, 42.06, 41.92, nan, 24.98, 30.63}, 422 wantRevY: []float32{25.71, 27.53, nan, 39.55, 38.61, 38.86, 39.22}, 423 wantRevXY: []float32{30.63, 24.98, nan, 41.92, 42.06, 40.33, 40.69}, 424 }, 425 {alpha: 8, beta: -6, 426 want: []float32{311.36, 294.08, 287.88, 314.6, nan, 205.24, 187.08}, 427 wantRevX: []float32{323.12, 305.84, 315.48, 333.56, nan, 184.84, 226.44}, 428 wantRevY: []float32{203.28, 203.44, nan, 314.6, 305.28, 295.88, 295.16}, 429 wantRevXY: []float32{242.64, 183.04, nan, 333.56, 332.88, 307.64, 306.92}, 430 }, 431 }, 432 }, 433 { // 11x11 434 m: 11, 435 n: 11, 436 A: []float32{ 437 0.4, 3., 2.5, 2., 0.4, 2., 2., 1., 0.1, 0.3, 2., 438 1.7, 0.7, 2.6, 1.6, 0.5, 2.4, 3., 0.9, 0.1, 2.8, 1.3, 439 1.1, 2.2, 1.5, 0.8, 2.9, 0.4, 0.5, 1.7, 0.8, 2.6, 0.7, 440 2.2, 1.7, 0.8, 2.9, 0.7, 0.7, 1.7, 1.8, 1.9, 2.4, 1.9, 441 0.3, 0.5, 1.6, 1.5, 1.5, 2.4, 1.7, 1.2, 1.9, 2.8, 1.2, 442 1.4, 2.2, 1.7, 1.4, 2.7, 1.4, 0.9, 1.8, 0.5, 1.2, 1.9, 443 0.8, 2.3, 1.7, 1.3, 2., 2.8, 2.6, 0.4, 2.5, 1.3, 0.5, 444 2.4, 2.8, 1.1, 0.2, 0.4, 2.8, 0.5, 0.5, 0., 2.8, 1.9, 445 2.3, 1.8, 2.3, 1.7, 1.1, 0.1, 1.4, 1.2, 1.9, 0.5, 0.6, 446 0.6, 2.4, 1.2, 0.3, 1.4, 1.3, 2.5, 2.6, 0., 1.3, 2.6, 447 0.7, 1.5, 0.2, 1.4, 1.1, 1.8, 0.2, 1., 1., 0.6, 1.2, 448 }, 449 x: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 450 y: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 451 452 NoTrans: []SgemvSubcase{ // (4x4, 4x2, 4x1, 2x4, 2x2, 2x1, 1x4, 1x2, 1x1) 453 {alpha: 0, beta: 0, 454 want: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 455 wantRevX: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 456 wantRevY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 457 wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 458 }, 459 {alpha: 0, beta: 1, 460 want: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 461 wantRevX: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 462 wantRevY: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 463 wantRevXY: []float32{3.8, 3.4, 1.6, 4.8, 4.3, 0.5, 2., 2.5, 1.5, 2.8, 3.9}, 464 }, 465 {alpha: 1, beta: 0, 466 want: []float32{32.71, 38.93, 33.55, 45.46, 39.24, 38.41, 46.23, 25.78, 37.33, 37.42, 24.63}, 467 wantRevX: []float32{39.82, 43.78, 37.73, 41.19, 40.17, 44.41, 42.75, 28.14, 35.6, 41.25, 23.9}, 468 wantRevY: []float32{24.63, 37.42, 37.33, 25.78, 46.23, 38.41, 39.24, 45.46, 33.55, 38.93, 32.71}, 469 wantRevXY: []float32{23.9, 41.25, 35.6, 28.14, 42.75, 44.41, 40.17, 41.19, 37.73, 43.78, 39.82}, 470 }, 471 {alpha: 8, beta: -6, 472 want: []float32{238.88, 291.04, 258.8, 334.88, 288.12, 304.28, 357.84, 191.24, 289.64, 282.56, 173.64}, 473 wantRevX: []float32{295.76, 329.84, 292.24, 300.72, 295.56, 352.28, 330., 210.12, 275.8, 313.2, 167.8}, 474 wantRevY: []float32{174.24, 278.96, 289.04, 177.44, 344.04, 304.28, 301.92, 348.68, 259.4, 294.64, 238.28}, 475 wantRevXY: []float32{168.4, 309.6, 275.2, 196.32, 316.2, 352.28, 309.36, 314.52, 292.84, 333.44, 295.16}, 476 }, 477 }, 478 479 Trans: []SgemvSubcase{ // (4x4, 2x4, 1x4, 4x2, 2x2, 1x2, 4x1, 2x1, 1x1) 480 {alpha: 0, beta: 0, 481 want: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 482 wantRevX: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 483 wantRevY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 484 wantRevXY: []float32{0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0}, 485 }, 486 {alpha: 0, beta: 1, 487 want: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 488 wantRevX: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 489 wantRevY: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 490 wantRevXY: []float32{2.5, 1.2, 0.8, 2.9, 3.4, 1.8, 4.6, 3.3, 3.8, 0.9, 1.1}, 491 }, 492 {alpha: 1, beta: 0, 493 want: []float32{37.07, 55.58, 46.05, 47.34, 33.88, 54.19, 50.85, 39.31, 31.29, 55.31, 46.98}, 494 wantRevX: []float32{38.11, 63.38, 46.44, 40.04, 34.63, 59.27, 50.13, 35.45, 28.26, 51.64, 46.22}, 495 wantRevY: []float32{46.98, 55.31, 31.29, 39.31, 50.85, 54.19, 33.88, 47.34, 46.05, 55.58, 37.07}, 496 wantRevXY: []float32{46.22, 51.64, 28.26, 35.45, 50.13, 59.27, 34.63, 40.04, 46.44, 63.38, 38.11}, 497 }, 498 {alpha: 8, beta: -6, 499 want: []float32{281.56, 437.44, 363.6, 361.32, 250.64, 422.72, 379.2, 294.68, 227.52, 437.08, 369.24}, 500 wantRevX: []float32{289.88, 499.84, 366.72, 302.92, 256.64, 463.36, 373.44, 263.8, 203.28, 407.72, 363.16}, 501 wantRevY: []float32{360.84, 435.28, 245.52, 297.08, 386.4, 422.72, 243.44, 358.92, 345.6, 439.24, 289.96}, 502 wantRevXY: []float32{354.76, 405.92, 221.28, 266.2, 380.64, 463.36, 249.44, 300.52, 348.72, 501.64, 298.28}, 503 }, 504 }, 505 }, 506 } 507 508 func TestGemv(t *testing.T) { 509 for _, test := range SgemvCases { 510 t.Run(fmt.Sprintf("(%vx%v)", test.m, test.n), func(tt *testing.T) { 511 for i, cas := range test.NoTrans { 512 tt.Run(fmt.Sprintf("NoTrans case %v", i), func(st *testing.T) { 513 sgemvcomp(st, test, false, cas, i) 514 }) 515 } 516 for i, cas := range test.Trans { 517 tt.Run(fmt.Sprintf("Trans case %v", i), func(st *testing.T) { 518 sgemvcomp(st, test, true, cas, i) 519 }) 520 } 521 }) 522 } 523 } 524 525 func sgemvcomp(t *testing.T, test SgemvCase, trans bool, cas SgemvSubcase, i int) { 526 const ( 527 tol = 1e-6 528 529 xGdVal, yGdVal, aGdVal = 0.5, 1.5, 10 530 gdLn = 4 531 ) 532 if trans { 533 test.x, test.y = test.y, test.x 534 } 535 prefix := fmt.Sprintf("Test (%vx%v) t:%v (a:%v,b:%v)", test.m, test.n, trans, cas.alpha, cas.beta) 536 xg, yg := guardVector(test.x, xGdVal, gdLn), guardVector(test.y, yGdVal, gdLn) 537 x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] 538 ag := guardVector(test.A, aGdVal, gdLn) 539 a := ag[gdLn : len(ag)-gdLn] 540 541 lda := uintptr(test.n) 542 if trans { 543 GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) 544 } else { 545 GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, a, lda, x, 1, cas.beta, y, 1) 546 } 547 for i := range cas.want { 548 if !sameApprox(y[i], cas.want[i], tol) { 549 t.Errorf(msgVal, prefix, i, y[i], cas.want[i]) 550 } 551 } 552 553 if !isValidGuard(xg, xGdVal, gdLn) { 554 t.Errorf(msgGuard, prefix, "x", xg[:gdLn], xg[len(xg)-gdLn:]) 555 } 556 if !isValidGuard(yg, yGdVal, gdLn) { 557 t.Errorf(msgGuard, prefix, "y", yg[:gdLn], yg[len(yg)-gdLn:]) 558 } 559 if !isValidGuard(ag, aGdVal, gdLn) { 560 t.Errorf(msgGuard, prefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) 561 } 562 if !equalStrided(test.x, x, 1) { 563 t.Errorf(msgReadOnly, prefix, "x") 564 } 565 if !equalStrided(test.A, a, 1) { 566 t.Errorf(msgReadOnly, prefix, "a") 567 } 568 569 for _, inc := range newIncSet(-1, 1, 2, 3, 90) { 570 incPrefix := fmt.Sprintf("%s inc(x:%v, y:%v)", prefix, inc.x, inc.y) 571 want, incY := cas.want, inc.y 572 switch { 573 case inc.x < 0 && inc.y < 0: 574 want = cas.wantRevXY 575 incY = -inc.y 576 case inc.x < 0: 577 want = cas.wantRevX 578 case inc.y < 0: 579 want = cas.wantRevY 580 incY = -inc.y 581 } 582 xg, yg := guardIncVector(test.x, xGdVal, inc.x, gdLn), guardIncVector(test.y, yGdVal, inc.y, gdLn) 583 x, y := xg[gdLn:len(xg)-gdLn], yg[gdLn:len(yg)-gdLn] 584 ag := guardVector(test.A, aGdVal, gdLn) 585 a := ag[gdLn : len(ag)-gdLn] 586 587 if trans { 588 GemvT(uintptr(test.m), uintptr(test.n), cas.alpha, 589 a, lda, x, uintptr(inc.x), 590 cas.beta, y, uintptr(inc.y)) 591 } else { 592 GemvN(uintptr(test.m), uintptr(test.n), cas.alpha, 593 a, lda, x, uintptr(inc.x), 594 cas.beta, y, uintptr(inc.y)) 595 } 596 for i := range want { 597 if !sameApprox(y[i*incY], want[i], tol) { 598 t.Errorf(msgVal, incPrefix, i, y[i*incY], want[i]) 599 t.Error(y[i*incY] - want[i]) 600 } 601 } 602 603 checkValidIncGuard(t, xg, xGdVal, inc.x, gdLn) 604 checkValidIncGuard(t, yg, yGdVal, inc.y, gdLn) 605 if !isValidGuard(ag, aGdVal, gdLn) { 606 t.Errorf(msgGuard, incPrefix, "a", ag[:gdLn], ag[len(ag)-gdLn:]) 607 } 608 if !equalStrided(test.x, x, inc.x) { 609 t.Errorf(msgReadOnly, incPrefix, "x") 610 } 611 if !equalStrided(test.A, a, 1) { 612 t.Errorf(msgReadOnly, incPrefix, "a") 613 } 614 } 615 } 616 617 // equalStrided returns true if the strided vector x contains elements of the 618 // dense vector ref at indices i*inc, false otherwise. 619 func equalStrided(ref, x []float32, inc int) bool { 620 if inc < 0 { 621 inc = -inc 622 } 623 for i, v := range ref { 624 if !scalarSame(x[i*inc], v) { 625 return false 626 } 627 } 628 return true 629 } 630 631 func scalarSame(a, b float32) bool { 632 return a == b || (math32.IsNaN(a) && math32.IsNaN(b)) 633 }