github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level3float32.go (about) 1 // Code generated by "go generate github.com/jingcheng-WU/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2014 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 "github.com/jingcheng-WU/gonum/blas" 11 "github.com/jingcheng-WU/gonum/internal/asm/f32" 12 ) 13 14 var _ blas.Float32Level3 = Implementation{} 15 16 // Strsm solves one of the matrix equations 17 // A * X = alpha * B if tA == blas.NoTrans and side == blas.Left 18 // Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 19 // X * A = alpha * B if tA == blas.NoTrans and side == blas.Right 20 // X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 21 // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a 22 // scalar. 23 // 24 // At entry to the function, X contains the values of B, and the result is 25 // stored in-place into X. 26 // 27 // No check is made that A is invertible. 28 // 29 // Float32 implementations are autogenerated and not directly tested. 30 func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { 31 if s != blas.Left && s != blas.Right { 32 panic(badSide) 33 } 34 if ul != blas.Lower && ul != blas.Upper { 35 panic(badUplo) 36 } 37 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 38 panic(badTranspose) 39 } 40 if d != blas.NonUnit && d != blas.Unit { 41 panic(badDiag) 42 } 43 if m < 0 { 44 panic(mLT0) 45 } 46 if n < 0 { 47 panic(nLT0) 48 } 49 k := n 50 if s == blas.Left { 51 k = m 52 } 53 if lda < max(1, k) { 54 panic(badLdA) 55 } 56 if ldb < max(1, n) { 57 panic(badLdB) 58 } 59 60 // Quick return if possible. 61 if m == 0 || n == 0 { 62 return 63 } 64 65 // For zero matrix size the following slice length checks are trivially satisfied. 66 if len(a) < lda*(k-1)+k { 67 panic(shortA) 68 } 69 if len(b) < ldb*(m-1)+n { 70 panic(shortB) 71 } 72 73 if alpha == 0 { 74 for i := 0; i < m; i++ { 75 btmp := b[i*ldb : i*ldb+n] 76 for j := range btmp { 77 btmp[j] = 0 78 } 79 } 80 return 81 } 82 nonUnit := d == blas.NonUnit 83 if s == blas.Left { 84 if tA == blas.NoTrans { 85 if ul == blas.Upper { 86 for i := m - 1; i >= 0; i-- { 87 btmp := b[i*ldb : i*ldb+n] 88 if alpha != 1 { 89 f32.ScalUnitary(alpha, btmp) 90 } 91 for ka, va := range a[i*lda+i+1 : i*lda+m] { 92 if va != 0 { 93 k := ka + i + 1 94 f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 95 } 96 } 97 if nonUnit { 98 tmp := 1 / a[i*lda+i] 99 f32.ScalUnitary(tmp, btmp) 100 } 101 } 102 return 103 } 104 for i := 0; i < m; i++ { 105 btmp := b[i*ldb : i*ldb+n] 106 if alpha != 1 { 107 f32.ScalUnitary(alpha, btmp) 108 } 109 for k, va := range a[i*lda : i*lda+i] { 110 if va != 0 { 111 f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 112 } 113 } 114 if nonUnit { 115 tmp := 1 / a[i*lda+i] 116 f32.ScalUnitary(tmp, btmp) 117 } 118 } 119 return 120 } 121 // Cases where a is transposed 122 if ul == blas.Upper { 123 for k := 0; k < m; k++ { 124 btmpk := b[k*ldb : k*ldb+n] 125 if nonUnit { 126 tmp := 1 / a[k*lda+k] 127 f32.ScalUnitary(tmp, btmpk) 128 } 129 for ia, va := range a[k*lda+k+1 : k*lda+m] { 130 if va != 0 { 131 i := ia + k + 1 132 f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 133 } 134 } 135 if alpha != 1 { 136 f32.ScalUnitary(alpha, btmpk) 137 } 138 } 139 return 140 } 141 for k := m - 1; k >= 0; k-- { 142 btmpk := b[k*ldb : k*ldb+n] 143 if nonUnit { 144 tmp := 1 / a[k*lda+k] 145 f32.ScalUnitary(tmp, btmpk) 146 } 147 for i, va := range a[k*lda : k*lda+k] { 148 if va != 0 { 149 f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 150 } 151 } 152 if alpha != 1 { 153 f32.ScalUnitary(alpha, btmpk) 154 } 155 } 156 return 157 } 158 // Cases where a is to the right of X. 159 if tA == blas.NoTrans { 160 if ul == blas.Upper { 161 for i := 0; i < m; i++ { 162 btmp := b[i*ldb : i*ldb+n] 163 if alpha != 1 { 164 f32.ScalUnitary(alpha, btmp) 165 } 166 for k, vb := range btmp { 167 if vb == 0 { 168 continue 169 } 170 if nonUnit { 171 btmp[k] /= a[k*lda+k] 172 } 173 f32.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 174 } 175 } 176 return 177 } 178 for i := 0; i < m; i++ { 179 btmp := b[i*ldb : i*ldb+n] 180 if alpha != 1 { 181 f32.ScalUnitary(alpha, btmp) 182 } 183 for k := n - 1; k >= 0; k-- { 184 if btmp[k] == 0 { 185 continue 186 } 187 if nonUnit { 188 btmp[k] /= a[k*lda+k] 189 } 190 f32.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k]) 191 } 192 } 193 return 194 } 195 // Cases where a is transposed. 196 if ul == blas.Upper { 197 for i := 0; i < m; i++ { 198 btmp := b[i*ldb : i*ldb+n] 199 for j := n - 1; j >= 0; j-- { 200 tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) 201 if nonUnit { 202 tmp /= a[j*lda+j] 203 } 204 btmp[j] = tmp 205 } 206 } 207 return 208 } 209 for i := 0; i < m; i++ { 210 btmp := b[i*ldb : i*ldb+n] 211 for j := 0; j < n; j++ { 212 tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 213 if nonUnit { 214 tmp /= a[j*lda+j] 215 } 216 btmp[j] = tmp 217 } 218 } 219 } 220 221 // Ssymm performs one of the matrix-matrix operations 222 // C = alpha * A * B + beta * C if side == blas.Left 223 // C = alpha * B * A + beta * C if side == blas.Right 224 // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha 225 // is a scalar. 226 // 227 // Float32 implementations are autogenerated and not directly tested. 228 func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { 229 if s != blas.Right && s != blas.Left { 230 panic(badSide) 231 } 232 if ul != blas.Lower && ul != blas.Upper { 233 panic(badUplo) 234 } 235 if m < 0 { 236 panic(mLT0) 237 } 238 if n < 0 { 239 panic(nLT0) 240 } 241 k := n 242 if s == blas.Left { 243 k = m 244 } 245 if lda < max(1, k) { 246 panic(badLdA) 247 } 248 if ldb < max(1, n) { 249 panic(badLdB) 250 } 251 if ldc < max(1, n) { 252 panic(badLdC) 253 } 254 255 // Quick return if possible. 256 if m == 0 || n == 0 { 257 return 258 } 259 260 // For zero matrix size the following slice length checks are trivially satisfied. 261 if len(a) < lda*(k-1)+k { 262 panic(shortA) 263 } 264 if len(b) < ldb*(m-1)+n { 265 panic(shortB) 266 } 267 if len(c) < ldc*(m-1)+n { 268 panic(shortC) 269 } 270 271 // Quick return if possible. 272 if alpha == 0 && beta == 1 { 273 return 274 } 275 276 if beta == 0 { 277 for i := 0; i < m; i++ { 278 ctmp := c[i*ldc : i*ldc+n] 279 for j := range ctmp { 280 ctmp[j] = 0 281 } 282 } 283 } 284 285 if alpha == 0 { 286 if beta != 0 { 287 for i := 0; i < m; i++ { 288 ctmp := c[i*ldc : i*ldc+n] 289 for j := 0; j < n; j++ { 290 ctmp[j] *= beta 291 } 292 } 293 } 294 return 295 } 296 297 isUpper := ul == blas.Upper 298 if s == blas.Left { 299 for i := 0; i < m; i++ { 300 atmp := alpha * a[i*lda+i] 301 btmp := b[i*ldb : i*ldb+n] 302 ctmp := c[i*ldc : i*ldc+n] 303 for j, v := range btmp { 304 ctmp[j] *= beta 305 ctmp[j] += atmp * v 306 } 307 308 for k := 0; k < i; k++ { 309 var atmp float32 310 if isUpper { 311 atmp = a[k*lda+i] 312 } else { 313 atmp = a[i*lda+k] 314 } 315 atmp *= alpha 316 f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 317 } 318 for k := i + 1; k < m; k++ { 319 var atmp float32 320 if isUpper { 321 atmp = a[i*lda+k] 322 } else { 323 atmp = a[k*lda+i] 324 } 325 atmp *= alpha 326 f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 327 } 328 } 329 return 330 } 331 if isUpper { 332 for i := 0; i < m; i++ { 333 for j := n - 1; j >= 0; j-- { 334 tmp := alpha * b[i*ldb+j] 335 var tmp2 float32 336 atmp := a[j*lda+j+1 : j*lda+n] 337 btmp := b[i*ldb+j+1 : i*ldb+n] 338 ctmp := c[i*ldc+j+1 : i*ldc+n] 339 for k, v := range atmp { 340 ctmp[k] += tmp * v 341 tmp2 += btmp[k] * v 342 } 343 c[i*ldc+j] *= beta 344 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 345 } 346 } 347 return 348 } 349 for i := 0; i < m; i++ { 350 for j := 0; j < n; j++ { 351 tmp := alpha * b[i*ldb+j] 352 var tmp2 float32 353 atmp := a[j*lda : j*lda+j] 354 btmp := b[i*ldb : i*ldb+j] 355 ctmp := c[i*ldc : i*ldc+j] 356 for k, v := range atmp { 357 ctmp[k] += tmp * v 358 tmp2 += btmp[k] * v 359 } 360 c[i*ldc+j] *= beta 361 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 362 } 363 } 364 } 365 366 // Ssyrk performs one of the symmetric rank-k operations 367 // C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans 368 // C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 369 // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and 370 // beta are scalars. 371 // 372 // Float32 implementations are autogenerated and not directly tested. 373 func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) { 374 if ul != blas.Lower && ul != blas.Upper { 375 panic(badUplo) 376 } 377 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 378 panic(badTranspose) 379 } 380 if n < 0 { 381 panic(nLT0) 382 } 383 if k < 0 { 384 panic(kLT0) 385 } 386 row, col := k, n 387 if tA == blas.NoTrans { 388 row, col = n, k 389 } 390 if lda < max(1, col) { 391 panic(badLdA) 392 } 393 if ldc < max(1, n) { 394 panic(badLdC) 395 } 396 397 // Quick return if possible. 398 if n == 0 { 399 return 400 } 401 402 // For zero matrix size the following slice length checks are trivially satisfied. 403 if len(a) < lda*(row-1)+col { 404 panic(shortA) 405 } 406 if len(c) < ldc*(n-1)+n { 407 panic(shortC) 408 } 409 410 if alpha == 0 { 411 if beta == 0 { 412 if ul == blas.Upper { 413 for i := 0; i < n; i++ { 414 ctmp := c[i*ldc+i : i*ldc+n] 415 for j := range ctmp { 416 ctmp[j] = 0 417 } 418 } 419 return 420 } 421 for i := 0; i < n; i++ { 422 ctmp := c[i*ldc : i*ldc+i+1] 423 for j := range ctmp { 424 ctmp[j] = 0 425 } 426 } 427 return 428 } 429 if ul == blas.Upper { 430 for i := 0; i < n; i++ { 431 ctmp := c[i*ldc+i : i*ldc+n] 432 for j := range ctmp { 433 ctmp[j] *= beta 434 } 435 } 436 return 437 } 438 for i := 0; i < n; i++ { 439 ctmp := c[i*ldc : i*ldc+i+1] 440 for j := range ctmp { 441 ctmp[j] *= beta 442 } 443 } 444 return 445 } 446 if tA == blas.NoTrans { 447 if ul == blas.Upper { 448 for i := 0; i < n; i++ { 449 ctmp := c[i*ldc+i : i*ldc+n] 450 atmp := a[i*lda : i*lda+k] 451 if beta == 0 { 452 for jc := range ctmp { 453 j := jc + i 454 ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k]) 455 } 456 } else { 457 for jc, vc := range ctmp { 458 j := jc + i 459 ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k]) 460 } 461 } 462 } 463 return 464 } 465 for i := 0; i < n; i++ { 466 ctmp := c[i*ldc : i*ldc+i+1] 467 atmp := a[i*lda : i*lda+k] 468 if beta == 0 { 469 for j := range ctmp { 470 ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp) 471 } 472 } else { 473 for j, vc := range ctmp { 474 ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp) 475 } 476 } 477 } 478 return 479 } 480 // Cases where a is transposed. 481 if ul == blas.Upper { 482 for i := 0; i < n; i++ { 483 ctmp := c[i*ldc+i : i*ldc+n] 484 if beta == 0 { 485 for j := range ctmp { 486 ctmp[j] = 0 487 } 488 } else if beta != 1 { 489 for j := range ctmp { 490 ctmp[j] *= beta 491 } 492 } 493 for l := 0; l < k; l++ { 494 tmp := alpha * a[l*lda+i] 495 if tmp != 0 { 496 f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp) 497 } 498 } 499 } 500 return 501 } 502 for i := 0; i < n; i++ { 503 ctmp := c[i*ldc : i*ldc+i+1] 504 if beta != 1 { 505 for j := range ctmp { 506 ctmp[j] *= beta 507 } 508 } 509 for l := 0; l < k; l++ { 510 tmp := alpha * a[l*lda+i] 511 if tmp != 0 { 512 f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp) 513 } 514 } 515 } 516 } 517 518 // Ssyr2k performs one of the symmetric rank 2k operations 519 // C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans 520 // C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 521 // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and 522 // alpha and beta are scalars. 523 // 524 // Float32 implementations are autogenerated and not directly tested. 525 func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { 526 if ul != blas.Lower && ul != blas.Upper { 527 panic(badUplo) 528 } 529 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 530 panic(badTranspose) 531 } 532 if n < 0 { 533 panic(nLT0) 534 } 535 if k < 0 { 536 panic(kLT0) 537 } 538 row, col := k, n 539 if tA == blas.NoTrans { 540 row, col = n, k 541 } 542 if lda < max(1, col) { 543 panic(badLdA) 544 } 545 if ldb < max(1, col) { 546 panic(badLdB) 547 } 548 if ldc < max(1, n) { 549 panic(badLdC) 550 } 551 552 // Quick return if possible. 553 if n == 0 { 554 return 555 } 556 557 // For zero matrix size the following slice length checks are trivially satisfied. 558 if len(a) < lda*(row-1)+col { 559 panic(shortA) 560 } 561 if len(b) < ldb*(row-1)+col { 562 panic(shortB) 563 } 564 if len(c) < ldc*(n-1)+n { 565 panic(shortC) 566 } 567 568 if alpha == 0 { 569 if beta == 0 { 570 if ul == blas.Upper { 571 for i := 0; i < n; i++ { 572 ctmp := c[i*ldc+i : i*ldc+n] 573 for j := range ctmp { 574 ctmp[j] = 0 575 } 576 } 577 return 578 } 579 for i := 0; i < n; i++ { 580 ctmp := c[i*ldc : i*ldc+i+1] 581 for j := range ctmp { 582 ctmp[j] = 0 583 } 584 } 585 return 586 } 587 if ul == blas.Upper { 588 for i := 0; i < n; i++ { 589 ctmp := c[i*ldc+i : i*ldc+n] 590 for j := range ctmp { 591 ctmp[j] *= beta 592 } 593 } 594 return 595 } 596 for i := 0; i < n; i++ { 597 ctmp := c[i*ldc : i*ldc+i+1] 598 for j := range ctmp { 599 ctmp[j] *= beta 600 } 601 } 602 return 603 } 604 if tA == blas.NoTrans { 605 if ul == blas.Upper { 606 for i := 0; i < n; i++ { 607 atmp := a[i*lda : i*lda+k] 608 btmp := b[i*ldb : i*ldb+k] 609 ctmp := c[i*ldc+i : i*ldc+n] 610 if beta == 0 { 611 for jc := range ctmp { 612 j := i + jc 613 var tmp1, tmp2 float32 614 binner := b[j*ldb : j*ldb+k] 615 for l, v := range a[j*lda : j*lda+k] { 616 tmp1 += v * btmp[l] 617 tmp2 += atmp[l] * binner[l] 618 } 619 ctmp[jc] = alpha * (tmp1 + tmp2) 620 } 621 } else { 622 for jc := range ctmp { 623 j := i + jc 624 var tmp1, tmp2 float32 625 binner := b[j*ldb : j*ldb+k] 626 for l, v := range a[j*lda : j*lda+k] { 627 tmp1 += v * btmp[l] 628 tmp2 += atmp[l] * binner[l] 629 } 630 ctmp[jc] *= beta 631 ctmp[jc] += alpha * (tmp1 + tmp2) 632 } 633 } 634 } 635 return 636 } 637 for i := 0; i < n; i++ { 638 atmp := a[i*lda : i*lda+k] 639 btmp := b[i*ldb : i*ldb+k] 640 ctmp := c[i*ldc : i*ldc+i+1] 641 if beta == 0 { 642 for j := 0; j <= i; j++ { 643 var tmp1, tmp2 float32 644 binner := b[j*ldb : j*ldb+k] 645 for l, v := range a[j*lda : j*lda+k] { 646 tmp1 += v * btmp[l] 647 tmp2 += atmp[l] * binner[l] 648 } 649 ctmp[j] = alpha * (tmp1 + tmp2) 650 } 651 } else { 652 for j := 0; j <= i; j++ { 653 var tmp1, tmp2 float32 654 binner := b[j*ldb : j*ldb+k] 655 for l, v := range a[j*lda : j*lda+k] { 656 tmp1 += v * btmp[l] 657 tmp2 += atmp[l] * binner[l] 658 } 659 ctmp[j] *= beta 660 ctmp[j] += alpha * (tmp1 + tmp2) 661 } 662 } 663 } 664 return 665 } 666 if ul == blas.Upper { 667 for i := 0; i < n; i++ { 668 ctmp := c[i*ldc+i : i*ldc+n] 669 switch beta { 670 case 0: 671 for j := range ctmp { 672 ctmp[j] = 0 673 } 674 case 1: 675 default: 676 for j := range ctmp { 677 ctmp[j] *= beta 678 } 679 } 680 for l := 0; l < k; l++ { 681 tmp1 := alpha * b[l*ldb+i] 682 tmp2 := alpha * a[l*lda+i] 683 btmp := b[l*ldb+i : l*ldb+n] 684 if tmp1 != 0 || tmp2 != 0 { 685 for j, v := range a[l*lda+i : l*lda+n] { 686 ctmp[j] += v*tmp1 + btmp[j]*tmp2 687 } 688 } 689 } 690 } 691 return 692 } 693 for i := 0; i < n; i++ { 694 ctmp := c[i*ldc : i*ldc+i+1] 695 switch beta { 696 case 0: 697 for j := range ctmp { 698 ctmp[j] = 0 699 } 700 case 1: 701 default: 702 for j := range ctmp { 703 ctmp[j] *= beta 704 } 705 } 706 for l := 0; l < k; l++ { 707 tmp1 := alpha * b[l*ldb+i] 708 tmp2 := alpha * a[l*lda+i] 709 btmp := b[l*ldb : l*ldb+i+1] 710 if tmp1 != 0 || tmp2 != 0 { 711 for j, v := range a[l*lda : l*lda+i+1] { 712 ctmp[j] += v*tmp1 + btmp[j]*tmp2 713 } 714 } 715 } 716 } 717 } 718 719 // Strmm performs one of the matrix-matrix operations 720 // B = alpha * A * B if tA == blas.NoTrans and side == blas.Left 721 // B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 722 // B = alpha * B * A if tA == blas.NoTrans and side == blas.Right 723 // B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 724 // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar. 725 // 726 // Float32 implementations are autogenerated and not directly tested. 727 func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { 728 if s != blas.Left && s != blas.Right { 729 panic(badSide) 730 } 731 if ul != blas.Lower && ul != blas.Upper { 732 panic(badUplo) 733 } 734 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 735 panic(badTranspose) 736 } 737 if d != blas.NonUnit && d != blas.Unit { 738 panic(badDiag) 739 } 740 if m < 0 { 741 panic(mLT0) 742 } 743 if n < 0 { 744 panic(nLT0) 745 } 746 k := n 747 if s == blas.Left { 748 k = m 749 } 750 if lda < max(1, k) { 751 panic(badLdA) 752 } 753 if ldb < max(1, n) { 754 panic(badLdB) 755 } 756 757 // Quick return if possible. 758 if m == 0 || n == 0 { 759 return 760 } 761 762 // For zero matrix size the following slice length checks are trivially satisfied. 763 if len(a) < lda*(k-1)+k { 764 panic(shortA) 765 } 766 if len(b) < ldb*(m-1)+n { 767 panic(shortB) 768 } 769 770 if alpha == 0 { 771 for i := 0; i < m; i++ { 772 btmp := b[i*ldb : i*ldb+n] 773 for j := range btmp { 774 btmp[j] = 0 775 } 776 } 777 return 778 } 779 780 nonUnit := d == blas.NonUnit 781 if s == blas.Left { 782 if tA == blas.NoTrans { 783 if ul == blas.Upper { 784 for i := 0; i < m; i++ { 785 tmp := alpha 786 if nonUnit { 787 tmp *= a[i*lda+i] 788 } 789 btmp := b[i*ldb : i*ldb+n] 790 f32.ScalUnitary(tmp, btmp) 791 for ka, va := range a[i*lda+i+1 : i*lda+m] { 792 k := ka + i + 1 793 if va != 0 { 794 f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 795 } 796 } 797 } 798 return 799 } 800 for i := m - 1; i >= 0; i-- { 801 tmp := alpha 802 if nonUnit { 803 tmp *= a[i*lda+i] 804 } 805 btmp := b[i*ldb : i*ldb+n] 806 f32.ScalUnitary(tmp, btmp) 807 for k, va := range a[i*lda : i*lda+i] { 808 if va != 0 { 809 f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 810 } 811 } 812 } 813 return 814 } 815 // Cases where a is transposed. 816 if ul == blas.Upper { 817 for k := m - 1; k >= 0; k-- { 818 btmpk := b[k*ldb : k*ldb+n] 819 for ia, va := range a[k*lda+k+1 : k*lda+m] { 820 i := ia + k + 1 821 btmp := b[i*ldb : i*ldb+n] 822 if va != 0 { 823 f32.AxpyUnitary(alpha*va, btmpk, btmp) 824 } 825 } 826 tmp := alpha 827 if nonUnit { 828 tmp *= a[k*lda+k] 829 } 830 if tmp != 1 { 831 f32.ScalUnitary(tmp, btmpk) 832 } 833 } 834 return 835 } 836 for k := 0; k < m; k++ { 837 btmpk := b[k*ldb : k*ldb+n] 838 for i, va := range a[k*lda : k*lda+k] { 839 btmp := b[i*ldb : i*ldb+n] 840 if va != 0 { 841 f32.AxpyUnitary(alpha*va, btmpk, btmp) 842 } 843 } 844 tmp := alpha 845 if nonUnit { 846 tmp *= a[k*lda+k] 847 } 848 if tmp != 1 { 849 f32.ScalUnitary(tmp, btmpk) 850 } 851 } 852 return 853 } 854 // Cases where a is on the right 855 if tA == blas.NoTrans { 856 if ul == blas.Upper { 857 for i := 0; i < m; i++ { 858 btmp := b[i*ldb : i*ldb+n] 859 for k := n - 1; k >= 0; k-- { 860 tmp := alpha * btmp[k] 861 if tmp == 0 { 862 continue 863 } 864 btmp[k] = tmp 865 if nonUnit { 866 btmp[k] *= a[k*lda+k] 867 } 868 f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 869 } 870 } 871 return 872 } 873 for i := 0; i < m; i++ { 874 btmp := b[i*ldb : i*ldb+n] 875 for k := 0; k < n; k++ { 876 tmp := alpha * btmp[k] 877 if tmp == 0 { 878 continue 879 } 880 btmp[k] = tmp 881 if nonUnit { 882 btmp[k] *= a[k*lda+k] 883 } 884 f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k]) 885 } 886 } 887 return 888 } 889 // Cases where a is transposed. 890 if ul == blas.Upper { 891 for i := 0; i < m; i++ { 892 btmp := b[i*ldb : i*ldb+n] 893 for j, vb := range btmp { 894 tmp := vb 895 if nonUnit { 896 tmp *= a[j*lda+j] 897 } 898 tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) 899 btmp[j] = alpha * tmp 900 } 901 } 902 return 903 } 904 for i := 0; i < m; i++ { 905 btmp := b[i*ldb : i*ldb+n] 906 for j := n - 1; j >= 0; j-- { 907 tmp := btmp[j] 908 if nonUnit { 909 tmp *= a[j*lda+j] 910 } 911 tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 912 btmp[j] = alpha * tmp 913 } 914 } 915 }