gonum.org/v1/gonum@v0.14.0/blas/gonum/level3float32.go (about) 1 // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2014 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/internal/asm/f32" 12 ) 13 14 var _ blas.Float32Level3 = Implementation{} 15 16 // Strsm solves one of the matrix equations 17 // 18 // A * X = alpha * B if tA == blas.NoTrans and side == blas.Left 19 // Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 20 // X * A = alpha * B if tA == blas.NoTrans and side == blas.Right 21 // X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 22 // 23 // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a 24 // scalar. 25 // 26 // At entry to the function, X contains the values of B, and the result is 27 // stored in-place into X. 28 // 29 // No check is made that A is invertible. 30 // 31 // Float32 implementations are autogenerated and not directly tested. 32 func (Implementation) Strsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { 33 if s != blas.Left && s != blas.Right { 34 panic(badSide) 35 } 36 if ul != blas.Lower && ul != blas.Upper { 37 panic(badUplo) 38 } 39 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 40 panic(badTranspose) 41 } 42 if d != blas.NonUnit && d != blas.Unit { 43 panic(badDiag) 44 } 45 if m < 0 { 46 panic(mLT0) 47 } 48 if n < 0 { 49 panic(nLT0) 50 } 51 k := n 52 if s == blas.Left { 53 k = m 54 } 55 if lda < max(1, k) { 56 panic(badLdA) 57 } 58 if ldb < max(1, n) { 59 panic(badLdB) 60 } 61 62 // Quick return if possible. 63 if m == 0 || n == 0 { 64 return 65 } 66 67 // For zero matrix size the following slice length checks are trivially satisfied. 68 if len(a) < lda*(k-1)+k { 69 panic(shortA) 70 } 71 if len(b) < ldb*(m-1)+n { 72 panic(shortB) 73 } 74 75 if alpha == 0 { 76 for i := 0; i < m; i++ { 77 btmp := b[i*ldb : i*ldb+n] 78 for j := range btmp { 79 btmp[j] = 0 80 } 81 } 82 return 83 } 84 nonUnit := d == blas.NonUnit 85 if s == blas.Left { 86 if tA == blas.NoTrans { 87 if ul == blas.Upper { 88 for i := m - 1; i >= 0; i-- { 89 btmp := b[i*ldb : i*ldb+n] 90 if alpha != 1 { 91 f32.ScalUnitary(alpha, btmp) 92 } 93 for ka, va := range a[i*lda+i+1 : i*lda+m] { 94 if va != 0 { 95 k := ka + i + 1 96 f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 97 } 98 } 99 if nonUnit { 100 tmp := 1 / a[i*lda+i] 101 f32.ScalUnitary(tmp, btmp) 102 } 103 } 104 return 105 } 106 for i := 0; i < m; i++ { 107 btmp := b[i*ldb : i*ldb+n] 108 if alpha != 1 { 109 f32.ScalUnitary(alpha, btmp) 110 } 111 for k, va := range a[i*lda : i*lda+i] { 112 if va != 0 { 113 f32.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 114 } 115 } 116 if nonUnit { 117 tmp := 1 / a[i*lda+i] 118 f32.ScalUnitary(tmp, btmp) 119 } 120 } 121 return 122 } 123 // Cases where a is transposed 124 if ul == blas.Upper { 125 for k := 0; k < m; k++ { 126 btmpk := b[k*ldb : k*ldb+n] 127 if nonUnit { 128 tmp := 1 / a[k*lda+k] 129 f32.ScalUnitary(tmp, btmpk) 130 } 131 for ia, va := range a[k*lda+k+1 : k*lda+m] { 132 if va != 0 { 133 i := ia + k + 1 134 f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 135 } 136 } 137 if alpha != 1 { 138 f32.ScalUnitary(alpha, btmpk) 139 } 140 } 141 return 142 } 143 for k := m - 1; k >= 0; k-- { 144 btmpk := b[k*ldb : k*ldb+n] 145 if nonUnit { 146 tmp := 1 / a[k*lda+k] 147 f32.ScalUnitary(tmp, btmpk) 148 } 149 for i, va := range a[k*lda : k*lda+k] { 150 if va != 0 { 151 f32.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 152 } 153 } 154 if alpha != 1 { 155 f32.ScalUnitary(alpha, btmpk) 156 } 157 } 158 return 159 } 160 // Cases where a is to the right of X. 161 if tA == blas.NoTrans { 162 if ul == blas.Upper { 163 for i := 0; i < m; i++ { 164 btmp := b[i*ldb : i*ldb+n] 165 if alpha != 1 { 166 f32.ScalUnitary(alpha, btmp) 167 } 168 for k, vb := range btmp { 169 if vb == 0 { 170 continue 171 } 172 if nonUnit { 173 btmp[k] /= a[k*lda+k] 174 } 175 f32.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 176 } 177 } 178 return 179 } 180 for i := 0; i < m; i++ { 181 btmp := b[i*ldb : i*ldb+n] 182 if alpha != 1 { 183 f32.ScalUnitary(alpha, btmp) 184 } 185 for k := n - 1; k >= 0; k-- { 186 if btmp[k] == 0 { 187 continue 188 } 189 if nonUnit { 190 btmp[k] /= a[k*lda+k] 191 } 192 f32.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k]) 193 } 194 } 195 return 196 } 197 // Cases where a is transposed. 198 if ul == blas.Upper { 199 for i := 0; i < m; i++ { 200 btmp := b[i*ldb : i*ldb+n] 201 for j := n - 1; j >= 0; j-- { 202 tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) 203 if nonUnit { 204 tmp /= a[j*lda+j] 205 } 206 btmp[j] = tmp 207 } 208 } 209 return 210 } 211 for i := 0; i < m; i++ { 212 btmp := b[i*ldb : i*ldb+n] 213 for j := 0; j < n; j++ { 214 tmp := alpha*btmp[j] - f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 215 if nonUnit { 216 tmp /= a[j*lda+j] 217 } 218 btmp[j] = tmp 219 } 220 } 221 } 222 223 // Ssymm performs one of the matrix-matrix operations 224 // 225 // C = alpha * A * B + beta * C if side == blas.Left 226 // C = alpha * B * A + beta * C if side == blas.Right 227 // 228 // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha 229 // is a scalar. 230 // 231 // Float32 implementations are autogenerated and not directly tested. 232 func (Implementation) Ssymm(s blas.Side, ul blas.Uplo, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { 233 if s != blas.Right && s != blas.Left { 234 panic(badSide) 235 } 236 if ul != blas.Lower && ul != blas.Upper { 237 panic(badUplo) 238 } 239 if m < 0 { 240 panic(mLT0) 241 } 242 if n < 0 { 243 panic(nLT0) 244 } 245 k := n 246 if s == blas.Left { 247 k = m 248 } 249 if lda < max(1, k) { 250 panic(badLdA) 251 } 252 if ldb < max(1, n) { 253 panic(badLdB) 254 } 255 if ldc < max(1, n) { 256 panic(badLdC) 257 } 258 259 // Quick return if possible. 260 if m == 0 || n == 0 { 261 return 262 } 263 264 // For zero matrix size the following slice length checks are trivially satisfied. 265 if len(a) < lda*(k-1)+k { 266 panic(shortA) 267 } 268 if len(b) < ldb*(m-1)+n { 269 panic(shortB) 270 } 271 if len(c) < ldc*(m-1)+n { 272 panic(shortC) 273 } 274 275 // Quick return if possible. 276 if alpha == 0 && beta == 1 { 277 return 278 } 279 280 if beta == 0 { 281 for i := 0; i < m; i++ { 282 ctmp := c[i*ldc : i*ldc+n] 283 for j := range ctmp { 284 ctmp[j] = 0 285 } 286 } 287 } 288 289 if alpha == 0 { 290 if beta != 0 { 291 for i := 0; i < m; i++ { 292 ctmp := c[i*ldc : i*ldc+n] 293 for j := 0; j < n; j++ { 294 ctmp[j] *= beta 295 } 296 } 297 } 298 return 299 } 300 301 isUpper := ul == blas.Upper 302 if s == blas.Left { 303 for i := 0; i < m; i++ { 304 atmp := alpha * a[i*lda+i] 305 btmp := b[i*ldb : i*ldb+n] 306 ctmp := c[i*ldc : i*ldc+n] 307 for j, v := range btmp { 308 ctmp[j] *= beta 309 ctmp[j] += atmp * v 310 } 311 312 for k := 0; k < i; k++ { 313 var atmp float32 314 if isUpper { 315 atmp = a[k*lda+i] 316 } else { 317 atmp = a[i*lda+k] 318 } 319 atmp *= alpha 320 f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 321 } 322 for k := i + 1; k < m; k++ { 323 var atmp float32 324 if isUpper { 325 atmp = a[i*lda+k] 326 } else { 327 atmp = a[k*lda+i] 328 } 329 atmp *= alpha 330 f32.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 331 } 332 } 333 return 334 } 335 if isUpper { 336 for i := 0; i < m; i++ { 337 for j := n - 1; j >= 0; j-- { 338 tmp := alpha * b[i*ldb+j] 339 var tmp2 float32 340 atmp := a[j*lda+j+1 : j*lda+n] 341 btmp := b[i*ldb+j+1 : i*ldb+n] 342 ctmp := c[i*ldc+j+1 : i*ldc+n] 343 for k, v := range atmp { 344 ctmp[k] += tmp * v 345 tmp2 += btmp[k] * v 346 } 347 c[i*ldc+j] *= beta 348 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 349 } 350 } 351 return 352 } 353 for i := 0; i < m; i++ { 354 for j := 0; j < n; j++ { 355 tmp := alpha * b[i*ldb+j] 356 var tmp2 float32 357 atmp := a[j*lda : j*lda+j] 358 btmp := b[i*ldb : i*ldb+j] 359 ctmp := c[i*ldc : i*ldc+j] 360 for k, v := range atmp { 361 ctmp[k] += tmp * v 362 tmp2 += btmp[k] * v 363 } 364 c[i*ldc+j] *= beta 365 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 366 } 367 } 368 } 369 370 // Ssyrk performs one of the symmetric rank-k operations 371 // 372 // C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans 373 // C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 374 // 375 // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and 376 // beta are scalars. 377 // 378 // Float32 implementations are autogenerated and not directly tested. 379 func (Implementation) Ssyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, beta float32, c []float32, ldc int) { 380 if ul != blas.Lower && ul != blas.Upper { 381 panic(badUplo) 382 } 383 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 384 panic(badTranspose) 385 } 386 if n < 0 { 387 panic(nLT0) 388 } 389 if k < 0 { 390 panic(kLT0) 391 } 392 row, col := k, n 393 if tA == blas.NoTrans { 394 row, col = n, k 395 } 396 if lda < max(1, col) { 397 panic(badLdA) 398 } 399 if ldc < max(1, n) { 400 panic(badLdC) 401 } 402 403 // Quick return if possible. 404 if n == 0 { 405 return 406 } 407 408 // For zero matrix size the following slice length checks are trivially satisfied. 409 if len(a) < lda*(row-1)+col { 410 panic(shortA) 411 } 412 if len(c) < ldc*(n-1)+n { 413 panic(shortC) 414 } 415 416 if alpha == 0 { 417 if beta == 0 { 418 if ul == blas.Upper { 419 for i := 0; i < n; i++ { 420 ctmp := c[i*ldc+i : i*ldc+n] 421 for j := range ctmp { 422 ctmp[j] = 0 423 } 424 } 425 return 426 } 427 for i := 0; i < n; i++ { 428 ctmp := c[i*ldc : i*ldc+i+1] 429 for j := range ctmp { 430 ctmp[j] = 0 431 } 432 } 433 return 434 } 435 if ul == blas.Upper { 436 for i := 0; i < n; i++ { 437 ctmp := c[i*ldc+i : i*ldc+n] 438 for j := range ctmp { 439 ctmp[j] *= beta 440 } 441 } 442 return 443 } 444 for i := 0; i < n; i++ { 445 ctmp := c[i*ldc : i*ldc+i+1] 446 for j := range ctmp { 447 ctmp[j] *= beta 448 } 449 } 450 return 451 } 452 if tA == blas.NoTrans { 453 if ul == blas.Upper { 454 for i := 0; i < n; i++ { 455 ctmp := c[i*ldc+i : i*ldc+n] 456 atmp := a[i*lda : i*lda+k] 457 if beta == 0 { 458 for jc := range ctmp { 459 j := jc + i 460 ctmp[jc] = alpha * f32.DotUnitary(atmp, a[j*lda:j*lda+k]) 461 } 462 } else { 463 for jc, vc := range ctmp { 464 j := jc + i 465 ctmp[jc] = vc*beta + alpha*f32.DotUnitary(atmp, a[j*lda:j*lda+k]) 466 } 467 } 468 } 469 return 470 } 471 for i := 0; i < n; i++ { 472 ctmp := c[i*ldc : i*ldc+i+1] 473 atmp := a[i*lda : i*lda+k] 474 if beta == 0 { 475 for j := range ctmp { 476 ctmp[j] = alpha * f32.DotUnitary(a[j*lda:j*lda+k], atmp) 477 } 478 } else { 479 for j, vc := range ctmp { 480 ctmp[j] = vc*beta + alpha*f32.DotUnitary(a[j*lda:j*lda+k], atmp) 481 } 482 } 483 } 484 return 485 } 486 // Cases where a is transposed. 487 if ul == blas.Upper { 488 for i := 0; i < n; i++ { 489 ctmp := c[i*ldc+i : i*ldc+n] 490 if beta == 0 { 491 for j := range ctmp { 492 ctmp[j] = 0 493 } 494 } else if beta != 1 { 495 for j := range ctmp { 496 ctmp[j] *= beta 497 } 498 } 499 for l := 0; l < k; l++ { 500 tmp := alpha * a[l*lda+i] 501 if tmp != 0 { 502 f32.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp) 503 } 504 } 505 } 506 return 507 } 508 for i := 0; i < n; i++ { 509 ctmp := c[i*ldc : i*ldc+i+1] 510 if beta != 1 { 511 for j := range ctmp { 512 ctmp[j] *= beta 513 } 514 } 515 for l := 0; l < k; l++ { 516 tmp := alpha * a[l*lda+i] 517 if tmp != 0 { 518 f32.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp) 519 } 520 } 521 } 522 } 523 524 // Ssyr2k performs one of the symmetric rank 2k operations 525 // 526 // C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans 527 // C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 528 // 529 // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and 530 // alpha and beta are scalars. 531 // 532 // Float32 implementations are autogenerated and not directly tested. 533 func (Implementation) Ssyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float32, a []float32, lda int, b []float32, ldb int, beta float32, c []float32, ldc int) { 534 if ul != blas.Lower && ul != blas.Upper { 535 panic(badUplo) 536 } 537 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 538 panic(badTranspose) 539 } 540 if n < 0 { 541 panic(nLT0) 542 } 543 if k < 0 { 544 panic(kLT0) 545 } 546 row, col := k, n 547 if tA == blas.NoTrans { 548 row, col = n, k 549 } 550 if lda < max(1, col) { 551 panic(badLdA) 552 } 553 if ldb < max(1, col) { 554 panic(badLdB) 555 } 556 if ldc < max(1, n) { 557 panic(badLdC) 558 } 559 560 // Quick return if possible. 561 if n == 0 { 562 return 563 } 564 565 // For zero matrix size the following slice length checks are trivially satisfied. 566 if len(a) < lda*(row-1)+col { 567 panic(shortA) 568 } 569 if len(b) < ldb*(row-1)+col { 570 panic(shortB) 571 } 572 if len(c) < ldc*(n-1)+n { 573 panic(shortC) 574 } 575 576 if alpha == 0 { 577 if beta == 0 { 578 if ul == blas.Upper { 579 for i := 0; i < n; i++ { 580 ctmp := c[i*ldc+i : i*ldc+n] 581 for j := range ctmp { 582 ctmp[j] = 0 583 } 584 } 585 return 586 } 587 for i := 0; i < n; i++ { 588 ctmp := c[i*ldc : i*ldc+i+1] 589 for j := range ctmp { 590 ctmp[j] = 0 591 } 592 } 593 return 594 } 595 if ul == blas.Upper { 596 for i := 0; i < n; i++ { 597 ctmp := c[i*ldc+i : i*ldc+n] 598 for j := range ctmp { 599 ctmp[j] *= beta 600 } 601 } 602 return 603 } 604 for i := 0; i < n; i++ { 605 ctmp := c[i*ldc : i*ldc+i+1] 606 for j := range ctmp { 607 ctmp[j] *= beta 608 } 609 } 610 return 611 } 612 if tA == blas.NoTrans { 613 if ul == blas.Upper { 614 for i := 0; i < n; i++ { 615 atmp := a[i*lda : i*lda+k] 616 btmp := b[i*ldb : i*ldb+k] 617 ctmp := c[i*ldc+i : i*ldc+n] 618 if beta == 0 { 619 for jc := range ctmp { 620 j := i + jc 621 var tmp1, tmp2 float32 622 binner := b[j*ldb : j*ldb+k] 623 for l, v := range a[j*lda : j*lda+k] { 624 tmp1 += v * btmp[l] 625 tmp2 += atmp[l] * binner[l] 626 } 627 ctmp[jc] = alpha * (tmp1 + tmp2) 628 } 629 } else { 630 for jc := range ctmp { 631 j := i + jc 632 var tmp1, tmp2 float32 633 binner := b[j*ldb : j*ldb+k] 634 for l, v := range a[j*lda : j*lda+k] { 635 tmp1 += v * btmp[l] 636 tmp2 += atmp[l] * binner[l] 637 } 638 ctmp[jc] *= beta 639 ctmp[jc] += alpha * (tmp1 + tmp2) 640 } 641 } 642 } 643 return 644 } 645 for i := 0; i < n; i++ { 646 atmp := a[i*lda : i*lda+k] 647 btmp := b[i*ldb : i*ldb+k] 648 ctmp := c[i*ldc : i*ldc+i+1] 649 if beta == 0 { 650 for j := 0; j <= i; j++ { 651 var tmp1, tmp2 float32 652 binner := b[j*ldb : j*ldb+k] 653 for l, v := range a[j*lda : j*lda+k] { 654 tmp1 += v * btmp[l] 655 tmp2 += atmp[l] * binner[l] 656 } 657 ctmp[j] = alpha * (tmp1 + tmp2) 658 } 659 } else { 660 for j := 0; j <= i; j++ { 661 var tmp1, tmp2 float32 662 binner := b[j*ldb : j*ldb+k] 663 for l, v := range a[j*lda : j*lda+k] { 664 tmp1 += v * btmp[l] 665 tmp2 += atmp[l] * binner[l] 666 } 667 ctmp[j] *= beta 668 ctmp[j] += alpha * (tmp1 + tmp2) 669 } 670 } 671 } 672 return 673 } 674 if ul == blas.Upper { 675 for i := 0; i < n; i++ { 676 ctmp := c[i*ldc+i : i*ldc+n] 677 switch beta { 678 case 0: 679 for j := range ctmp { 680 ctmp[j] = 0 681 } 682 case 1: 683 default: 684 for j := range ctmp { 685 ctmp[j] *= beta 686 } 687 } 688 for l := 0; l < k; l++ { 689 tmp1 := alpha * b[l*ldb+i] 690 tmp2 := alpha * a[l*lda+i] 691 btmp := b[l*ldb+i : l*ldb+n] 692 if tmp1 != 0 || tmp2 != 0 { 693 for j, v := range a[l*lda+i : l*lda+n] { 694 ctmp[j] += v*tmp1 + btmp[j]*tmp2 695 } 696 } 697 } 698 } 699 return 700 } 701 for i := 0; i < n; i++ { 702 ctmp := c[i*ldc : i*ldc+i+1] 703 switch beta { 704 case 0: 705 for j := range ctmp { 706 ctmp[j] = 0 707 } 708 case 1: 709 default: 710 for j := range ctmp { 711 ctmp[j] *= beta 712 } 713 } 714 for l := 0; l < k; l++ { 715 tmp1 := alpha * b[l*ldb+i] 716 tmp2 := alpha * a[l*lda+i] 717 btmp := b[l*ldb : l*ldb+i+1] 718 if tmp1 != 0 || tmp2 != 0 { 719 for j, v := range a[l*lda : l*lda+i+1] { 720 ctmp[j] += v*tmp1 + btmp[j]*tmp2 721 } 722 } 723 } 724 } 725 } 726 727 // Strmm performs one of the matrix-matrix operations 728 // 729 // B = alpha * A * B if tA == blas.NoTrans and side == blas.Left 730 // B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 731 // B = alpha * B * A if tA == blas.NoTrans and side == blas.Right 732 // B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 733 // 734 // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar. 735 // 736 // Float32 implementations are autogenerated and not directly tested. 737 func (Implementation) Strmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float32, a []float32, lda int, b []float32, ldb int) { 738 if s != blas.Left && s != blas.Right { 739 panic(badSide) 740 } 741 if ul != blas.Lower && ul != blas.Upper { 742 panic(badUplo) 743 } 744 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 745 panic(badTranspose) 746 } 747 if d != blas.NonUnit && d != blas.Unit { 748 panic(badDiag) 749 } 750 if m < 0 { 751 panic(mLT0) 752 } 753 if n < 0 { 754 panic(nLT0) 755 } 756 k := n 757 if s == blas.Left { 758 k = m 759 } 760 if lda < max(1, k) { 761 panic(badLdA) 762 } 763 if ldb < max(1, n) { 764 panic(badLdB) 765 } 766 767 // Quick return if possible. 768 if m == 0 || n == 0 { 769 return 770 } 771 772 // For zero matrix size the following slice length checks are trivially satisfied. 773 if len(a) < lda*(k-1)+k { 774 panic(shortA) 775 } 776 if len(b) < ldb*(m-1)+n { 777 panic(shortB) 778 } 779 780 if alpha == 0 { 781 for i := 0; i < m; i++ { 782 btmp := b[i*ldb : i*ldb+n] 783 for j := range btmp { 784 btmp[j] = 0 785 } 786 } 787 return 788 } 789 790 nonUnit := d == blas.NonUnit 791 if s == blas.Left { 792 if tA == blas.NoTrans { 793 if ul == blas.Upper { 794 for i := 0; i < m; i++ { 795 tmp := alpha 796 if nonUnit { 797 tmp *= a[i*lda+i] 798 } 799 btmp := b[i*ldb : i*ldb+n] 800 f32.ScalUnitary(tmp, btmp) 801 for ka, va := range a[i*lda+i+1 : i*lda+m] { 802 k := ka + i + 1 803 if va != 0 { 804 f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 805 } 806 } 807 } 808 return 809 } 810 for i := m - 1; i >= 0; i-- { 811 tmp := alpha 812 if nonUnit { 813 tmp *= a[i*lda+i] 814 } 815 btmp := b[i*ldb : i*ldb+n] 816 f32.ScalUnitary(tmp, btmp) 817 for k, va := range a[i*lda : i*lda+i] { 818 if va != 0 { 819 f32.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 820 } 821 } 822 } 823 return 824 } 825 // Cases where a is transposed. 826 if ul == blas.Upper { 827 for k := m - 1; k >= 0; k-- { 828 btmpk := b[k*ldb : k*ldb+n] 829 for ia, va := range a[k*lda+k+1 : k*lda+m] { 830 i := ia + k + 1 831 btmp := b[i*ldb : i*ldb+n] 832 if va != 0 { 833 f32.AxpyUnitary(alpha*va, btmpk, btmp) 834 } 835 } 836 tmp := alpha 837 if nonUnit { 838 tmp *= a[k*lda+k] 839 } 840 if tmp != 1 { 841 f32.ScalUnitary(tmp, btmpk) 842 } 843 } 844 return 845 } 846 for k := 0; k < m; k++ { 847 btmpk := b[k*ldb : k*ldb+n] 848 for i, va := range a[k*lda : k*lda+k] { 849 btmp := b[i*ldb : i*ldb+n] 850 if va != 0 { 851 f32.AxpyUnitary(alpha*va, btmpk, btmp) 852 } 853 } 854 tmp := alpha 855 if nonUnit { 856 tmp *= a[k*lda+k] 857 } 858 if tmp != 1 { 859 f32.ScalUnitary(tmp, btmpk) 860 } 861 } 862 return 863 } 864 // Cases where a is on the right 865 if tA == blas.NoTrans { 866 if ul == blas.Upper { 867 for i := 0; i < m; i++ { 868 btmp := b[i*ldb : i*ldb+n] 869 for k := n - 1; k >= 0; k-- { 870 tmp := alpha * btmp[k] 871 if tmp == 0 { 872 continue 873 } 874 btmp[k] = tmp 875 if nonUnit { 876 btmp[k] *= a[k*lda+k] 877 } 878 f32.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 879 } 880 } 881 return 882 } 883 for i := 0; i < m; i++ { 884 btmp := b[i*ldb : i*ldb+n] 885 for k := 0; k < n; k++ { 886 tmp := alpha * btmp[k] 887 if tmp == 0 { 888 continue 889 } 890 btmp[k] = tmp 891 if nonUnit { 892 btmp[k] *= a[k*lda+k] 893 } 894 f32.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k]) 895 } 896 } 897 return 898 } 899 // Cases where a is transposed. 900 if ul == blas.Upper { 901 for i := 0; i < m; i++ { 902 btmp := b[i*ldb : i*ldb+n] 903 for j, vb := range btmp { 904 tmp := vb 905 if nonUnit { 906 tmp *= a[j*lda+j] 907 } 908 tmp += f32.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) 909 btmp[j] = alpha * tmp 910 } 911 } 912 return 913 } 914 for i := 0; i < m; i++ { 915 btmp := b[i*ldb : i*ldb+n] 916 for j := n - 1; j >= 0; j-- { 917 tmp := btmp[j] 918 if nonUnit { 919 tmp *= a[j*lda+j] 920 } 921 tmp += f32.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 922 btmp[j] = alpha * tmp 923 } 924 } 925 }