gonum.org/v1/gonum@v0.14.0/blas/gonum/level3float64.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "gonum.org/v1/gonum/blas" 9 "gonum.org/v1/gonum/internal/asm/f64" 10 ) 11 12 var _ blas.Float64Level3 = Implementation{} 13 14 // Dtrsm solves one of the matrix equations 15 // 16 // A * X = alpha * B if tA == blas.NoTrans and side == blas.Left 17 // Aᵀ * X = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 18 // X * A = alpha * B if tA == blas.NoTrans and side == blas.Right 19 // X * Aᵀ = alpha * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 20 // 21 // where A is an n×n or m×m triangular matrix, X and B are m×n matrices, and alpha is a 22 // scalar. 23 // 24 // At entry to the function, X contains the values of B, and the result is 25 // stored in-place into X. 26 // 27 // No check is made that A is invertible. 28 func (Implementation) Dtrsm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { 29 if s != blas.Left && s != blas.Right { 30 panic(badSide) 31 } 32 if ul != blas.Lower && ul != blas.Upper { 33 panic(badUplo) 34 } 35 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 36 panic(badTranspose) 37 } 38 if d != blas.NonUnit && d != blas.Unit { 39 panic(badDiag) 40 } 41 if m < 0 { 42 panic(mLT0) 43 } 44 if n < 0 { 45 panic(nLT0) 46 } 47 k := n 48 if s == blas.Left { 49 k = m 50 } 51 if lda < max(1, k) { 52 panic(badLdA) 53 } 54 if ldb < max(1, n) { 55 panic(badLdB) 56 } 57 58 // Quick return if possible. 59 if m == 0 || n == 0 { 60 return 61 } 62 63 // For zero matrix size the following slice length checks are trivially satisfied. 64 if len(a) < lda*(k-1)+k { 65 panic(shortA) 66 } 67 if len(b) < ldb*(m-1)+n { 68 panic(shortB) 69 } 70 71 if alpha == 0 { 72 for i := 0; i < m; i++ { 73 btmp := b[i*ldb : i*ldb+n] 74 for j := range btmp { 75 btmp[j] = 0 76 } 77 } 78 return 79 } 80 nonUnit := d == blas.NonUnit 81 if s == blas.Left { 82 if tA == blas.NoTrans { 83 if ul == blas.Upper { 84 for i := m - 1; i >= 0; i-- { 85 btmp := b[i*ldb : i*ldb+n] 86 if alpha != 1 { 87 f64.ScalUnitary(alpha, btmp) 88 } 89 for ka, va := range a[i*lda+i+1 : i*lda+m] { 90 if va != 0 { 91 k := ka + i + 1 92 f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 93 } 94 } 95 if nonUnit { 96 tmp := 1 / a[i*lda+i] 97 f64.ScalUnitary(tmp, btmp) 98 } 99 } 100 return 101 } 102 for i := 0; i < m; i++ { 103 btmp := b[i*ldb : i*ldb+n] 104 if alpha != 1 { 105 f64.ScalUnitary(alpha, btmp) 106 } 107 for k, va := range a[i*lda : i*lda+i] { 108 if va != 0 { 109 f64.AxpyUnitary(-va, b[k*ldb:k*ldb+n], btmp) 110 } 111 } 112 if nonUnit { 113 tmp := 1 / a[i*lda+i] 114 f64.ScalUnitary(tmp, btmp) 115 } 116 } 117 return 118 } 119 // Cases where a is transposed 120 if ul == blas.Upper { 121 for k := 0; k < m; k++ { 122 btmpk := b[k*ldb : k*ldb+n] 123 if nonUnit { 124 tmp := 1 / a[k*lda+k] 125 f64.ScalUnitary(tmp, btmpk) 126 } 127 for ia, va := range a[k*lda+k+1 : k*lda+m] { 128 if va != 0 { 129 i := ia + k + 1 130 f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 131 } 132 } 133 if alpha != 1 { 134 f64.ScalUnitary(alpha, btmpk) 135 } 136 } 137 return 138 } 139 for k := m - 1; k >= 0; k-- { 140 btmpk := b[k*ldb : k*ldb+n] 141 if nonUnit { 142 tmp := 1 / a[k*lda+k] 143 f64.ScalUnitary(tmp, btmpk) 144 } 145 for i, va := range a[k*lda : k*lda+k] { 146 if va != 0 { 147 f64.AxpyUnitary(-va, btmpk, b[i*ldb:i*ldb+n]) 148 } 149 } 150 if alpha != 1 { 151 f64.ScalUnitary(alpha, btmpk) 152 } 153 } 154 return 155 } 156 // Cases where a is to the right of X. 157 if tA == blas.NoTrans { 158 if ul == blas.Upper { 159 for i := 0; i < m; i++ { 160 btmp := b[i*ldb : i*ldb+n] 161 if alpha != 1 { 162 f64.ScalUnitary(alpha, btmp) 163 } 164 for k, vb := range btmp { 165 if vb == 0 { 166 continue 167 } 168 if nonUnit { 169 btmp[k] /= a[k*lda+k] 170 } 171 f64.AxpyUnitary(-btmp[k], a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 172 } 173 } 174 return 175 } 176 for i := 0; i < m; i++ { 177 btmp := b[i*ldb : i*ldb+n] 178 if alpha != 1 { 179 f64.ScalUnitary(alpha, btmp) 180 } 181 for k := n - 1; k >= 0; k-- { 182 if btmp[k] == 0 { 183 continue 184 } 185 if nonUnit { 186 btmp[k] /= a[k*lda+k] 187 } 188 f64.AxpyUnitary(-btmp[k], a[k*lda:k*lda+k], btmp[:k]) 189 } 190 } 191 return 192 } 193 // Cases where a is transposed. 194 if ul == blas.Upper { 195 for i := 0; i < m; i++ { 196 btmp := b[i*ldb : i*ldb+n] 197 for j := n - 1; j >= 0; j-- { 198 tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:]) 199 if nonUnit { 200 tmp /= a[j*lda+j] 201 } 202 btmp[j] = tmp 203 } 204 } 205 return 206 } 207 for i := 0; i < m; i++ { 208 btmp := b[i*ldb : i*ldb+n] 209 for j := 0; j < n; j++ { 210 tmp := alpha*btmp[j] - f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 211 if nonUnit { 212 tmp /= a[j*lda+j] 213 } 214 btmp[j] = tmp 215 } 216 } 217 } 218 219 // Dsymm performs one of the matrix-matrix operations 220 // 221 // C = alpha * A * B + beta * C if side == blas.Left 222 // C = alpha * B * A + beta * C if side == blas.Right 223 // 224 // where A is an n×n or m×m symmetric matrix, B and C are m×n matrices, and alpha 225 // is a scalar. 226 func (Implementation) Dsymm(s blas.Side, ul blas.Uplo, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { 227 if s != blas.Right && s != blas.Left { 228 panic(badSide) 229 } 230 if ul != blas.Lower && ul != blas.Upper { 231 panic(badUplo) 232 } 233 if m < 0 { 234 panic(mLT0) 235 } 236 if n < 0 { 237 panic(nLT0) 238 } 239 k := n 240 if s == blas.Left { 241 k = m 242 } 243 if lda < max(1, k) { 244 panic(badLdA) 245 } 246 if ldb < max(1, n) { 247 panic(badLdB) 248 } 249 if ldc < max(1, n) { 250 panic(badLdC) 251 } 252 253 // Quick return if possible. 254 if m == 0 || n == 0 { 255 return 256 } 257 258 // For zero matrix size the following slice length checks are trivially satisfied. 259 if len(a) < lda*(k-1)+k { 260 panic(shortA) 261 } 262 if len(b) < ldb*(m-1)+n { 263 panic(shortB) 264 } 265 if len(c) < ldc*(m-1)+n { 266 panic(shortC) 267 } 268 269 // Quick return if possible. 270 if alpha == 0 && beta == 1 { 271 return 272 } 273 274 if beta == 0 { 275 for i := 0; i < m; i++ { 276 ctmp := c[i*ldc : i*ldc+n] 277 for j := range ctmp { 278 ctmp[j] = 0 279 } 280 } 281 } 282 283 if alpha == 0 { 284 if beta != 0 { 285 for i := 0; i < m; i++ { 286 ctmp := c[i*ldc : i*ldc+n] 287 for j := 0; j < n; j++ { 288 ctmp[j] *= beta 289 } 290 } 291 } 292 return 293 } 294 295 isUpper := ul == blas.Upper 296 if s == blas.Left { 297 for i := 0; i < m; i++ { 298 atmp := alpha * a[i*lda+i] 299 btmp := b[i*ldb : i*ldb+n] 300 ctmp := c[i*ldc : i*ldc+n] 301 for j, v := range btmp { 302 ctmp[j] *= beta 303 ctmp[j] += atmp * v 304 } 305 306 for k := 0; k < i; k++ { 307 var atmp float64 308 if isUpper { 309 atmp = a[k*lda+i] 310 } else { 311 atmp = a[i*lda+k] 312 } 313 atmp *= alpha 314 f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 315 } 316 for k := i + 1; k < m; k++ { 317 var atmp float64 318 if isUpper { 319 atmp = a[i*lda+k] 320 } else { 321 atmp = a[k*lda+i] 322 } 323 atmp *= alpha 324 f64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ctmp) 325 } 326 } 327 return 328 } 329 if isUpper { 330 for i := 0; i < m; i++ { 331 for j := n - 1; j >= 0; j-- { 332 tmp := alpha * b[i*ldb+j] 333 var tmp2 float64 334 atmp := a[j*lda+j+1 : j*lda+n] 335 btmp := b[i*ldb+j+1 : i*ldb+n] 336 ctmp := c[i*ldc+j+1 : i*ldc+n] 337 for k, v := range atmp { 338 ctmp[k] += tmp * v 339 tmp2 += btmp[k] * v 340 } 341 c[i*ldc+j] *= beta 342 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 343 } 344 } 345 return 346 } 347 for i := 0; i < m; i++ { 348 for j := 0; j < n; j++ { 349 tmp := alpha * b[i*ldb+j] 350 var tmp2 float64 351 atmp := a[j*lda : j*lda+j] 352 btmp := b[i*ldb : i*ldb+j] 353 ctmp := c[i*ldc : i*ldc+j] 354 for k, v := range atmp { 355 ctmp[k] += tmp * v 356 tmp2 += btmp[k] * v 357 } 358 c[i*ldc+j] *= beta 359 c[i*ldc+j] += tmp*a[j*lda+j] + alpha*tmp2 360 } 361 } 362 } 363 364 // Dsyrk performs one of the symmetric rank-k operations 365 // 366 // C = alpha * A * Aᵀ + beta * C if tA == blas.NoTrans 367 // C = alpha * Aᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 368 // 369 // where A is an n×k or k×n matrix, C is an n×n symmetric matrix, and alpha and 370 // beta are scalars. 371 func (Implementation) Dsyrk(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, beta float64, c []float64, ldc int) { 372 if ul != blas.Lower && ul != blas.Upper { 373 panic(badUplo) 374 } 375 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 376 panic(badTranspose) 377 } 378 if n < 0 { 379 panic(nLT0) 380 } 381 if k < 0 { 382 panic(kLT0) 383 } 384 row, col := k, n 385 if tA == blas.NoTrans { 386 row, col = n, k 387 } 388 if lda < max(1, col) { 389 panic(badLdA) 390 } 391 if ldc < max(1, n) { 392 panic(badLdC) 393 } 394 395 // Quick return if possible. 396 if n == 0 { 397 return 398 } 399 400 // For zero matrix size the following slice length checks are trivially satisfied. 401 if len(a) < lda*(row-1)+col { 402 panic(shortA) 403 } 404 if len(c) < ldc*(n-1)+n { 405 panic(shortC) 406 } 407 408 if alpha == 0 { 409 if beta == 0 { 410 if ul == blas.Upper { 411 for i := 0; i < n; i++ { 412 ctmp := c[i*ldc+i : i*ldc+n] 413 for j := range ctmp { 414 ctmp[j] = 0 415 } 416 } 417 return 418 } 419 for i := 0; i < n; i++ { 420 ctmp := c[i*ldc : i*ldc+i+1] 421 for j := range ctmp { 422 ctmp[j] = 0 423 } 424 } 425 return 426 } 427 if ul == blas.Upper { 428 for i := 0; i < n; i++ { 429 ctmp := c[i*ldc+i : i*ldc+n] 430 for j := range ctmp { 431 ctmp[j] *= beta 432 } 433 } 434 return 435 } 436 for i := 0; i < n; i++ { 437 ctmp := c[i*ldc : i*ldc+i+1] 438 for j := range ctmp { 439 ctmp[j] *= beta 440 } 441 } 442 return 443 } 444 if tA == blas.NoTrans { 445 if ul == blas.Upper { 446 for i := 0; i < n; i++ { 447 ctmp := c[i*ldc+i : i*ldc+n] 448 atmp := a[i*lda : i*lda+k] 449 if beta == 0 { 450 for jc := range ctmp { 451 j := jc + i 452 ctmp[jc] = alpha * f64.DotUnitary(atmp, a[j*lda:j*lda+k]) 453 } 454 } else { 455 for jc, vc := range ctmp { 456 j := jc + i 457 ctmp[jc] = vc*beta + alpha*f64.DotUnitary(atmp, a[j*lda:j*lda+k]) 458 } 459 } 460 } 461 return 462 } 463 for i := 0; i < n; i++ { 464 ctmp := c[i*ldc : i*ldc+i+1] 465 atmp := a[i*lda : i*lda+k] 466 if beta == 0 { 467 for j := range ctmp { 468 ctmp[j] = alpha * f64.DotUnitary(a[j*lda:j*lda+k], atmp) 469 } 470 } else { 471 for j, vc := range ctmp { 472 ctmp[j] = vc*beta + alpha*f64.DotUnitary(a[j*lda:j*lda+k], atmp) 473 } 474 } 475 } 476 return 477 } 478 // Cases where a is transposed. 479 if ul == blas.Upper { 480 for i := 0; i < n; i++ { 481 ctmp := c[i*ldc+i : i*ldc+n] 482 if beta == 0 { 483 for j := range ctmp { 484 ctmp[j] = 0 485 } 486 } else if beta != 1 { 487 for j := range ctmp { 488 ctmp[j] *= beta 489 } 490 } 491 for l := 0; l < k; l++ { 492 tmp := alpha * a[l*lda+i] 493 if tmp != 0 { 494 f64.AxpyUnitary(tmp, a[l*lda+i:l*lda+n], ctmp) 495 } 496 } 497 } 498 return 499 } 500 for i := 0; i < n; i++ { 501 ctmp := c[i*ldc : i*ldc+i+1] 502 if beta != 1 { 503 for j := range ctmp { 504 ctmp[j] *= beta 505 } 506 } 507 for l := 0; l < k; l++ { 508 tmp := alpha * a[l*lda+i] 509 if tmp != 0 { 510 f64.AxpyUnitary(tmp, a[l*lda:l*lda+i+1], ctmp) 511 } 512 } 513 } 514 } 515 516 // Dsyr2k performs one of the symmetric rank 2k operations 517 // 518 // C = alpha * A * Bᵀ + alpha * B * Aᵀ + beta * C if tA == blas.NoTrans 519 // C = alpha * Aᵀ * B + alpha * Bᵀ * A + beta * C if tA == blas.Trans or tA == blas.ConjTrans 520 // 521 // where A and B are n×k or k×n matrices, C is an n×n symmetric matrix, and 522 // alpha and beta are scalars. 523 func (Implementation) Dsyr2k(ul blas.Uplo, tA blas.Transpose, n, k int, alpha float64, a []float64, lda int, b []float64, ldb int, beta float64, c []float64, ldc int) { 524 if ul != blas.Lower && ul != blas.Upper { 525 panic(badUplo) 526 } 527 if tA != blas.Trans && tA != blas.NoTrans && tA != blas.ConjTrans { 528 panic(badTranspose) 529 } 530 if n < 0 { 531 panic(nLT0) 532 } 533 if k < 0 { 534 panic(kLT0) 535 } 536 row, col := k, n 537 if tA == blas.NoTrans { 538 row, col = n, k 539 } 540 if lda < max(1, col) { 541 panic(badLdA) 542 } 543 if ldb < max(1, col) { 544 panic(badLdB) 545 } 546 if ldc < max(1, n) { 547 panic(badLdC) 548 } 549 550 // Quick return if possible. 551 if n == 0 { 552 return 553 } 554 555 // For zero matrix size the following slice length checks are trivially satisfied. 556 if len(a) < lda*(row-1)+col { 557 panic(shortA) 558 } 559 if len(b) < ldb*(row-1)+col { 560 panic(shortB) 561 } 562 if len(c) < ldc*(n-1)+n { 563 panic(shortC) 564 } 565 566 if alpha == 0 { 567 if beta == 0 { 568 if ul == blas.Upper { 569 for i := 0; i < n; i++ { 570 ctmp := c[i*ldc+i : i*ldc+n] 571 for j := range ctmp { 572 ctmp[j] = 0 573 } 574 } 575 return 576 } 577 for i := 0; i < n; i++ { 578 ctmp := c[i*ldc : i*ldc+i+1] 579 for j := range ctmp { 580 ctmp[j] = 0 581 } 582 } 583 return 584 } 585 if ul == blas.Upper { 586 for i := 0; i < n; i++ { 587 ctmp := c[i*ldc+i : i*ldc+n] 588 for j := range ctmp { 589 ctmp[j] *= beta 590 } 591 } 592 return 593 } 594 for i := 0; i < n; i++ { 595 ctmp := c[i*ldc : i*ldc+i+1] 596 for j := range ctmp { 597 ctmp[j] *= beta 598 } 599 } 600 return 601 } 602 if tA == blas.NoTrans { 603 if ul == blas.Upper { 604 for i := 0; i < n; i++ { 605 atmp := a[i*lda : i*lda+k] 606 btmp := b[i*ldb : i*ldb+k] 607 ctmp := c[i*ldc+i : i*ldc+n] 608 if beta == 0 { 609 for jc := range ctmp { 610 j := i + jc 611 var tmp1, tmp2 float64 612 binner := b[j*ldb : j*ldb+k] 613 for l, v := range a[j*lda : j*lda+k] { 614 tmp1 += v * btmp[l] 615 tmp2 += atmp[l] * binner[l] 616 } 617 ctmp[jc] = alpha * (tmp1 + tmp2) 618 } 619 } else { 620 for jc := range ctmp { 621 j := i + jc 622 var tmp1, tmp2 float64 623 binner := b[j*ldb : j*ldb+k] 624 for l, v := range a[j*lda : j*lda+k] { 625 tmp1 += v * btmp[l] 626 tmp2 += atmp[l] * binner[l] 627 } 628 ctmp[jc] *= beta 629 ctmp[jc] += alpha * (tmp1 + tmp2) 630 } 631 } 632 } 633 return 634 } 635 for i := 0; i < n; i++ { 636 atmp := a[i*lda : i*lda+k] 637 btmp := b[i*ldb : i*ldb+k] 638 ctmp := c[i*ldc : i*ldc+i+1] 639 if beta == 0 { 640 for j := 0; j <= i; j++ { 641 var tmp1, tmp2 float64 642 binner := b[j*ldb : j*ldb+k] 643 for l, v := range a[j*lda : j*lda+k] { 644 tmp1 += v * btmp[l] 645 tmp2 += atmp[l] * binner[l] 646 } 647 ctmp[j] = alpha * (tmp1 + tmp2) 648 } 649 } else { 650 for j := 0; j <= i; j++ { 651 var tmp1, tmp2 float64 652 binner := b[j*ldb : j*ldb+k] 653 for l, v := range a[j*lda : j*lda+k] { 654 tmp1 += v * btmp[l] 655 tmp2 += atmp[l] * binner[l] 656 } 657 ctmp[j] *= beta 658 ctmp[j] += alpha * (tmp1 + tmp2) 659 } 660 } 661 } 662 return 663 } 664 if ul == blas.Upper { 665 for i := 0; i < n; i++ { 666 ctmp := c[i*ldc+i : i*ldc+n] 667 switch beta { 668 case 0: 669 for j := range ctmp { 670 ctmp[j] = 0 671 } 672 case 1: 673 default: 674 for j := range ctmp { 675 ctmp[j] *= beta 676 } 677 } 678 for l := 0; l < k; l++ { 679 tmp1 := alpha * b[l*ldb+i] 680 tmp2 := alpha * a[l*lda+i] 681 btmp := b[l*ldb+i : l*ldb+n] 682 if tmp1 != 0 || tmp2 != 0 { 683 for j, v := range a[l*lda+i : l*lda+n] { 684 ctmp[j] += v*tmp1 + btmp[j]*tmp2 685 } 686 } 687 } 688 } 689 return 690 } 691 for i := 0; i < n; i++ { 692 ctmp := c[i*ldc : i*ldc+i+1] 693 switch beta { 694 case 0: 695 for j := range ctmp { 696 ctmp[j] = 0 697 } 698 case 1: 699 default: 700 for j := range ctmp { 701 ctmp[j] *= beta 702 } 703 } 704 for l := 0; l < k; l++ { 705 tmp1 := alpha * b[l*ldb+i] 706 tmp2 := alpha * a[l*lda+i] 707 btmp := b[l*ldb : l*ldb+i+1] 708 if tmp1 != 0 || tmp2 != 0 { 709 for j, v := range a[l*lda : l*lda+i+1] { 710 ctmp[j] += v*tmp1 + btmp[j]*tmp2 711 } 712 } 713 } 714 } 715 } 716 717 // Dtrmm performs one of the matrix-matrix operations 718 // 719 // B = alpha * A * B if tA == blas.NoTrans and side == blas.Left 720 // B = alpha * Aᵀ * B if tA == blas.Trans or blas.ConjTrans, and side == blas.Left 721 // B = alpha * B * A if tA == blas.NoTrans and side == blas.Right 722 // B = alpha * B * Aᵀ if tA == blas.Trans or blas.ConjTrans, and side == blas.Right 723 // 724 // where A is an n×n or m×m triangular matrix, B is an m×n matrix, and alpha is a scalar. 725 func (Implementation) Dtrmm(s blas.Side, ul blas.Uplo, tA blas.Transpose, d blas.Diag, m, n int, alpha float64, a []float64, lda int, b []float64, ldb int) { 726 if s != blas.Left && s != blas.Right { 727 panic(badSide) 728 } 729 if ul != blas.Lower && ul != blas.Upper { 730 panic(badUplo) 731 } 732 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 733 panic(badTranspose) 734 } 735 if d != blas.NonUnit && d != blas.Unit { 736 panic(badDiag) 737 } 738 if m < 0 { 739 panic(mLT0) 740 } 741 if n < 0 { 742 panic(nLT0) 743 } 744 k := n 745 if s == blas.Left { 746 k = m 747 } 748 if lda < max(1, k) { 749 panic(badLdA) 750 } 751 if ldb < max(1, n) { 752 panic(badLdB) 753 } 754 755 // Quick return if possible. 756 if m == 0 || n == 0 { 757 return 758 } 759 760 // For zero matrix size the following slice length checks are trivially satisfied. 761 if len(a) < lda*(k-1)+k { 762 panic(shortA) 763 } 764 if len(b) < ldb*(m-1)+n { 765 panic(shortB) 766 } 767 768 if alpha == 0 { 769 for i := 0; i < m; i++ { 770 btmp := b[i*ldb : i*ldb+n] 771 for j := range btmp { 772 btmp[j] = 0 773 } 774 } 775 return 776 } 777 778 nonUnit := d == blas.NonUnit 779 if s == blas.Left { 780 if tA == blas.NoTrans { 781 if ul == blas.Upper { 782 for i := 0; i < m; i++ { 783 tmp := alpha 784 if nonUnit { 785 tmp *= a[i*lda+i] 786 } 787 btmp := b[i*ldb : i*ldb+n] 788 f64.ScalUnitary(tmp, btmp) 789 for ka, va := range a[i*lda+i+1 : i*lda+m] { 790 k := ka + i + 1 791 if va != 0 { 792 f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 793 } 794 } 795 } 796 return 797 } 798 for i := m - 1; i >= 0; i-- { 799 tmp := alpha 800 if nonUnit { 801 tmp *= a[i*lda+i] 802 } 803 btmp := b[i*ldb : i*ldb+n] 804 f64.ScalUnitary(tmp, btmp) 805 for k, va := range a[i*lda : i*lda+i] { 806 if va != 0 { 807 f64.AxpyUnitary(alpha*va, b[k*ldb:k*ldb+n], btmp) 808 } 809 } 810 } 811 return 812 } 813 // Cases where a is transposed. 814 if ul == blas.Upper { 815 for k := m - 1; k >= 0; k-- { 816 btmpk := b[k*ldb : k*ldb+n] 817 for ia, va := range a[k*lda+k+1 : k*lda+m] { 818 i := ia + k + 1 819 btmp := b[i*ldb : i*ldb+n] 820 if va != 0 { 821 f64.AxpyUnitary(alpha*va, btmpk, btmp) 822 } 823 } 824 tmp := alpha 825 if nonUnit { 826 tmp *= a[k*lda+k] 827 } 828 if tmp != 1 { 829 f64.ScalUnitary(tmp, btmpk) 830 } 831 } 832 return 833 } 834 for k := 0; k < m; k++ { 835 btmpk := b[k*ldb : k*ldb+n] 836 for i, va := range a[k*lda : k*lda+k] { 837 btmp := b[i*ldb : i*ldb+n] 838 if va != 0 { 839 f64.AxpyUnitary(alpha*va, btmpk, btmp) 840 } 841 } 842 tmp := alpha 843 if nonUnit { 844 tmp *= a[k*lda+k] 845 } 846 if tmp != 1 { 847 f64.ScalUnitary(tmp, btmpk) 848 } 849 } 850 return 851 } 852 // Cases where a is on the right 853 if tA == blas.NoTrans { 854 if ul == blas.Upper { 855 for i := 0; i < m; i++ { 856 btmp := b[i*ldb : i*ldb+n] 857 for k := n - 1; k >= 0; k-- { 858 tmp := alpha * btmp[k] 859 if tmp == 0 { 860 continue 861 } 862 btmp[k] = tmp 863 if nonUnit { 864 btmp[k] *= a[k*lda+k] 865 } 866 f64.AxpyUnitary(tmp, a[k*lda+k+1:k*lda+n], btmp[k+1:n]) 867 } 868 } 869 return 870 } 871 for i := 0; i < m; i++ { 872 btmp := b[i*ldb : i*ldb+n] 873 for k := 0; k < n; k++ { 874 tmp := alpha * btmp[k] 875 if tmp == 0 { 876 continue 877 } 878 btmp[k] = tmp 879 if nonUnit { 880 btmp[k] *= a[k*lda+k] 881 } 882 f64.AxpyUnitary(tmp, a[k*lda:k*lda+k], btmp[:k]) 883 } 884 } 885 return 886 } 887 // Cases where a is transposed. 888 if ul == blas.Upper { 889 for i := 0; i < m; i++ { 890 btmp := b[i*ldb : i*ldb+n] 891 for j, vb := range btmp { 892 tmp := vb 893 if nonUnit { 894 tmp *= a[j*lda+j] 895 } 896 tmp += f64.DotUnitary(a[j*lda+j+1:j*lda+n], btmp[j+1:n]) 897 btmp[j] = alpha * tmp 898 } 899 } 900 return 901 } 902 for i := 0; i < m; i++ { 903 btmp := b[i*ldb : i*ldb+n] 904 for j := n - 1; j >= 0; j-- { 905 tmp := btmp[j] 906 if nonUnit { 907 tmp *= a[j*lda+j] 908 } 909 tmp += f64.DotUnitary(a[j*lda:j*lda+j], btmp[:j]) 910 btmp[j] = alpha * tmp 911 } 912 } 913 }