gonum.org/v1/gonum@v0.14.0/blas/gonum/level3cmplx128.go (about) 1 // Copyright ©2019 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "math/cmplx" 9 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/internal/asm/c128" 12 ) 13 14 var _ blas.Complex128Level3 = Implementation{} 15 16 // Zgemm performs one of the matrix-matrix operations 17 // 18 // C = alpha * op(A) * op(B) + beta * C 19 // 20 // where op(X) is one of 21 // 22 // op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, 23 // 24 // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, 25 // op(B) a k×n matrix and C an m×n matrix. 26 func (Implementation) Zgemm(tA, tB blas.Transpose, m, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { 27 switch tA { 28 default: 29 panic(badTranspose) 30 case blas.NoTrans, blas.Trans, blas.ConjTrans: 31 } 32 switch tB { 33 default: 34 panic(badTranspose) 35 case blas.NoTrans, blas.Trans, blas.ConjTrans: 36 } 37 switch { 38 case m < 0: 39 panic(mLT0) 40 case n < 0: 41 panic(nLT0) 42 case k < 0: 43 panic(kLT0) 44 } 45 rowA, colA := m, k 46 if tA != blas.NoTrans { 47 rowA, colA = k, m 48 } 49 if lda < max(1, colA) { 50 panic(badLdA) 51 } 52 rowB, colB := k, n 53 if tB != blas.NoTrans { 54 rowB, colB = n, k 55 } 56 if ldb < max(1, colB) { 57 panic(badLdB) 58 } 59 if ldc < max(1, n) { 60 panic(badLdC) 61 } 62 63 // Quick return if possible. 64 if m == 0 || n == 0 { 65 return 66 } 67 68 // For zero matrix size the following slice length checks are trivially satisfied. 69 if len(a) < (rowA-1)*lda+colA { 70 panic(shortA) 71 } 72 if len(b) < (rowB-1)*ldb+colB { 73 panic(shortB) 74 } 75 if len(c) < (m-1)*ldc+n { 76 panic(shortC) 77 } 78 79 // Quick return if possible. 80 if (alpha == 0 || k == 0) && beta == 1 { 81 return 82 } 83 84 if alpha == 0 { 85 if beta == 0 { 86 for i := 0; i < m; i++ { 87 for j := 0; j < n; j++ { 88 c[i*ldc+j] = 0 89 } 90 } 91 } else { 92 for i := 0; i < m; i++ { 93 for j := 0; j < n; j++ { 94 c[i*ldc+j] *= beta 95 } 96 } 97 } 98 return 99 } 100 101 switch tA { 102 case blas.NoTrans: 103 switch tB { 104 case blas.NoTrans: 105 // Form C = alpha * A * B + beta * C. 106 for i := 0; i < m; i++ { 107 switch { 108 case beta == 0: 109 for j := 0; j < n; j++ { 110 c[i*ldc+j] = 0 111 } 112 case beta != 1: 113 for j := 0; j < n; j++ { 114 c[i*ldc+j] *= beta 115 } 116 } 117 for l := 0; l < k; l++ { 118 tmp := alpha * a[i*lda+l] 119 for j := 0; j < n; j++ { 120 c[i*ldc+j] += tmp * b[l*ldb+j] 121 } 122 } 123 } 124 case blas.Trans: 125 // Form C = alpha * A * Bᵀ + beta * C. 126 for i := 0; i < m; i++ { 127 switch { 128 case beta == 0: 129 for j := 0; j < n; j++ { 130 c[i*ldc+j] = 0 131 } 132 case beta != 1: 133 for j := 0; j < n; j++ { 134 c[i*ldc+j] *= beta 135 } 136 } 137 for l := 0; l < k; l++ { 138 tmp := alpha * a[i*lda+l] 139 for j := 0; j < n; j++ { 140 c[i*ldc+j] += tmp * b[j*ldb+l] 141 } 142 } 143 } 144 case blas.ConjTrans: 145 // Form C = alpha * A * Bᴴ + beta * C. 146 for i := 0; i < m; i++ { 147 switch { 148 case beta == 0: 149 for j := 0; j < n; j++ { 150 c[i*ldc+j] = 0 151 } 152 case beta != 1: 153 for j := 0; j < n; j++ { 154 c[i*ldc+j] *= beta 155 } 156 } 157 for l := 0; l < k; l++ { 158 tmp := alpha * a[i*lda+l] 159 for j := 0; j < n; j++ { 160 c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) 161 } 162 } 163 } 164 } 165 case blas.Trans: 166 switch tB { 167 case blas.NoTrans: 168 // Form C = alpha * Aᵀ * B + beta * C. 169 for i := 0; i < m; i++ { 170 for j := 0; j < n; j++ { 171 var tmp complex128 172 for l := 0; l < k; l++ { 173 tmp += a[l*lda+i] * b[l*ldb+j] 174 } 175 if beta == 0 { 176 c[i*ldc+j] = alpha * tmp 177 } else { 178 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 179 } 180 } 181 } 182 case blas.Trans: 183 // Form C = alpha * Aᵀ * Bᵀ + beta * C. 184 for i := 0; i < m; i++ { 185 for j := 0; j < n; j++ { 186 var tmp complex128 187 for l := 0; l < k; l++ { 188 tmp += a[l*lda+i] * b[j*ldb+l] 189 } 190 if beta == 0 { 191 c[i*ldc+j] = alpha * tmp 192 } else { 193 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 194 } 195 } 196 } 197 case blas.ConjTrans: 198 // Form C = alpha * Aᵀ * Bᴴ + beta * C. 199 for i := 0; i < m; i++ { 200 for j := 0; j < n; j++ { 201 var tmp complex128 202 for l := 0; l < k; l++ { 203 tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) 204 } 205 if beta == 0 { 206 c[i*ldc+j] = alpha * tmp 207 } else { 208 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 209 } 210 } 211 } 212 } 213 case blas.ConjTrans: 214 switch tB { 215 case blas.NoTrans: 216 // Form C = alpha * Aᴴ * B + beta * C. 217 for i := 0; i < m; i++ { 218 for j := 0; j < n; j++ { 219 var tmp complex128 220 for l := 0; l < k; l++ { 221 tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] 222 } 223 if beta == 0 { 224 c[i*ldc+j] = alpha * tmp 225 } else { 226 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 227 } 228 } 229 } 230 case blas.Trans: 231 // Form C = alpha * Aᴴ * Bᵀ + beta * C. 232 for i := 0; i < m; i++ { 233 for j := 0; j < n; j++ { 234 var tmp complex128 235 for l := 0; l < k; l++ { 236 tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] 237 } 238 if beta == 0 { 239 c[i*ldc+j] = alpha * tmp 240 } else { 241 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 242 } 243 } 244 } 245 case blas.ConjTrans: 246 // Form C = alpha * Aᴴ * Bᴴ + beta * C. 247 for i := 0; i < m; i++ { 248 for j := 0; j < n; j++ { 249 var tmp complex128 250 for l := 0; l < k; l++ { 251 tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) 252 } 253 if beta == 0 { 254 c[i*ldc+j] = alpha * tmp 255 } else { 256 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 257 } 258 } 259 } 260 } 261 } 262 } 263 264 // Zhemm performs one of the matrix-matrix operations 265 // 266 // C = alpha*A*B + beta*C if side == blas.Left 267 // C = alpha*B*A + beta*C if side == blas.Right 268 // 269 // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B 270 // and C are m×n matrices. The imaginary parts of the diagonal elements of A are 271 // assumed to be zero. 272 func (Implementation) Zhemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { 273 na := m 274 if side == blas.Right { 275 na = n 276 } 277 switch { 278 case side != blas.Left && side != blas.Right: 279 panic(badSide) 280 case uplo != blas.Lower && uplo != blas.Upper: 281 panic(badUplo) 282 case m < 0: 283 panic(mLT0) 284 case n < 0: 285 panic(nLT0) 286 case lda < max(1, na): 287 panic(badLdA) 288 case ldb < max(1, n): 289 panic(badLdB) 290 case ldc < max(1, n): 291 panic(badLdC) 292 } 293 294 // Quick return if possible. 295 if m == 0 || n == 0 { 296 return 297 } 298 299 // For zero matrix size the following slice length checks are trivially satisfied. 300 if len(a) < lda*(na-1)+na { 301 panic(shortA) 302 } 303 if len(b) < ldb*(m-1)+n { 304 panic(shortB) 305 } 306 if len(c) < ldc*(m-1)+n { 307 panic(shortC) 308 } 309 310 // Quick return if possible. 311 if alpha == 0 && beta == 1 { 312 return 313 } 314 315 if alpha == 0 { 316 if beta == 0 { 317 for i := 0; i < m; i++ { 318 ci := c[i*ldc : i*ldc+n] 319 for j := range ci { 320 ci[j] = 0 321 } 322 } 323 } else { 324 for i := 0; i < m; i++ { 325 ci := c[i*ldc : i*ldc+n] 326 c128.ScalUnitary(beta, ci) 327 } 328 } 329 return 330 } 331 332 if side == blas.Left { 333 // Form C = alpha*A*B + beta*C. 334 for i := 0; i < m; i++ { 335 atmp := alpha * complex(real(a[i*lda+i]), 0) 336 bi := b[i*ldb : i*ldb+n] 337 ci := c[i*ldc : i*ldc+n] 338 if beta == 0 { 339 for j, bij := range bi { 340 ci[j] = atmp * bij 341 } 342 } else { 343 for j, bij := range bi { 344 ci[j] = atmp*bij + beta*ci[j] 345 } 346 } 347 if uplo == blas.Upper { 348 for k := 0; k < i; k++ { 349 atmp = alpha * cmplx.Conj(a[k*lda+i]) 350 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 351 } 352 for k := i + 1; k < m; k++ { 353 atmp = alpha * a[i*lda+k] 354 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 355 } 356 } else { 357 for k := 0; k < i; k++ { 358 atmp = alpha * a[i*lda+k] 359 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 360 } 361 for k := i + 1; k < m; k++ { 362 atmp = alpha * cmplx.Conj(a[k*lda+i]) 363 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 364 } 365 } 366 } 367 } else { 368 // Form C = alpha*B*A + beta*C. 369 if uplo == blas.Upper { 370 for i := 0; i < m; i++ { 371 for j := n - 1; j >= 0; j-- { 372 abij := alpha * b[i*ldb+j] 373 aj := a[j*lda+j+1 : j*lda+n] 374 bi := b[i*ldb+j+1 : i*ldb+n] 375 ci := c[i*ldc+j+1 : i*ldc+n] 376 var tmp complex128 377 for k, ajk := range aj { 378 ci[k] += abij * ajk 379 tmp += bi[k] * cmplx.Conj(ajk) 380 } 381 ajj := complex(real(a[j*lda+j]), 0) 382 if beta == 0 { 383 c[i*ldc+j] = abij*ajj + alpha*tmp 384 } else { 385 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 386 } 387 } 388 } 389 } else { 390 for i := 0; i < m; i++ { 391 for j := 0; j < n; j++ { 392 abij := alpha * b[i*ldb+j] 393 aj := a[j*lda : j*lda+j] 394 bi := b[i*ldb : i*ldb+j] 395 ci := c[i*ldc : i*ldc+j] 396 var tmp complex128 397 for k, ajk := range aj { 398 ci[k] += abij * ajk 399 tmp += bi[k] * cmplx.Conj(ajk) 400 } 401 ajj := complex(real(a[j*lda+j]), 0) 402 if beta == 0 { 403 c[i*ldc+j] = abij*ajj + alpha*tmp 404 } else { 405 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 406 } 407 } 408 } 409 } 410 } 411 } 412 413 // Zherk performs one of the hermitian rank-k operations 414 // 415 // C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans 416 // C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans 417 // 418 // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is 419 // an n×k matrix in the first case and a k×n matrix in the second case. 420 // 421 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 422 // on return they will be set to zero. 423 func (Implementation) Zherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float64, a []complex128, lda int, beta float64, c []complex128, ldc int) { 424 var rowA, colA int 425 switch trans { 426 default: 427 panic(badTranspose) 428 case blas.NoTrans: 429 rowA, colA = n, k 430 case blas.ConjTrans: 431 rowA, colA = k, n 432 } 433 switch { 434 case uplo != blas.Lower && uplo != blas.Upper: 435 panic(badUplo) 436 case n < 0: 437 panic(nLT0) 438 case k < 0: 439 panic(kLT0) 440 case lda < max(1, colA): 441 panic(badLdA) 442 case ldc < max(1, n): 443 panic(badLdC) 444 } 445 446 // Quick return if possible. 447 if n == 0 { 448 return 449 } 450 451 // For zero matrix size the following slice length checks are trivially satisfied. 452 if len(a) < (rowA-1)*lda+colA { 453 panic(shortA) 454 } 455 if len(c) < (n-1)*ldc+n { 456 panic(shortC) 457 } 458 459 // Quick return if possible. 460 if (alpha == 0 || k == 0) && beta == 1 { 461 return 462 } 463 464 if alpha == 0 { 465 if uplo == blas.Upper { 466 if beta == 0 { 467 for i := 0; i < n; i++ { 468 ci := c[i*ldc+i : i*ldc+n] 469 for j := range ci { 470 ci[j] = 0 471 } 472 } 473 } else { 474 for i := 0; i < n; i++ { 475 ci := c[i*ldc+i : i*ldc+n] 476 ci[0] = complex(beta*real(ci[0]), 0) 477 if i != n-1 { 478 c128.DscalUnitary(beta, ci[1:]) 479 } 480 } 481 } 482 } else { 483 if beta == 0 { 484 for i := 0; i < n; i++ { 485 ci := c[i*ldc : i*ldc+i+1] 486 for j := range ci { 487 ci[j] = 0 488 } 489 } 490 } else { 491 for i := 0; i < n; i++ { 492 ci := c[i*ldc : i*ldc+i+1] 493 if i != 0 { 494 c128.DscalUnitary(beta, ci[:i]) 495 } 496 ci[i] = complex(beta*real(ci[i]), 0) 497 } 498 } 499 } 500 return 501 } 502 503 calpha := complex(alpha, 0) 504 if trans == blas.NoTrans { 505 // Form C = alpha*A*Aᴴ + beta*C. 506 cbeta := complex(beta, 0) 507 if uplo == blas.Upper { 508 for i := 0; i < n; i++ { 509 ci := c[i*ldc+i : i*ldc+n] 510 ai := a[i*lda : i*lda+k] 511 switch { 512 case beta == 0: 513 // Handle the i-th diagonal element of C. 514 ci[0] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) 515 // Handle the remaining elements on the i-th row of C. 516 for jc := range ci[1:] { 517 j := i + 1 + jc 518 ci[jc+1] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) 519 } 520 case beta != 1: 521 cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[0] 522 ci[0] = complex(real(cii), 0) 523 for jc, cij := range ci[1:] { 524 j := i + 1 + jc 525 ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 526 } 527 default: 528 cii := calpha*c128.DotcUnitary(ai, ai) + ci[0] 529 ci[0] = complex(real(cii), 0) 530 for jc, cij := range ci[1:] { 531 j := i + 1 + jc 532 ci[jc+1] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 533 } 534 } 535 } 536 } else { 537 for i := 0; i < n; i++ { 538 ci := c[i*ldc : i*ldc+i+1] 539 ai := a[i*lda : i*lda+k] 540 switch { 541 case beta == 0: 542 // Handle the first i-1 elements on the i-th row of C. 543 for j := range ci[:i] { 544 ci[j] = calpha * c128.DotcUnitary(a[j*lda:j*lda+k], ai) 545 } 546 // Handle the i-th diagonal element of C. 547 ci[i] = complex(alpha*real(c128.DotcUnitary(ai, ai)), 0) 548 case beta != 1: 549 for j, cij := range ci[:i] { 550 ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 551 } 552 cii := calpha*c128.DotcUnitary(ai, ai) + cbeta*ci[i] 553 ci[i] = complex(real(cii), 0) 554 default: 555 for j, cij := range ci[:i] { 556 ci[j] = calpha*c128.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 557 } 558 cii := calpha*c128.DotcUnitary(ai, ai) + ci[i] 559 ci[i] = complex(real(cii), 0) 560 } 561 } 562 } 563 } else { 564 // Form C = alpha*Aᴴ*A + beta*C. 565 if uplo == blas.Upper { 566 for i := 0; i < n; i++ { 567 ci := c[i*ldc+i : i*ldc+n] 568 switch { 569 case beta == 0: 570 for jc := range ci { 571 ci[jc] = 0 572 } 573 case beta != 1: 574 c128.DscalUnitary(beta, ci) 575 ci[0] = complex(real(ci[0]), 0) 576 default: 577 ci[0] = complex(real(ci[0]), 0) 578 } 579 for j := 0; j < k; j++ { 580 aji := cmplx.Conj(a[j*lda+i]) 581 if aji != 0 { 582 c128.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) 583 } 584 } 585 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 586 } 587 } else { 588 for i := 0; i < n; i++ { 589 ci := c[i*ldc : i*ldc+i+1] 590 switch { 591 case beta == 0: 592 for j := range ci { 593 ci[j] = 0 594 } 595 case beta != 1: 596 c128.DscalUnitary(beta, ci) 597 ci[i] = complex(real(ci[i]), 0) 598 default: 599 ci[i] = complex(real(ci[i]), 0) 600 } 601 for j := 0; j < k; j++ { 602 aji := cmplx.Conj(a[j*lda+i]) 603 if aji != 0 { 604 c128.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) 605 } 606 } 607 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 608 } 609 } 610 } 611 } 612 613 // Zher2k performs one of the hermitian rank-2k operations 614 // 615 // C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans 616 // C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans 617 // 618 // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix 619 // and A and B are n×k matrices in the first case and k×n matrices in the second case. 620 // 621 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 622 // on return they will be set to zero. 623 func (Implementation) Zher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta float64, c []complex128, ldc int) { 624 var row, col int 625 switch trans { 626 default: 627 panic(badTranspose) 628 case blas.NoTrans: 629 row, col = n, k 630 case blas.ConjTrans: 631 row, col = k, n 632 } 633 switch { 634 case uplo != blas.Lower && uplo != blas.Upper: 635 panic(badUplo) 636 case n < 0: 637 panic(nLT0) 638 case k < 0: 639 panic(kLT0) 640 case lda < max(1, col): 641 panic(badLdA) 642 case ldb < max(1, col): 643 panic(badLdB) 644 case ldc < max(1, n): 645 panic(badLdC) 646 } 647 648 // Quick return if possible. 649 if n == 0 { 650 return 651 } 652 653 // For zero matrix size the following slice length checks are trivially satisfied. 654 if len(a) < (row-1)*lda+col { 655 panic(shortA) 656 } 657 if len(b) < (row-1)*ldb+col { 658 panic(shortB) 659 } 660 if len(c) < (n-1)*ldc+n { 661 panic(shortC) 662 } 663 664 // Quick return if possible. 665 if (alpha == 0 || k == 0) && beta == 1 { 666 return 667 } 668 669 if alpha == 0 { 670 if uplo == blas.Upper { 671 if beta == 0 { 672 for i := 0; i < n; i++ { 673 ci := c[i*ldc+i : i*ldc+n] 674 for j := range ci { 675 ci[j] = 0 676 } 677 } 678 } else { 679 for i := 0; i < n; i++ { 680 ci := c[i*ldc+i : i*ldc+n] 681 ci[0] = complex(beta*real(ci[0]), 0) 682 if i != n-1 { 683 c128.DscalUnitary(beta, ci[1:]) 684 } 685 } 686 } 687 } else { 688 if beta == 0 { 689 for i := 0; i < n; i++ { 690 ci := c[i*ldc : i*ldc+i+1] 691 for j := range ci { 692 ci[j] = 0 693 } 694 } 695 } else { 696 for i := 0; i < n; i++ { 697 ci := c[i*ldc : i*ldc+i+1] 698 if i != 0 { 699 c128.DscalUnitary(beta, ci[:i]) 700 } 701 ci[i] = complex(beta*real(ci[i]), 0) 702 } 703 } 704 } 705 return 706 } 707 708 conjalpha := cmplx.Conj(alpha) 709 cbeta := complex(beta, 0) 710 if trans == blas.NoTrans { 711 // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. 712 if uplo == blas.Upper { 713 for i := 0; i < n; i++ { 714 ci := c[i*ldc+i+1 : i*ldc+n] 715 ai := a[i*lda : i*lda+k] 716 bi := b[i*ldb : i*ldb+k] 717 if beta == 0 { 718 cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) 719 c[i*ldc+i] = complex(real(cii), 0) 720 for jc := range ci { 721 j := i + 1 + jc 722 ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) 723 } 724 } else { 725 cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 726 c[i*ldc+i] = complex(real(cii), 0) 727 for jc, cij := range ci { 728 j := i + 1 + jc 729 ci[jc] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 730 } 731 } 732 } 733 } else { 734 for i := 0; i < n; i++ { 735 ci := c[i*ldc : i*ldc+i] 736 ai := a[i*lda : i*lda+k] 737 bi := b[i*ldb : i*ldb+k] 738 if beta == 0 { 739 for j := range ci { 740 ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) 741 } 742 cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) 743 c[i*ldc+i] = complex(real(cii), 0) 744 } else { 745 for j, cij := range ci { 746 ci[j] = alpha*c128.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c128.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 747 } 748 cii := alpha*c128.DotcUnitary(bi, ai) + conjalpha*c128.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 749 c[i*ldc+i] = complex(real(cii), 0) 750 } 751 } 752 } 753 } else { 754 // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. 755 if uplo == blas.Upper { 756 for i := 0; i < n; i++ { 757 ci := c[i*ldc+i : i*ldc+n] 758 switch { 759 case beta == 0: 760 for jc := range ci { 761 ci[jc] = 0 762 } 763 case beta != 1: 764 c128.DscalUnitary(beta, ci) 765 ci[0] = complex(real(ci[0]), 0) 766 default: 767 ci[0] = complex(real(ci[0]), 0) 768 } 769 for j := 0; j < k; j++ { 770 aji := a[j*lda+i] 771 bji := b[j*ldb+i] 772 if aji != 0 { 773 c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) 774 } 775 if bji != 0 { 776 c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) 777 } 778 } 779 ci[0] = complex(real(ci[0]), 0) 780 } 781 } else { 782 for i := 0; i < n; i++ { 783 ci := c[i*ldc : i*ldc+i+1] 784 switch { 785 case beta == 0: 786 for j := range ci { 787 ci[j] = 0 788 } 789 case beta != 1: 790 c128.DscalUnitary(beta, ci) 791 ci[i] = complex(real(ci[i]), 0) 792 default: 793 ci[i] = complex(real(ci[i]), 0) 794 } 795 for j := 0; j < k; j++ { 796 aji := a[j*lda+i] 797 bji := b[j*ldb+i] 798 if aji != 0 { 799 c128.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) 800 } 801 if bji != 0 { 802 c128.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) 803 } 804 } 805 ci[i] = complex(real(ci[i]), 0) 806 } 807 } 808 } 809 } 810 811 // Zsymm performs one of the matrix-matrix operations 812 // 813 // C = alpha*A*B + beta*C if side == blas.Left 814 // C = alpha*B*A + beta*C if side == blas.Right 815 // 816 // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B 817 // and C are m×n matrices. 818 func (Implementation) Zsymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { 819 na := m 820 if side == blas.Right { 821 na = n 822 } 823 switch { 824 case side != blas.Left && side != blas.Right: 825 panic(badSide) 826 case uplo != blas.Lower && uplo != blas.Upper: 827 panic(badUplo) 828 case m < 0: 829 panic(mLT0) 830 case n < 0: 831 panic(nLT0) 832 case lda < max(1, na): 833 panic(badLdA) 834 case ldb < max(1, n): 835 panic(badLdB) 836 case ldc < max(1, n): 837 panic(badLdC) 838 } 839 840 // Quick return if possible. 841 if m == 0 || n == 0 { 842 return 843 } 844 845 // For zero matrix size the following slice length checks are trivially satisfied. 846 if len(a) < lda*(na-1)+na { 847 panic(shortA) 848 } 849 if len(b) < ldb*(m-1)+n { 850 panic(shortB) 851 } 852 if len(c) < ldc*(m-1)+n { 853 panic(shortC) 854 } 855 856 // Quick return if possible. 857 if alpha == 0 && beta == 1 { 858 return 859 } 860 861 if alpha == 0 { 862 if beta == 0 { 863 for i := 0; i < m; i++ { 864 ci := c[i*ldc : i*ldc+n] 865 for j := range ci { 866 ci[j] = 0 867 } 868 } 869 } else { 870 for i := 0; i < m; i++ { 871 ci := c[i*ldc : i*ldc+n] 872 c128.ScalUnitary(beta, ci) 873 } 874 } 875 return 876 } 877 878 if side == blas.Left { 879 // Form C = alpha*A*B + beta*C. 880 for i := 0; i < m; i++ { 881 atmp := alpha * a[i*lda+i] 882 bi := b[i*ldb : i*ldb+n] 883 ci := c[i*ldc : i*ldc+n] 884 if beta == 0 { 885 for j, bij := range bi { 886 ci[j] = atmp * bij 887 } 888 } else { 889 for j, bij := range bi { 890 ci[j] = atmp*bij + beta*ci[j] 891 } 892 } 893 if uplo == blas.Upper { 894 for k := 0; k < i; k++ { 895 atmp = alpha * a[k*lda+i] 896 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 897 } 898 for k := i + 1; k < m; k++ { 899 atmp = alpha * a[i*lda+k] 900 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 901 } 902 } else { 903 for k := 0; k < i; k++ { 904 atmp = alpha * a[i*lda+k] 905 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 906 } 907 for k := i + 1; k < m; k++ { 908 atmp = alpha * a[k*lda+i] 909 c128.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 910 } 911 } 912 } 913 } else { 914 // Form C = alpha*B*A + beta*C. 915 if uplo == blas.Upper { 916 for i := 0; i < m; i++ { 917 for j := n - 1; j >= 0; j-- { 918 abij := alpha * b[i*ldb+j] 919 aj := a[j*lda+j+1 : j*lda+n] 920 bi := b[i*ldb+j+1 : i*ldb+n] 921 ci := c[i*ldc+j+1 : i*ldc+n] 922 var tmp complex128 923 for k, ajk := range aj { 924 ci[k] += abij * ajk 925 tmp += bi[k] * ajk 926 } 927 if beta == 0 { 928 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 929 } else { 930 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 931 } 932 } 933 } 934 } else { 935 for i := 0; i < m; i++ { 936 for j := 0; j < n; j++ { 937 abij := alpha * b[i*ldb+j] 938 aj := a[j*lda : j*lda+j] 939 bi := b[i*ldb : i*ldb+j] 940 ci := c[i*ldc : i*ldc+j] 941 var tmp complex128 942 for k, ajk := range aj { 943 ci[k] += abij * ajk 944 tmp += bi[k] * ajk 945 } 946 if beta == 0 { 947 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 948 } else { 949 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 950 } 951 } 952 } 953 } 954 } 955 } 956 957 // Zsyrk performs one of the symmetric rank-k operations 958 // 959 // C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans 960 // C = alpha*Aᵀ*A + beta*C if trans == blas.Trans 961 // 962 // where alpha and beta are scalars, C is an n×n symmetric matrix and A is 963 // an n×k matrix in the first case and a k×n matrix in the second case. 964 func (Implementation) Zsyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, beta complex128, c []complex128, ldc int) { 965 var rowA, colA int 966 switch trans { 967 default: 968 panic(badTranspose) 969 case blas.NoTrans: 970 rowA, colA = n, k 971 case blas.Trans: 972 rowA, colA = k, n 973 } 974 switch { 975 case uplo != blas.Lower && uplo != blas.Upper: 976 panic(badUplo) 977 case n < 0: 978 panic(nLT0) 979 case k < 0: 980 panic(kLT0) 981 case lda < max(1, colA): 982 panic(badLdA) 983 case ldc < max(1, n): 984 panic(badLdC) 985 } 986 987 // Quick return if possible. 988 if n == 0 { 989 return 990 } 991 992 // For zero matrix size the following slice length checks are trivially satisfied. 993 if len(a) < (rowA-1)*lda+colA { 994 panic(shortA) 995 } 996 if len(c) < (n-1)*ldc+n { 997 panic(shortC) 998 } 999 1000 // Quick return if possible. 1001 if (alpha == 0 || k == 0) && beta == 1 { 1002 return 1003 } 1004 1005 if alpha == 0 { 1006 if uplo == blas.Upper { 1007 if beta == 0 { 1008 for i := 0; i < n; i++ { 1009 ci := c[i*ldc+i : i*ldc+n] 1010 for j := range ci { 1011 ci[j] = 0 1012 } 1013 } 1014 } else { 1015 for i := 0; i < n; i++ { 1016 ci := c[i*ldc+i : i*ldc+n] 1017 c128.ScalUnitary(beta, ci) 1018 } 1019 } 1020 } else { 1021 if beta == 0 { 1022 for i := 0; i < n; i++ { 1023 ci := c[i*ldc : i*ldc+i+1] 1024 for j := range ci { 1025 ci[j] = 0 1026 } 1027 } 1028 } else { 1029 for i := 0; i < n; i++ { 1030 ci := c[i*ldc : i*ldc+i+1] 1031 c128.ScalUnitary(beta, ci) 1032 } 1033 } 1034 } 1035 return 1036 } 1037 1038 if trans == blas.NoTrans { 1039 // Form C = alpha*A*Aᵀ + beta*C. 1040 if uplo == blas.Upper { 1041 for i := 0; i < n; i++ { 1042 ci := c[i*ldc+i : i*ldc+n] 1043 ai := a[i*lda : i*lda+k] 1044 if beta == 0 { 1045 for jc := range ci { 1046 j := i + jc 1047 ci[jc] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) 1048 } 1049 } else { 1050 for jc, cij := range ci { 1051 j := i + jc 1052 ci[jc] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) 1053 } 1054 } 1055 } 1056 } else { 1057 for i := 0; i < n; i++ { 1058 ci := c[i*ldc : i*ldc+i+1] 1059 ai := a[i*lda : i*lda+k] 1060 if beta == 0 { 1061 for j := range ci { 1062 ci[j] = alpha * c128.DotuUnitary(ai, a[j*lda:j*lda+k]) 1063 } 1064 } else { 1065 for j, cij := range ci { 1066 ci[j] = beta*cij + alpha*c128.DotuUnitary(ai, a[j*lda:j*lda+k]) 1067 } 1068 } 1069 } 1070 } 1071 } else { 1072 // Form C = alpha*Aᵀ*A + beta*C. 1073 if uplo == blas.Upper { 1074 for i := 0; i < n; i++ { 1075 ci := c[i*ldc+i : i*ldc+n] 1076 switch { 1077 case beta == 0: 1078 for jc := range ci { 1079 ci[jc] = 0 1080 } 1081 case beta != 1: 1082 for jc := range ci { 1083 ci[jc] *= beta 1084 } 1085 } 1086 for j := 0; j < k; j++ { 1087 aji := a[j*lda+i] 1088 if aji != 0 { 1089 c128.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) 1090 } 1091 } 1092 } 1093 } else { 1094 for i := 0; i < n; i++ { 1095 ci := c[i*ldc : i*ldc+i+1] 1096 switch { 1097 case beta == 0: 1098 for j := range ci { 1099 ci[j] = 0 1100 } 1101 case beta != 1: 1102 for j := range ci { 1103 ci[j] *= beta 1104 } 1105 } 1106 for j := 0; j < k; j++ { 1107 aji := a[j*lda+i] 1108 if aji != 0 { 1109 c128.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) 1110 } 1111 } 1112 } 1113 } 1114 } 1115 } 1116 1117 // Zsyr2k performs one of the symmetric rank-2k operations 1118 // 1119 // C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans 1120 // C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans 1121 // 1122 // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B 1123 // are n×k matrices in the first case and k×n matrices in the second case. 1124 func (Implementation) Zsyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex128, a []complex128, lda int, b []complex128, ldb int, beta complex128, c []complex128, ldc int) { 1125 var row, col int 1126 switch trans { 1127 default: 1128 panic(badTranspose) 1129 case blas.NoTrans: 1130 row, col = n, k 1131 case blas.Trans: 1132 row, col = k, n 1133 } 1134 switch { 1135 case uplo != blas.Lower && uplo != blas.Upper: 1136 panic(badUplo) 1137 case n < 0: 1138 panic(nLT0) 1139 case k < 0: 1140 panic(kLT0) 1141 case lda < max(1, col): 1142 panic(badLdA) 1143 case ldb < max(1, col): 1144 panic(badLdB) 1145 case ldc < max(1, n): 1146 panic(badLdC) 1147 } 1148 1149 // Quick return if possible. 1150 if n == 0 { 1151 return 1152 } 1153 1154 // For zero matrix size the following slice length checks are trivially satisfied. 1155 if len(a) < (row-1)*lda+col { 1156 panic(shortA) 1157 } 1158 if len(b) < (row-1)*ldb+col { 1159 panic(shortB) 1160 } 1161 if len(c) < (n-1)*ldc+n { 1162 panic(shortC) 1163 } 1164 1165 // Quick return if possible. 1166 if (alpha == 0 || k == 0) && beta == 1 { 1167 return 1168 } 1169 1170 if alpha == 0 { 1171 if uplo == blas.Upper { 1172 if beta == 0 { 1173 for i := 0; i < n; i++ { 1174 ci := c[i*ldc+i : i*ldc+n] 1175 for j := range ci { 1176 ci[j] = 0 1177 } 1178 } 1179 } else { 1180 for i := 0; i < n; i++ { 1181 ci := c[i*ldc+i : i*ldc+n] 1182 c128.ScalUnitary(beta, ci) 1183 } 1184 } 1185 } else { 1186 if beta == 0 { 1187 for i := 0; i < n; i++ { 1188 ci := c[i*ldc : i*ldc+i+1] 1189 for j := range ci { 1190 ci[j] = 0 1191 } 1192 } 1193 } else { 1194 for i := 0; i < n; i++ { 1195 ci := c[i*ldc : i*ldc+i+1] 1196 c128.ScalUnitary(beta, ci) 1197 } 1198 } 1199 } 1200 return 1201 } 1202 1203 if trans == blas.NoTrans { 1204 // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. 1205 if uplo == blas.Upper { 1206 for i := 0; i < n; i++ { 1207 ci := c[i*ldc+i : i*ldc+n] 1208 ai := a[i*lda : i*lda+k] 1209 bi := b[i*ldb : i*ldb+k] 1210 if beta == 0 { 1211 for jc := range ci { 1212 j := i + jc 1213 ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) 1214 } 1215 } else { 1216 for jc, cij := range ci { 1217 j := i + jc 1218 ci[jc] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1219 } 1220 } 1221 } 1222 } else { 1223 for i := 0; i < n; i++ { 1224 ci := c[i*ldc : i*ldc+i+1] 1225 ai := a[i*lda : i*lda+k] 1226 bi := b[i*ldb : i*ldb+k] 1227 if beta == 0 { 1228 for j := range ci { 1229 ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) 1230 } 1231 } else { 1232 for j, cij := range ci { 1233 ci[j] = alpha*c128.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c128.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1234 } 1235 } 1236 } 1237 } 1238 } else { 1239 // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. 1240 if uplo == blas.Upper { 1241 for i := 0; i < n; i++ { 1242 ci := c[i*ldc+i : i*ldc+n] 1243 switch { 1244 case beta == 0: 1245 for jc := range ci { 1246 ci[jc] = 0 1247 } 1248 case beta != 1: 1249 for jc := range ci { 1250 ci[jc] *= beta 1251 } 1252 } 1253 for j := 0; j < k; j++ { 1254 aji := a[j*lda+i] 1255 bji := b[j*ldb+i] 1256 if aji != 0 { 1257 c128.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) 1258 } 1259 if bji != 0 { 1260 c128.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) 1261 } 1262 } 1263 } 1264 } else { 1265 for i := 0; i < n; i++ { 1266 ci := c[i*ldc : i*ldc+i+1] 1267 switch { 1268 case beta == 0: 1269 for j := range ci { 1270 ci[j] = 0 1271 } 1272 case beta != 1: 1273 for j := range ci { 1274 ci[j] *= beta 1275 } 1276 } 1277 for j := 0; j < k; j++ { 1278 aji := a[j*lda+i] 1279 bji := b[j*ldb+i] 1280 if aji != 0 { 1281 c128.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) 1282 } 1283 if bji != 0 { 1284 c128.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) 1285 } 1286 } 1287 } 1288 } 1289 } 1290 } 1291 1292 // Ztrmm performs one of the matrix-matrix operations 1293 // 1294 // B = alpha * op(A) * B if side == blas.Left, 1295 // B = alpha * B * op(A) if side == blas.Right, 1296 // 1297 // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, 1298 // upper or lower triangular matrix and op(A) is one of 1299 // 1300 // op(A) = A if trans == blas.NoTrans, 1301 // op(A) = Aᵀ if trans == blas.Trans, 1302 // op(A) = Aᴴ if trans == blas.ConjTrans. 1303 func (Implementation) Ztrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { 1304 na := m 1305 if side == blas.Right { 1306 na = n 1307 } 1308 switch { 1309 case side != blas.Left && side != blas.Right: 1310 panic(badSide) 1311 case uplo != blas.Lower && uplo != blas.Upper: 1312 panic(badUplo) 1313 case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: 1314 panic(badTranspose) 1315 case diag != blas.Unit && diag != blas.NonUnit: 1316 panic(badDiag) 1317 case m < 0: 1318 panic(mLT0) 1319 case n < 0: 1320 panic(nLT0) 1321 case lda < max(1, na): 1322 panic(badLdA) 1323 case ldb < max(1, n): 1324 panic(badLdB) 1325 } 1326 1327 // Quick return if possible. 1328 if m == 0 || n == 0 { 1329 return 1330 } 1331 1332 // For zero matrix size the following slice length checks are trivially satisfied. 1333 if len(a) < (na-1)*lda+na { 1334 panic(shortA) 1335 } 1336 if len(b) < (m-1)*ldb+n { 1337 panic(shortB) 1338 } 1339 1340 // Quick return if possible. 1341 if alpha == 0 { 1342 for i := 0; i < m; i++ { 1343 bi := b[i*ldb : i*ldb+n] 1344 for j := range bi { 1345 bi[j] = 0 1346 } 1347 } 1348 return 1349 } 1350 1351 noConj := trans != blas.ConjTrans 1352 noUnit := diag == blas.NonUnit 1353 if side == blas.Left { 1354 if trans == blas.NoTrans { 1355 // Form B = alpha*A*B. 1356 if uplo == blas.Upper { 1357 for i := 0; i < m; i++ { 1358 aii := alpha 1359 if noUnit { 1360 aii *= a[i*lda+i] 1361 } 1362 bi := b[i*ldb : i*ldb+n] 1363 for j := range bi { 1364 bi[j] *= aii 1365 } 1366 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1367 j := ja + i + 1 1368 if aij != 0 { 1369 c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1370 } 1371 } 1372 } 1373 } else { 1374 for i := m - 1; i >= 0; i-- { 1375 aii := alpha 1376 if noUnit { 1377 aii *= a[i*lda+i] 1378 } 1379 bi := b[i*ldb : i*ldb+n] 1380 for j := range bi { 1381 bi[j] *= aii 1382 } 1383 for j, aij := range a[i*lda : i*lda+i] { 1384 if aij != 0 { 1385 c128.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1386 } 1387 } 1388 } 1389 } 1390 } else { 1391 // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. 1392 if uplo == blas.Upper { 1393 for k := m - 1; k >= 0; k-- { 1394 bk := b[k*ldb : k*ldb+n] 1395 for ja, ajk := range a[k*lda+k+1 : k*lda+m] { 1396 if ajk == 0 { 1397 continue 1398 } 1399 j := k + 1 + ja 1400 if noConj { 1401 c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1402 } else { 1403 c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1404 } 1405 } 1406 akk := alpha 1407 if noUnit { 1408 if noConj { 1409 akk *= a[k*lda+k] 1410 } else { 1411 akk *= cmplx.Conj(a[k*lda+k]) 1412 } 1413 } 1414 if akk != 1 { 1415 c128.ScalUnitary(akk, bk) 1416 } 1417 } 1418 } else { 1419 for k := 0; k < m; k++ { 1420 bk := b[k*ldb : k*ldb+n] 1421 for j, ajk := range a[k*lda : k*lda+k] { 1422 if ajk == 0 { 1423 continue 1424 } 1425 if noConj { 1426 c128.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1427 } else { 1428 c128.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1429 } 1430 } 1431 akk := alpha 1432 if noUnit { 1433 if noConj { 1434 akk *= a[k*lda+k] 1435 } else { 1436 akk *= cmplx.Conj(a[k*lda+k]) 1437 } 1438 } 1439 if akk != 1 { 1440 c128.ScalUnitary(akk, bk) 1441 } 1442 } 1443 } 1444 } 1445 } else { 1446 if trans == blas.NoTrans { 1447 // Form B = alpha*B*A. 1448 if uplo == blas.Upper { 1449 for i := 0; i < m; i++ { 1450 bi := b[i*ldb : i*ldb+n] 1451 for k := n - 1; k >= 0; k-- { 1452 abik := alpha * bi[k] 1453 if abik == 0 { 1454 continue 1455 } 1456 bi[k] = abik 1457 if noUnit { 1458 bi[k] *= a[k*lda+k] 1459 } 1460 c128.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) 1461 } 1462 } 1463 } else { 1464 for i := 0; i < m; i++ { 1465 bi := b[i*ldb : i*ldb+n] 1466 for k := 0; k < n; k++ { 1467 abik := alpha * bi[k] 1468 if abik == 0 { 1469 continue 1470 } 1471 bi[k] = abik 1472 if noUnit { 1473 bi[k] *= a[k*lda+k] 1474 } 1475 c128.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) 1476 } 1477 } 1478 } 1479 } else { 1480 // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. 1481 if uplo == blas.Upper { 1482 for i := 0; i < m; i++ { 1483 bi := b[i*ldb : i*ldb+n] 1484 for j, bij := range bi { 1485 if noConj { 1486 if noUnit { 1487 bij *= a[j*lda+j] 1488 } 1489 bij += c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1490 } else { 1491 if noUnit { 1492 bij *= cmplx.Conj(a[j*lda+j]) 1493 } 1494 bij += c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1495 } 1496 bi[j] = alpha * bij 1497 } 1498 } 1499 } else { 1500 for i := 0; i < m; i++ { 1501 bi := b[i*ldb : i*ldb+n] 1502 for j := n - 1; j >= 0; j-- { 1503 bij := bi[j] 1504 if noConj { 1505 if noUnit { 1506 bij *= a[j*lda+j] 1507 } 1508 bij += c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1509 } else { 1510 if noUnit { 1511 bij *= cmplx.Conj(a[j*lda+j]) 1512 } 1513 bij += c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1514 } 1515 bi[j] = alpha * bij 1516 } 1517 } 1518 } 1519 } 1520 } 1521 } 1522 1523 // Ztrsm solves one of the matrix equations 1524 // 1525 // op(A) * X = alpha * B if side == blas.Left, 1526 // X * op(A) = alpha * B if side == blas.Right, 1527 // 1528 // where alpha is a scalar, X and B are m×n matrices, A is a unit or 1529 // non-unit, upper or lower triangular matrix and op(A) is one of 1530 // 1531 // op(A) = A if transA == blas.NoTrans, 1532 // op(A) = Aᵀ if transA == blas.Trans, 1533 // op(A) = Aᴴ if transA == blas.ConjTrans. 1534 // 1535 // On return the matrix X is overwritten on B. 1536 func (Implementation) Ztrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex128, a []complex128, lda int, b []complex128, ldb int) { 1537 na := m 1538 if side == blas.Right { 1539 na = n 1540 } 1541 switch { 1542 case side != blas.Left && side != blas.Right: 1543 panic(badSide) 1544 case uplo != blas.Lower && uplo != blas.Upper: 1545 panic(badUplo) 1546 case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: 1547 panic(badTranspose) 1548 case diag != blas.Unit && diag != blas.NonUnit: 1549 panic(badDiag) 1550 case m < 0: 1551 panic(mLT0) 1552 case n < 0: 1553 panic(nLT0) 1554 case lda < max(1, na): 1555 panic(badLdA) 1556 case ldb < max(1, n): 1557 panic(badLdB) 1558 } 1559 1560 // Quick return if possible. 1561 if m == 0 || n == 0 { 1562 return 1563 } 1564 1565 // For zero matrix size the following slice length checks are trivially satisfied. 1566 if len(a) < (na-1)*lda+na { 1567 panic(shortA) 1568 } 1569 if len(b) < (m-1)*ldb+n { 1570 panic(shortB) 1571 } 1572 1573 if alpha == 0 { 1574 for i := 0; i < m; i++ { 1575 for j := 0; j < n; j++ { 1576 b[i*ldb+j] = 0 1577 } 1578 } 1579 return 1580 } 1581 1582 noConj := transA != blas.ConjTrans 1583 noUnit := diag == blas.NonUnit 1584 if side == blas.Left { 1585 if transA == blas.NoTrans { 1586 // Form B = alpha*inv(A)*B. 1587 if uplo == blas.Upper { 1588 for i := m - 1; i >= 0; i-- { 1589 bi := b[i*ldb : i*ldb+n] 1590 if alpha != 1 { 1591 c128.ScalUnitary(alpha, bi) 1592 } 1593 for ka, aik := range a[i*lda+i+1 : i*lda+m] { 1594 k := i + 1 + ka 1595 if aik != 0 { 1596 c128.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) 1597 } 1598 } 1599 if noUnit { 1600 c128.ScalUnitary(1/a[i*lda+i], bi) 1601 } 1602 } 1603 } else { 1604 for i := 0; i < m; i++ { 1605 bi := b[i*ldb : i*ldb+n] 1606 if alpha != 1 { 1607 c128.ScalUnitary(alpha, bi) 1608 } 1609 for j, aij := range a[i*lda : i*lda+i] { 1610 if aij != 0 { 1611 c128.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) 1612 } 1613 } 1614 if noUnit { 1615 c128.ScalUnitary(1/a[i*lda+i], bi) 1616 } 1617 } 1618 } 1619 } else { 1620 // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. 1621 if uplo == blas.Upper { 1622 for i := 0; i < m; i++ { 1623 bi := b[i*ldb : i*ldb+n] 1624 if noUnit { 1625 if noConj { 1626 c128.ScalUnitary(1/a[i*lda+i], bi) 1627 } else { 1628 c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1629 } 1630 } 1631 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1632 if aij == 0 { 1633 continue 1634 } 1635 j := i + 1 + ja 1636 if noConj { 1637 c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1638 } else { 1639 c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1640 } 1641 } 1642 if alpha != 1 { 1643 c128.ScalUnitary(alpha, bi) 1644 } 1645 } 1646 } else { 1647 for i := m - 1; i >= 0; i-- { 1648 bi := b[i*ldb : i*ldb+n] 1649 if noUnit { 1650 if noConj { 1651 c128.ScalUnitary(1/a[i*lda+i], bi) 1652 } else { 1653 c128.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1654 } 1655 } 1656 for j, aij := range a[i*lda : i*lda+i] { 1657 if aij == 0 { 1658 continue 1659 } 1660 if noConj { 1661 c128.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1662 } else { 1663 c128.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1664 } 1665 } 1666 if alpha != 1 { 1667 c128.ScalUnitary(alpha, bi) 1668 } 1669 } 1670 } 1671 } 1672 } else { 1673 if transA == blas.NoTrans { 1674 // Form B = alpha*B*inv(A). 1675 if uplo == blas.Upper { 1676 for i := 0; i < m; i++ { 1677 bi := b[i*ldb : i*ldb+n] 1678 if alpha != 1 { 1679 c128.ScalUnitary(alpha, bi) 1680 } 1681 for j, bij := range bi { 1682 if bij == 0 { 1683 continue 1684 } 1685 if noUnit { 1686 bi[j] /= a[j*lda+j] 1687 } 1688 c128.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1689 } 1690 } 1691 } else { 1692 for i := 0; i < m; i++ { 1693 bi := b[i*ldb : i*ldb+n] 1694 if alpha != 1 { 1695 c128.ScalUnitary(alpha, bi) 1696 } 1697 for j := n - 1; j >= 0; j-- { 1698 if bi[j] == 0 { 1699 continue 1700 } 1701 if noUnit { 1702 bi[j] /= a[j*lda+j] 1703 } 1704 c128.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) 1705 } 1706 } 1707 } 1708 } else { 1709 // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). 1710 if uplo == blas.Upper { 1711 for i := 0; i < m; i++ { 1712 bi := b[i*ldb : i*ldb+n] 1713 for j := n - 1; j >= 0; j-- { 1714 bij := alpha * bi[j] 1715 if noConj { 1716 bij -= c128.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1717 if noUnit { 1718 bij /= a[j*lda+j] 1719 } 1720 } else { 1721 bij -= c128.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1722 if noUnit { 1723 bij /= cmplx.Conj(a[j*lda+j]) 1724 } 1725 } 1726 bi[j] = bij 1727 } 1728 } 1729 } else { 1730 for i := 0; i < m; i++ { 1731 bi := b[i*ldb : i*ldb+n] 1732 for j, bij := range bi { 1733 bij *= alpha 1734 if noConj { 1735 bij -= c128.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1736 if noUnit { 1737 bij /= a[j*lda+j] 1738 } 1739 } else { 1740 bij -= c128.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1741 if noUnit { 1742 bij /= cmplx.Conj(a[j*lda+j]) 1743 } 1744 } 1745 bi[j] = bij 1746 } 1747 } 1748 } 1749 } 1750 } 1751 }