gonum.org/v1/gonum@v0.14.0/blas/gonum/level3cmplx64.go (about) 1 // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2019 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 cmplx "gonum.org/v1/gonum/internal/cmplx64" 11 12 "gonum.org/v1/gonum/blas" 13 "gonum.org/v1/gonum/internal/asm/c64" 14 ) 15 16 var _ blas.Complex64Level3 = Implementation{} 17 18 // Cgemm performs one of the matrix-matrix operations 19 // 20 // C = alpha * op(A) * op(B) + beta * C 21 // 22 // where op(X) is one of 23 // 24 // op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, 25 // 26 // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, 27 // op(B) a k×n matrix and C an m×n matrix. 28 // 29 // Complex64 implementations are autogenerated and not directly tested. 30 func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 31 switch tA { 32 default: 33 panic(badTranspose) 34 case blas.NoTrans, blas.Trans, blas.ConjTrans: 35 } 36 switch tB { 37 default: 38 panic(badTranspose) 39 case blas.NoTrans, blas.Trans, blas.ConjTrans: 40 } 41 switch { 42 case m < 0: 43 panic(mLT0) 44 case n < 0: 45 panic(nLT0) 46 case k < 0: 47 panic(kLT0) 48 } 49 rowA, colA := m, k 50 if tA != blas.NoTrans { 51 rowA, colA = k, m 52 } 53 if lda < max(1, colA) { 54 panic(badLdA) 55 } 56 rowB, colB := k, n 57 if tB != blas.NoTrans { 58 rowB, colB = n, k 59 } 60 if ldb < max(1, colB) { 61 panic(badLdB) 62 } 63 if ldc < max(1, n) { 64 panic(badLdC) 65 } 66 67 // Quick return if possible. 68 if m == 0 || n == 0 { 69 return 70 } 71 72 // For zero matrix size the following slice length checks are trivially satisfied. 73 if len(a) < (rowA-1)*lda+colA { 74 panic(shortA) 75 } 76 if len(b) < (rowB-1)*ldb+colB { 77 panic(shortB) 78 } 79 if len(c) < (m-1)*ldc+n { 80 panic(shortC) 81 } 82 83 // Quick return if possible. 84 if (alpha == 0 || k == 0) && beta == 1 { 85 return 86 } 87 88 if alpha == 0 { 89 if beta == 0 { 90 for i := 0; i < m; i++ { 91 for j := 0; j < n; j++ { 92 c[i*ldc+j] = 0 93 } 94 } 95 } else { 96 for i := 0; i < m; i++ { 97 for j := 0; j < n; j++ { 98 c[i*ldc+j] *= beta 99 } 100 } 101 } 102 return 103 } 104 105 switch tA { 106 case blas.NoTrans: 107 switch tB { 108 case blas.NoTrans: 109 // Form C = alpha * A * B + beta * C. 110 for i := 0; i < m; i++ { 111 switch { 112 case beta == 0: 113 for j := 0; j < n; j++ { 114 c[i*ldc+j] = 0 115 } 116 case beta != 1: 117 for j := 0; j < n; j++ { 118 c[i*ldc+j] *= beta 119 } 120 } 121 for l := 0; l < k; l++ { 122 tmp := alpha * a[i*lda+l] 123 for j := 0; j < n; j++ { 124 c[i*ldc+j] += tmp * b[l*ldb+j] 125 } 126 } 127 } 128 case blas.Trans: 129 // Form C = alpha * A * Bᵀ + beta * C. 130 for i := 0; i < m; i++ { 131 switch { 132 case beta == 0: 133 for j := 0; j < n; j++ { 134 c[i*ldc+j] = 0 135 } 136 case beta != 1: 137 for j := 0; j < n; j++ { 138 c[i*ldc+j] *= beta 139 } 140 } 141 for l := 0; l < k; l++ { 142 tmp := alpha * a[i*lda+l] 143 for j := 0; j < n; j++ { 144 c[i*ldc+j] += tmp * b[j*ldb+l] 145 } 146 } 147 } 148 case blas.ConjTrans: 149 // Form C = alpha * A * Bᴴ + beta * C. 150 for i := 0; i < m; i++ { 151 switch { 152 case beta == 0: 153 for j := 0; j < n; j++ { 154 c[i*ldc+j] = 0 155 } 156 case beta != 1: 157 for j := 0; j < n; j++ { 158 c[i*ldc+j] *= beta 159 } 160 } 161 for l := 0; l < k; l++ { 162 tmp := alpha * a[i*lda+l] 163 for j := 0; j < n; j++ { 164 c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) 165 } 166 } 167 } 168 } 169 case blas.Trans: 170 switch tB { 171 case blas.NoTrans: 172 // Form C = alpha * Aᵀ * B + beta * C. 173 for i := 0; i < m; i++ { 174 for j := 0; j < n; j++ { 175 var tmp complex64 176 for l := 0; l < k; l++ { 177 tmp += a[l*lda+i] * b[l*ldb+j] 178 } 179 if beta == 0 { 180 c[i*ldc+j] = alpha * tmp 181 } else { 182 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 183 } 184 } 185 } 186 case blas.Trans: 187 // Form C = alpha * Aᵀ * Bᵀ + beta * C. 188 for i := 0; i < m; i++ { 189 for j := 0; j < n; j++ { 190 var tmp complex64 191 for l := 0; l < k; l++ { 192 tmp += a[l*lda+i] * b[j*ldb+l] 193 } 194 if beta == 0 { 195 c[i*ldc+j] = alpha * tmp 196 } else { 197 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 198 } 199 } 200 } 201 case blas.ConjTrans: 202 // Form C = alpha * Aᵀ * Bᴴ + beta * C. 203 for i := 0; i < m; i++ { 204 for j := 0; j < n; j++ { 205 var tmp complex64 206 for l := 0; l < k; l++ { 207 tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) 208 } 209 if beta == 0 { 210 c[i*ldc+j] = alpha * tmp 211 } else { 212 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 213 } 214 } 215 } 216 } 217 case blas.ConjTrans: 218 switch tB { 219 case blas.NoTrans: 220 // Form C = alpha * Aᴴ * B + beta * C. 221 for i := 0; i < m; i++ { 222 for j := 0; j < n; j++ { 223 var tmp complex64 224 for l := 0; l < k; l++ { 225 tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] 226 } 227 if beta == 0 { 228 c[i*ldc+j] = alpha * tmp 229 } else { 230 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 231 } 232 } 233 } 234 case blas.Trans: 235 // Form C = alpha * Aᴴ * Bᵀ + beta * C. 236 for i := 0; i < m; i++ { 237 for j := 0; j < n; j++ { 238 var tmp complex64 239 for l := 0; l < k; l++ { 240 tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] 241 } 242 if beta == 0 { 243 c[i*ldc+j] = alpha * tmp 244 } else { 245 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 246 } 247 } 248 } 249 case blas.ConjTrans: 250 // Form C = alpha * Aᴴ * Bᴴ + beta * C. 251 for i := 0; i < m; i++ { 252 for j := 0; j < n; j++ { 253 var tmp complex64 254 for l := 0; l < k; l++ { 255 tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) 256 } 257 if beta == 0 { 258 c[i*ldc+j] = alpha * tmp 259 } else { 260 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 261 } 262 } 263 } 264 } 265 } 266 } 267 268 // Chemm performs one of the matrix-matrix operations 269 // 270 // C = alpha*A*B + beta*C if side == blas.Left 271 // C = alpha*B*A + beta*C if side == blas.Right 272 // 273 // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B 274 // and C are m×n matrices. The imaginary parts of the diagonal elements of A are 275 // assumed to be zero. 276 // 277 // Complex64 implementations are autogenerated and not directly tested. 278 func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 279 na := m 280 if side == blas.Right { 281 na = n 282 } 283 switch { 284 case side != blas.Left && side != blas.Right: 285 panic(badSide) 286 case uplo != blas.Lower && uplo != blas.Upper: 287 panic(badUplo) 288 case m < 0: 289 panic(mLT0) 290 case n < 0: 291 panic(nLT0) 292 case lda < max(1, na): 293 panic(badLdA) 294 case ldb < max(1, n): 295 panic(badLdB) 296 case ldc < max(1, n): 297 panic(badLdC) 298 } 299 300 // Quick return if possible. 301 if m == 0 || n == 0 { 302 return 303 } 304 305 // For zero matrix size the following slice length checks are trivially satisfied. 306 if len(a) < lda*(na-1)+na { 307 panic(shortA) 308 } 309 if len(b) < ldb*(m-1)+n { 310 panic(shortB) 311 } 312 if len(c) < ldc*(m-1)+n { 313 panic(shortC) 314 } 315 316 // Quick return if possible. 317 if alpha == 0 && beta == 1 { 318 return 319 } 320 321 if alpha == 0 { 322 if beta == 0 { 323 for i := 0; i < m; i++ { 324 ci := c[i*ldc : i*ldc+n] 325 for j := range ci { 326 ci[j] = 0 327 } 328 } 329 } else { 330 for i := 0; i < m; i++ { 331 ci := c[i*ldc : i*ldc+n] 332 c64.ScalUnitary(beta, ci) 333 } 334 } 335 return 336 } 337 338 if side == blas.Left { 339 // Form C = alpha*A*B + beta*C. 340 for i := 0; i < m; i++ { 341 atmp := alpha * complex(real(a[i*lda+i]), 0) 342 bi := b[i*ldb : i*ldb+n] 343 ci := c[i*ldc : i*ldc+n] 344 if beta == 0 { 345 for j, bij := range bi { 346 ci[j] = atmp * bij 347 } 348 } else { 349 for j, bij := range bi { 350 ci[j] = atmp*bij + beta*ci[j] 351 } 352 } 353 if uplo == blas.Upper { 354 for k := 0; k < i; k++ { 355 atmp = alpha * cmplx.Conj(a[k*lda+i]) 356 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 357 } 358 for k := i + 1; k < m; k++ { 359 atmp = alpha * a[i*lda+k] 360 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 361 } 362 } else { 363 for k := 0; k < i; k++ { 364 atmp = alpha * a[i*lda+k] 365 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 366 } 367 for k := i + 1; k < m; k++ { 368 atmp = alpha * cmplx.Conj(a[k*lda+i]) 369 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 370 } 371 } 372 } 373 } else { 374 // Form C = alpha*B*A + beta*C. 375 if uplo == blas.Upper { 376 for i := 0; i < m; i++ { 377 for j := n - 1; j >= 0; j-- { 378 abij := alpha * b[i*ldb+j] 379 aj := a[j*lda+j+1 : j*lda+n] 380 bi := b[i*ldb+j+1 : i*ldb+n] 381 ci := c[i*ldc+j+1 : i*ldc+n] 382 var tmp complex64 383 for k, ajk := range aj { 384 ci[k] += abij * ajk 385 tmp += bi[k] * cmplx.Conj(ajk) 386 } 387 ajj := complex(real(a[j*lda+j]), 0) 388 if beta == 0 { 389 c[i*ldc+j] = abij*ajj + alpha*tmp 390 } else { 391 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 392 } 393 } 394 } 395 } else { 396 for i := 0; i < m; i++ { 397 for j := 0; j < n; j++ { 398 abij := alpha * b[i*ldb+j] 399 aj := a[j*lda : j*lda+j] 400 bi := b[i*ldb : i*ldb+j] 401 ci := c[i*ldc : i*ldc+j] 402 var tmp complex64 403 for k, ajk := range aj { 404 ci[k] += abij * ajk 405 tmp += bi[k] * cmplx.Conj(ajk) 406 } 407 ajj := complex(real(a[j*lda+j]), 0) 408 if beta == 0 { 409 c[i*ldc+j] = abij*ajj + alpha*tmp 410 } else { 411 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 412 } 413 } 414 } 415 } 416 } 417 } 418 419 // Cherk performs one of the hermitian rank-k operations 420 // 421 // C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans 422 // C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans 423 // 424 // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is 425 // an n×k matrix in the first case and a k×n matrix in the second case. 426 // 427 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 428 // on return they will be set to zero. 429 // 430 // Complex64 implementations are autogenerated and not directly tested. 431 func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) { 432 var rowA, colA int 433 switch trans { 434 default: 435 panic(badTranspose) 436 case blas.NoTrans: 437 rowA, colA = n, k 438 case blas.ConjTrans: 439 rowA, colA = k, n 440 } 441 switch { 442 case uplo != blas.Lower && uplo != blas.Upper: 443 panic(badUplo) 444 case n < 0: 445 panic(nLT0) 446 case k < 0: 447 panic(kLT0) 448 case lda < max(1, colA): 449 panic(badLdA) 450 case ldc < max(1, n): 451 panic(badLdC) 452 } 453 454 // Quick return if possible. 455 if n == 0 { 456 return 457 } 458 459 // For zero matrix size the following slice length checks are trivially satisfied. 460 if len(a) < (rowA-1)*lda+colA { 461 panic(shortA) 462 } 463 if len(c) < (n-1)*ldc+n { 464 panic(shortC) 465 } 466 467 // Quick return if possible. 468 if (alpha == 0 || k == 0) && beta == 1 { 469 return 470 } 471 472 if alpha == 0 { 473 if uplo == blas.Upper { 474 if beta == 0 { 475 for i := 0; i < n; i++ { 476 ci := c[i*ldc+i : i*ldc+n] 477 for j := range ci { 478 ci[j] = 0 479 } 480 } 481 } else { 482 for i := 0; i < n; i++ { 483 ci := c[i*ldc+i : i*ldc+n] 484 ci[0] = complex(beta*real(ci[0]), 0) 485 if i != n-1 { 486 c64.SscalUnitary(beta, ci[1:]) 487 } 488 } 489 } 490 } else { 491 if beta == 0 { 492 for i := 0; i < n; i++ { 493 ci := c[i*ldc : i*ldc+i+1] 494 for j := range ci { 495 ci[j] = 0 496 } 497 } 498 } else { 499 for i := 0; i < n; i++ { 500 ci := c[i*ldc : i*ldc+i+1] 501 if i != 0 { 502 c64.SscalUnitary(beta, ci[:i]) 503 } 504 ci[i] = complex(beta*real(ci[i]), 0) 505 } 506 } 507 } 508 return 509 } 510 511 calpha := complex(alpha, 0) 512 if trans == blas.NoTrans { 513 // Form C = alpha*A*Aᴴ + beta*C. 514 cbeta := complex(beta, 0) 515 if uplo == blas.Upper { 516 for i := 0; i < n; i++ { 517 ci := c[i*ldc+i : i*ldc+n] 518 ai := a[i*lda : i*lda+k] 519 switch { 520 case beta == 0: 521 // Handle the i-th diagonal element of C. 522 ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) 523 // Handle the remaining elements on the i-th row of C. 524 for jc := range ci[1:] { 525 j := i + 1 + jc 526 ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) 527 } 528 case beta != 1: 529 cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0] 530 ci[0] = complex(real(cii), 0) 531 for jc, cij := range ci[1:] { 532 j := i + 1 + jc 533 ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 534 } 535 default: 536 cii := calpha*c64.DotcUnitary(ai, ai) + ci[0] 537 ci[0] = complex(real(cii), 0) 538 for jc, cij := range ci[1:] { 539 j := i + 1 + jc 540 ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 541 } 542 } 543 } 544 } else { 545 for i := 0; i < n; i++ { 546 ci := c[i*ldc : i*ldc+i+1] 547 ai := a[i*lda : i*lda+k] 548 switch { 549 case beta == 0: 550 // Handle the first i-1 elements on the i-th row of C. 551 for j := range ci[:i] { 552 ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) 553 } 554 // Handle the i-th diagonal element of C. 555 ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) 556 case beta != 1: 557 for j, cij := range ci[:i] { 558 ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 559 } 560 cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i] 561 ci[i] = complex(real(cii), 0) 562 default: 563 for j, cij := range ci[:i] { 564 ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 565 } 566 cii := calpha*c64.DotcUnitary(ai, ai) + ci[i] 567 ci[i] = complex(real(cii), 0) 568 } 569 } 570 } 571 } else { 572 // Form C = alpha*Aᴴ*A + beta*C. 573 if uplo == blas.Upper { 574 for i := 0; i < n; i++ { 575 ci := c[i*ldc+i : i*ldc+n] 576 switch { 577 case beta == 0: 578 for jc := range ci { 579 ci[jc] = 0 580 } 581 case beta != 1: 582 c64.SscalUnitary(beta, ci) 583 ci[0] = complex(real(ci[0]), 0) 584 default: 585 ci[0] = complex(real(ci[0]), 0) 586 } 587 for j := 0; j < k; j++ { 588 aji := cmplx.Conj(a[j*lda+i]) 589 if aji != 0 { 590 c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) 591 } 592 } 593 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 594 } 595 } else { 596 for i := 0; i < n; i++ { 597 ci := c[i*ldc : i*ldc+i+1] 598 switch { 599 case beta == 0: 600 for j := range ci { 601 ci[j] = 0 602 } 603 case beta != 1: 604 c64.SscalUnitary(beta, ci) 605 ci[i] = complex(real(ci[i]), 0) 606 default: 607 ci[i] = complex(real(ci[i]), 0) 608 } 609 for j := 0; j < k; j++ { 610 aji := cmplx.Conj(a[j*lda+i]) 611 if aji != 0 { 612 c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) 613 } 614 } 615 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 616 } 617 } 618 } 619 } 620 621 // Cher2k performs one of the hermitian rank-2k operations 622 // 623 // C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans 624 // C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans 625 // 626 // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix 627 // and A and B are n×k matrices in the first case and k×n matrices in the second case. 628 // 629 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 630 // on return they will be set to zero. 631 // 632 // Complex64 implementations are autogenerated and not directly tested. 633 func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) { 634 var row, col int 635 switch trans { 636 default: 637 panic(badTranspose) 638 case blas.NoTrans: 639 row, col = n, k 640 case blas.ConjTrans: 641 row, col = k, n 642 } 643 switch { 644 case uplo != blas.Lower && uplo != blas.Upper: 645 panic(badUplo) 646 case n < 0: 647 panic(nLT0) 648 case k < 0: 649 panic(kLT0) 650 case lda < max(1, col): 651 panic(badLdA) 652 case ldb < max(1, col): 653 panic(badLdB) 654 case ldc < max(1, n): 655 panic(badLdC) 656 } 657 658 // Quick return if possible. 659 if n == 0 { 660 return 661 } 662 663 // For zero matrix size the following slice length checks are trivially satisfied. 664 if len(a) < (row-1)*lda+col { 665 panic(shortA) 666 } 667 if len(b) < (row-1)*ldb+col { 668 panic(shortB) 669 } 670 if len(c) < (n-1)*ldc+n { 671 panic(shortC) 672 } 673 674 // Quick return if possible. 675 if (alpha == 0 || k == 0) && beta == 1 { 676 return 677 } 678 679 if alpha == 0 { 680 if uplo == blas.Upper { 681 if beta == 0 { 682 for i := 0; i < n; i++ { 683 ci := c[i*ldc+i : i*ldc+n] 684 for j := range ci { 685 ci[j] = 0 686 } 687 } 688 } else { 689 for i := 0; i < n; i++ { 690 ci := c[i*ldc+i : i*ldc+n] 691 ci[0] = complex(beta*real(ci[0]), 0) 692 if i != n-1 { 693 c64.SscalUnitary(beta, ci[1:]) 694 } 695 } 696 } 697 } else { 698 if beta == 0 { 699 for i := 0; i < n; i++ { 700 ci := c[i*ldc : i*ldc+i+1] 701 for j := range ci { 702 ci[j] = 0 703 } 704 } 705 } else { 706 for i := 0; i < n; i++ { 707 ci := c[i*ldc : i*ldc+i+1] 708 if i != 0 { 709 c64.SscalUnitary(beta, ci[:i]) 710 } 711 ci[i] = complex(beta*real(ci[i]), 0) 712 } 713 } 714 } 715 return 716 } 717 718 conjalpha := cmplx.Conj(alpha) 719 cbeta := complex(beta, 0) 720 if trans == blas.NoTrans { 721 // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. 722 if uplo == blas.Upper { 723 for i := 0; i < n; i++ { 724 ci := c[i*ldc+i+1 : i*ldc+n] 725 ai := a[i*lda : i*lda+k] 726 bi := b[i*ldb : i*ldb+k] 727 if beta == 0 { 728 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) 729 c[i*ldc+i] = complex(real(cii), 0) 730 for jc := range ci { 731 j := i + 1 + jc 732 ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) 733 } 734 } else { 735 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 736 c[i*ldc+i] = complex(real(cii), 0) 737 for jc, cij := range ci { 738 j := i + 1 + jc 739 ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 740 } 741 } 742 } 743 } else { 744 for i := 0; i < n; i++ { 745 ci := c[i*ldc : i*ldc+i] 746 ai := a[i*lda : i*lda+k] 747 bi := b[i*ldb : i*ldb+k] 748 if beta == 0 { 749 for j := range ci { 750 ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) 751 } 752 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) 753 c[i*ldc+i] = complex(real(cii), 0) 754 } else { 755 for j, cij := range ci { 756 ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 757 } 758 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 759 c[i*ldc+i] = complex(real(cii), 0) 760 } 761 } 762 } 763 } else { 764 // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. 765 if uplo == blas.Upper { 766 for i := 0; i < n; i++ { 767 ci := c[i*ldc+i : i*ldc+n] 768 switch { 769 case beta == 0: 770 for jc := range ci { 771 ci[jc] = 0 772 } 773 case beta != 1: 774 c64.SscalUnitary(beta, ci) 775 ci[0] = complex(real(ci[0]), 0) 776 default: 777 ci[0] = complex(real(ci[0]), 0) 778 } 779 for j := 0; j < k; j++ { 780 aji := a[j*lda+i] 781 bji := b[j*ldb+i] 782 if aji != 0 { 783 c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) 784 } 785 if bji != 0 { 786 c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) 787 } 788 } 789 ci[0] = complex(real(ci[0]), 0) 790 } 791 } else { 792 for i := 0; i < n; i++ { 793 ci := c[i*ldc : i*ldc+i+1] 794 switch { 795 case beta == 0: 796 for j := range ci { 797 ci[j] = 0 798 } 799 case beta != 1: 800 c64.SscalUnitary(beta, ci) 801 ci[i] = complex(real(ci[i]), 0) 802 default: 803 ci[i] = complex(real(ci[i]), 0) 804 } 805 for j := 0; j < k; j++ { 806 aji := a[j*lda+i] 807 bji := b[j*ldb+i] 808 if aji != 0 { 809 c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) 810 } 811 if bji != 0 { 812 c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) 813 } 814 } 815 ci[i] = complex(real(ci[i]), 0) 816 } 817 } 818 } 819 } 820 821 // Csymm performs one of the matrix-matrix operations 822 // 823 // C = alpha*A*B + beta*C if side == blas.Left 824 // C = alpha*B*A + beta*C if side == blas.Right 825 // 826 // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B 827 // and C are m×n matrices. 828 // 829 // Complex64 implementations are autogenerated and not directly tested. 830 func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 831 na := m 832 if side == blas.Right { 833 na = n 834 } 835 switch { 836 case side != blas.Left && side != blas.Right: 837 panic(badSide) 838 case uplo != blas.Lower && uplo != blas.Upper: 839 panic(badUplo) 840 case m < 0: 841 panic(mLT0) 842 case n < 0: 843 panic(nLT0) 844 case lda < max(1, na): 845 panic(badLdA) 846 case ldb < max(1, n): 847 panic(badLdB) 848 case ldc < max(1, n): 849 panic(badLdC) 850 } 851 852 // Quick return if possible. 853 if m == 0 || n == 0 { 854 return 855 } 856 857 // For zero matrix size the following slice length checks are trivially satisfied. 858 if len(a) < lda*(na-1)+na { 859 panic(shortA) 860 } 861 if len(b) < ldb*(m-1)+n { 862 panic(shortB) 863 } 864 if len(c) < ldc*(m-1)+n { 865 panic(shortC) 866 } 867 868 // Quick return if possible. 869 if alpha == 0 && beta == 1 { 870 return 871 } 872 873 if alpha == 0 { 874 if beta == 0 { 875 for i := 0; i < m; i++ { 876 ci := c[i*ldc : i*ldc+n] 877 for j := range ci { 878 ci[j] = 0 879 } 880 } 881 } else { 882 for i := 0; i < m; i++ { 883 ci := c[i*ldc : i*ldc+n] 884 c64.ScalUnitary(beta, ci) 885 } 886 } 887 return 888 } 889 890 if side == blas.Left { 891 // Form C = alpha*A*B + beta*C. 892 for i := 0; i < m; i++ { 893 atmp := alpha * a[i*lda+i] 894 bi := b[i*ldb : i*ldb+n] 895 ci := c[i*ldc : i*ldc+n] 896 if beta == 0 { 897 for j, bij := range bi { 898 ci[j] = atmp * bij 899 } 900 } else { 901 for j, bij := range bi { 902 ci[j] = atmp*bij + beta*ci[j] 903 } 904 } 905 if uplo == blas.Upper { 906 for k := 0; k < i; k++ { 907 atmp = alpha * a[k*lda+i] 908 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 909 } 910 for k := i + 1; k < m; k++ { 911 atmp = alpha * a[i*lda+k] 912 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 913 } 914 } else { 915 for k := 0; k < i; k++ { 916 atmp = alpha * a[i*lda+k] 917 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 918 } 919 for k := i + 1; k < m; k++ { 920 atmp = alpha * a[k*lda+i] 921 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 922 } 923 } 924 } 925 } else { 926 // Form C = alpha*B*A + beta*C. 927 if uplo == blas.Upper { 928 for i := 0; i < m; i++ { 929 for j := n - 1; j >= 0; j-- { 930 abij := alpha * b[i*ldb+j] 931 aj := a[j*lda+j+1 : j*lda+n] 932 bi := b[i*ldb+j+1 : i*ldb+n] 933 ci := c[i*ldc+j+1 : i*ldc+n] 934 var tmp complex64 935 for k, ajk := range aj { 936 ci[k] += abij * ajk 937 tmp += bi[k] * ajk 938 } 939 if beta == 0 { 940 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 941 } else { 942 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 943 } 944 } 945 } 946 } else { 947 for i := 0; i < m; i++ { 948 for j := 0; j < n; j++ { 949 abij := alpha * b[i*ldb+j] 950 aj := a[j*lda : j*lda+j] 951 bi := b[i*ldb : i*ldb+j] 952 ci := c[i*ldc : i*ldc+j] 953 var tmp complex64 954 for k, ajk := range aj { 955 ci[k] += abij * ajk 956 tmp += bi[k] * ajk 957 } 958 if beta == 0 { 959 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 960 } else { 961 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 962 } 963 } 964 } 965 } 966 } 967 } 968 969 // Csyrk performs one of the symmetric rank-k operations 970 // 971 // C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans 972 // C = alpha*Aᵀ*A + beta*C if trans == blas.Trans 973 // 974 // where alpha and beta are scalars, C is an n×n symmetric matrix and A is 975 // an n×k matrix in the first case and a k×n matrix in the second case. 976 // 977 // Complex64 implementations are autogenerated and not directly tested. 978 func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) { 979 var rowA, colA int 980 switch trans { 981 default: 982 panic(badTranspose) 983 case blas.NoTrans: 984 rowA, colA = n, k 985 case blas.Trans: 986 rowA, colA = k, n 987 } 988 switch { 989 case uplo != blas.Lower && uplo != blas.Upper: 990 panic(badUplo) 991 case n < 0: 992 panic(nLT0) 993 case k < 0: 994 panic(kLT0) 995 case lda < max(1, colA): 996 panic(badLdA) 997 case ldc < max(1, n): 998 panic(badLdC) 999 } 1000 1001 // Quick return if possible. 1002 if n == 0 { 1003 return 1004 } 1005 1006 // For zero matrix size the following slice length checks are trivially satisfied. 1007 if len(a) < (rowA-1)*lda+colA { 1008 panic(shortA) 1009 } 1010 if len(c) < (n-1)*ldc+n { 1011 panic(shortC) 1012 } 1013 1014 // Quick return if possible. 1015 if (alpha == 0 || k == 0) && beta == 1 { 1016 return 1017 } 1018 1019 if alpha == 0 { 1020 if uplo == blas.Upper { 1021 if beta == 0 { 1022 for i := 0; i < n; i++ { 1023 ci := c[i*ldc+i : i*ldc+n] 1024 for j := range ci { 1025 ci[j] = 0 1026 } 1027 } 1028 } else { 1029 for i := 0; i < n; i++ { 1030 ci := c[i*ldc+i : i*ldc+n] 1031 c64.ScalUnitary(beta, ci) 1032 } 1033 } 1034 } else { 1035 if beta == 0 { 1036 for i := 0; i < n; i++ { 1037 ci := c[i*ldc : i*ldc+i+1] 1038 for j := range ci { 1039 ci[j] = 0 1040 } 1041 } 1042 } else { 1043 for i := 0; i < n; i++ { 1044 ci := c[i*ldc : i*ldc+i+1] 1045 c64.ScalUnitary(beta, ci) 1046 } 1047 } 1048 } 1049 return 1050 } 1051 1052 if trans == blas.NoTrans { 1053 // Form C = alpha*A*Aᵀ + beta*C. 1054 if uplo == blas.Upper { 1055 for i := 0; i < n; i++ { 1056 ci := c[i*ldc+i : i*ldc+n] 1057 ai := a[i*lda : i*lda+k] 1058 if beta == 0 { 1059 for jc := range ci { 1060 j := i + jc 1061 ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1062 } 1063 } else { 1064 for jc, cij := range ci { 1065 j := i + jc 1066 ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1067 } 1068 } 1069 } 1070 } else { 1071 for i := 0; i < n; i++ { 1072 ci := c[i*ldc : i*ldc+i+1] 1073 ai := a[i*lda : i*lda+k] 1074 if beta == 0 { 1075 for j := range ci { 1076 ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1077 } 1078 } else { 1079 for j, cij := range ci { 1080 ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1081 } 1082 } 1083 } 1084 } 1085 } else { 1086 // Form C = alpha*Aᵀ*A + beta*C. 1087 if uplo == blas.Upper { 1088 for i := 0; i < n; i++ { 1089 ci := c[i*ldc+i : i*ldc+n] 1090 switch { 1091 case beta == 0: 1092 for jc := range ci { 1093 ci[jc] = 0 1094 } 1095 case beta != 1: 1096 for jc := range ci { 1097 ci[jc] *= beta 1098 } 1099 } 1100 for j := 0; j < k; j++ { 1101 aji := a[j*lda+i] 1102 if aji != 0 { 1103 c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) 1104 } 1105 } 1106 } 1107 } else { 1108 for i := 0; i < n; i++ { 1109 ci := c[i*ldc : i*ldc+i+1] 1110 switch { 1111 case beta == 0: 1112 for j := range ci { 1113 ci[j] = 0 1114 } 1115 case beta != 1: 1116 for j := range ci { 1117 ci[j] *= beta 1118 } 1119 } 1120 for j := 0; j < k; j++ { 1121 aji := a[j*lda+i] 1122 if aji != 0 { 1123 c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) 1124 } 1125 } 1126 } 1127 } 1128 } 1129 } 1130 1131 // Csyr2k performs one of the symmetric rank-2k operations 1132 // 1133 // C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans 1134 // C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans 1135 // 1136 // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B 1137 // are n×k matrices in the first case and k×n matrices in the second case. 1138 // 1139 // Complex64 implementations are autogenerated and not directly tested. 1140 func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 1141 var row, col int 1142 switch trans { 1143 default: 1144 panic(badTranspose) 1145 case blas.NoTrans: 1146 row, col = n, k 1147 case blas.Trans: 1148 row, col = k, n 1149 } 1150 switch { 1151 case uplo != blas.Lower && uplo != blas.Upper: 1152 panic(badUplo) 1153 case n < 0: 1154 panic(nLT0) 1155 case k < 0: 1156 panic(kLT0) 1157 case lda < max(1, col): 1158 panic(badLdA) 1159 case ldb < max(1, col): 1160 panic(badLdB) 1161 case ldc < max(1, n): 1162 panic(badLdC) 1163 } 1164 1165 // Quick return if possible. 1166 if n == 0 { 1167 return 1168 } 1169 1170 // For zero matrix size the following slice length checks are trivially satisfied. 1171 if len(a) < (row-1)*lda+col { 1172 panic(shortA) 1173 } 1174 if len(b) < (row-1)*ldb+col { 1175 panic(shortB) 1176 } 1177 if len(c) < (n-1)*ldc+n { 1178 panic(shortC) 1179 } 1180 1181 // Quick return if possible. 1182 if (alpha == 0 || k == 0) && beta == 1 { 1183 return 1184 } 1185 1186 if alpha == 0 { 1187 if uplo == blas.Upper { 1188 if beta == 0 { 1189 for i := 0; i < n; i++ { 1190 ci := c[i*ldc+i : i*ldc+n] 1191 for j := range ci { 1192 ci[j] = 0 1193 } 1194 } 1195 } else { 1196 for i := 0; i < n; i++ { 1197 ci := c[i*ldc+i : i*ldc+n] 1198 c64.ScalUnitary(beta, ci) 1199 } 1200 } 1201 } else { 1202 if beta == 0 { 1203 for i := 0; i < n; i++ { 1204 ci := c[i*ldc : i*ldc+i+1] 1205 for j := range ci { 1206 ci[j] = 0 1207 } 1208 } 1209 } else { 1210 for i := 0; i < n; i++ { 1211 ci := c[i*ldc : i*ldc+i+1] 1212 c64.ScalUnitary(beta, ci) 1213 } 1214 } 1215 } 1216 return 1217 } 1218 1219 if trans == blas.NoTrans { 1220 // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. 1221 if uplo == blas.Upper { 1222 for i := 0; i < n; i++ { 1223 ci := c[i*ldc+i : i*ldc+n] 1224 ai := a[i*lda : i*lda+k] 1225 bi := b[i*ldb : i*ldb+k] 1226 if beta == 0 { 1227 for jc := range ci { 1228 j := i + jc 1229 ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) 1230 } 1231 } else { 1232 for jc, cij := range ci { 1233 j := i + jc 1234 ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1235 } 1236 } 1237 } 1238 } else { 1239 for i := 0; i < n; i++ { 1240 ci := c[i*ldc : i*ldc+i+1] 1241 ai := a[i*lda : i*lda+k] 1242 bi := b[i*ldb : i*ldb+k] 1243 if beta == 0 { 1244 for j := range ci { 1245 ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) 1246 } 1247 } else { 1248 for j, cij := range ci { 1249 ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1250 } 1251 } 1252 } 1253 } 1254 } else { 1255 // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. 1256 if uplo == blas.Upper { 1257 for i := 0; i < n; i++ { 1258 ci := c[i*ldc+i : i*ldc+n] 1259 switch { 1260 case beta == 0: 1261 for jc := range ci { 1262 ci[jc] = 0 1263 } 1264 case beta != 1: 1265 for jc := range ci { 1266 ci[jc] *= beta 1267 } 1268 } 1269 for j := 0; j < k; j++ { 1270 aji := a[j*lda+i] 1271 bji := b[j*ldb+i] 1272 if aji != 0 { 1273 c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) 1274 } 1275 if bji != 0 { 1276 c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) 1277 } 1278 } 1279 } 1280 } else { 1281 for i := 0; i < n; i++ { 1282 ci := c[i*ldc : i*ldc+i+1] 1283 switch { 1284 case beta == 0: 1285 for j := range ci { 1286 ci[j] = 0 1287 } 1288 case beta != 1: 1289 for j := range ci { 1290 ci[j] *= beta 1291 } 1292 } 1293 for j := 0; j < k; j++ { 1294 aji := a[j*lda+i] 1295 bji := b[j*ldb+i] 1296 if aji != 0 { 1297 c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) 1298 } 1299 if bji != 0 { 1300 c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) 1301 } 1302 } 1303 } 1304 } 1305 } 1306 } 1307 1308 // Ctrmm performs one of the matrix-matrix operations 1309 // 1310 // B = alpha * op(A) * B if side == blas.Left, 1311 // B = alpha * B * op(A) if side == blas.Right, 1312 // 1313 // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, 1314 // upper or lower triangular matrix and op(A) is one of 1315 // 1316 // op(A) = A if trans == blas.NoTrans, 1317 // op(A) = Aᵀ if trans == blas.Trans, 1318 // op(A) = Aᴴ if trans == blas.ConjTrans. 1319 // 1320 // Complex64 implementations are autogenerated and not directly tested. 1321 func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { 1322 na := m 1323 if side == blas.Right { 1324 na = n 1325 } 1326 switch { 1327 case side != blas.Left && side != blas.Right: 1328 panic(badSide) 1329 case uplo != blas.Lower && uplo != blas.Upper: 1330 panic(badUplo) 1331 case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: 1332 panic(badTranspose) 1333 case diag != blas.Unit && diag != blas.NonUnit: 1334 panic(badDiag) 1335 case m < 0: 1336 panic(mLT0) 1337 case n < 0: 1338 panic(nLT0) 1339 case lda < max(1, na): 1340 panic(badLdA) 1341 case ldb < max(1, n): 1342 panic(badLdB) 1343 } 1344 1345 // Quick return if possible. 1346 if m == 0 || n == 0 { 1347 return 1348 } 1349 1350 // For zero matrix size the following slice length checks are trivially satisfied. 1351 if len(a) < (na-1)*lda+na { 1352 panic(shortA) 1353 } 1354 if len(b) < (m-1)*ldb+n { 1355 panic(shortB) 1356 } 1357 1358 // Quick return if possible. 1359 if alpha == 0 { 1360 for i := 0; i < m; i++ { 1361 bi := b[i*ldb : i*ldb+n] 1362 for j := range bi { 1363 bi[j] = 0 1364 } 1365 } 1366 return 1367 } 1368 1369 noConj := trans != blas.ConjTrans 1370 noUnit := diag == blas.NonUnit 1371 if side == blas.Left { 1372 if trans == blas.NoTrans { 1373 // Form B = alpha*A*B. 1374 if uplo == blas.Upper { 1375 for i := 0; i < m; i++ { 1376 aii := alpha 1377 if noUnit { 1378 aii *= a[i*lda+i] 1379 } 1380 bi := b[i*ldb : i*ldb+n] 1381 for j := range bi { 1382 bi[j] *= aii 1383 } 1384 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1385 j := ja + i + 1 1386 if aij != 0 { 1387 c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1388 } 1389 } 1390 } 1391 } else { 1392 for i := m - 1; i >= 0; i-- { 1393 aii := alpha 1394 if noUnit { 1395 aii *= a[i*lda+i] 1396 } 1397 bi := b[i*ldb : i*ldb+n] 1398 for j := range bi { 1399 bi[j] *= aii 1400 } 1401 for j, aij := range a[i*lda : i*lda+i] { 1402 if aij != 0 { 1403 c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1404 } 1405 } 1406 } 1407 } 1408 } else { 1409 // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. 1410 if uplo == blas.Upper { 1411 for k := m - 1; k >= 0; k-- { 1412 bk := b[k*ldb : k*ldb+n] 1413 for ja, ajk := range a[k*lda+k+1 : k*lda+m] { 1414 if ajk == 0 { 1415 continue 1416 } 1417 j := k + 1 + ja 1418 if noConj { 1419 c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1420 } else { 1421 c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1422 } 1423 } 1424 akk := alpha 1425 if noUnit { 1426 if noConj { 1427 akk *= a[k*lda+k] 1428 } else { 1429 akk *= cmplx.Conj(a[k*lda+k]) 1430 } 1431 } 1432 if akk != 1 { 1433 c64.ScalUnitary(akk, bk) 1434 } 1435 } 1436 } else { 1437 for k := 0; k < m; k++ { 1438 bk := b[k*ldb : k*ldb+n] 1439 for j, ajk := range a[k*lda : k*lda+k] { 1440 if ajk == 0 { 1441 continue 1442 } 1443 if noConj { 1444 c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1445 } else { 1446 c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1447 } 1448 } 1449 akk := alpha 1450 if noUnit { 1451 if noConj { 1452 akk *= a[k*lda+k] 1453 } else { 1454 akk *= cmplx.Conj(a[k*lda+k]) 1455 } 1456 } 1457 if akk != 1 { 1458 c64.ScalUnitary(akk, bk) 1459 } 1460 } 1461 } 1462 } 1463 } else { 1464 if trans == blas.NoTrans { 1465 // Form B = alpha*B*A. 1466 if uplo == blas.Upper { 1467 for i := 0; i < m; i++ { 1468 bi := b[i*ldb : i*ldb+n] 1469 for k := n - 1; k >= 0; k-- { 1470 abik := alpha * bi[k] 1471 if abik == 0 { 1472 continue 1473 } 1474 bi[k] = abik 1475 if noUnit { 1476 bi[k] *= a[k*lda+k] 1477 } 1478 c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) 1479 } 1480 } 1481 } else { 1482 for i := 0; i < m; i++ { 1483 bi := b[i*ldb : i*ldb+n] 1484 for k := 0; k < n; k++ { 1485 abik := alpha * bi[k] 1486 if abik == 0 { 1487 continue 1488 } 1489 bi[k] = abik 1490 if noUnit { 1491 bi[k] *= a[k*lda+k] 1492 } 1493 c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) 1494 } 1495 } 1496 } 1497 } else { 1498 // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. 1499 if uplo == blas.Upper { 1500 for i := 0; i < m; i++ { 1501 bi := b[i*ldb : i*ldb+n] 1502 for j, bij := range bi { 1503 if noConj { 1504 if noUnit { 1505 bij *= a[j*lda+j] 1506 } 1507 bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1508 } else { 1509 if noUnit { 1510 bij *= cmplx.Conj(a[j*lda+j]) 1511 } 1512 bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1513 } 1514 bi[j] = alpha * bij 1515 } 1516 } 1517 } else { 1518 for i := 0; i < m; i++ { 1519 bi := b[i*ldb : i*ldb+n] 1520 for j := n - 1; j >= 0; j-- { 1521 bij := bi[j] 1522 if noConj { 1523 if noUnit { 1524 bij *= a[j*lda+j] 1525 } 1526 bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1527 } else { 1528 if noUnit { 1529 bij *= cmplx.Conj(a[j*lda+j]) 1530 } 1531 bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1532 } 1533 bi[j] = alpha * bij 1534 } 1535 } 1536 } 1537 } 1538 } 1539 } 1540 1541 // Ctrsm solves one of the matrix equations 1542 // 1543 // op(A) * X = alpha * B if side == blas.Left, 1544 // X * op(A) = alpha * B if side == blas.Right, 1545 // 1546 // where alpha is a scalar, X and B are m×n matrices, A is a unit or 1547 // non-unit, upper or lower triangular matrix and op(A) is one of 1548 // 1549 // op(A) = A if transA == blas.NoTrans, 1550 // op(A) = Aᵀ if transA == blas.Trans, 1551 // op(A) = Aᴴ if transA == blas.ConjTrans. 1552 // 1553 // On return the matrix X is overwritten on B. 1554 // 1555 // Complex64 implementations are autogenerated and not directly tested. 1556 func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { 1557 na := m 1558 if side == blas.Right { 1559 na = n 1560 } 1561 switch { 1562 case side != blas.Left && side != blas.Right: 1563 panic(badSide) 1564 case uplo != blas.Lower && uplo != blas.Upper: 1565 panic(badUplo) 1566 case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: 1567 panic(badTranspose) 1568 case diag != blas.Unit && diag != blas.NonUnit: 1569 panic(badDiag) 1570 case m < 0: 1571 panic(mLT0) 1572 case n < 0: 1573 panic(nLT0) 1574 case lda < max(1, na): 1575 panic(badLdA) 1576 case ldb < max(1, n): 1577 panic(badLdB) 1578 } 1579 1580 // Quick return if possible. 1581 if m == 0 || n == 0 { 1582 return 1583 } 1584 1585 // For zero matrix size the following slice length checks are trivially satisfied. 1586 if len(a) < (na-1)*lda+na { 1587 panic(shortA) 1588 } 1589 if len(b) < (m-1)*ldb+n { 1590 panic(shortB) 1591 } 1592 1593 if alpha == 0 { 1594 for i := 0; i < m; i++ { 1595 for j := 0; j < n; j++ { 1596 b[i*ldb+j] = 0 1597 } 1598 } 1599 return 1600 } 1601 1602 noConj := transA != blas.ConjTrans 1603 noUnit := diag == blas.NonUnit 1604 if side == blas.Left { 1605 if transA == blas.NoTrans { 1606 // Form B = alpha*inv(A)*B. 1607 if uplo == blas.Upper { 1608 for i := m - 1; i >= 0; i-- { 1609 bi := b[i*ldb : i*ldb+n] 1610 if alpha != 1 { 1611 c64.ScalUnitary(alpha, bi) 1612 } 1613 for ka, aik := range a[i*lda+i+1 : i*lda+m] { 1614 k := i + 1 + ka 1615 if aik != 0 { 1616 c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) 1617 } 1618 } 1619 if noUnit { 1620 c64.ScalUnitary(1/a[i*lda+i], bi) 1621 } 1622 } 1623 } else { 1624 for i := 0; i < m; i++ { 1625 bi := b[i*ldb : i*ldb+n] 1626 if alpha != 1 { 1627 c64.ScalUnitary(alpha, bi) 1628 } 1629 for j, aij := range a[i*lda : i*lda+i] { 1630 if aij != 0 { 1631 c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) 1632 } 1633 } 1634 if noUnit { 1635 c64.ScalUnitary(1/a[i*lda+i], bi) 1636 } 1637 } 1638 } 1639 } else { 1640 // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. 1641 if uplo == blas.Upper { 1642 for i := 0; i < m; i++ { 1643 bi := b[i*ldb : i*ldb+n] 1644 if noUnit { 1645 if noConj { 1646 c64.ScalUnitary(1/a[i*lda+i], bi) 1647 } else { 1648 c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1649 } 1650 } 1651 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1652 if aij == 0 { 1653 continue 1654 } 1655 j := i + 1 + ja 1656 if noConj { 1657 c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1658 } else { 1659 c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1660 } 1661 } 1662 if alpha != 1 { 1663 c64.ScalUnitary(alpha, bi) 1664 } 1665 } 1666 } else { 1667 for i := m - 1; i >= 0; i-- { 1668 bi := b[i*ldb : i*ldb+n] 1669 if noUnit { 1670 if noConj { 1671 c64.ScalUnitary(1/a[i*lda+i], bi) 1672 } else { 1673 c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1674 } 1675 } 1676 for j, aij := range a[i*lda : i*lda+i] { 1677 if aij == 0 { 1678 continue 1679 } 1680 if noConj { 1681 c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1682 } else { 1683 c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1684 } 1685 } 1686 if alpha != 1 { 1687 c64.ScalUnitary(alpha, bi) 1688 } 1689 } 1690 } 1691 } 1692 } else { 1693 if transA == blas.NoTrans { 1694 // Form B = alpha*B*inv(A). 1695 if uplo == blas.Upper { 1696 for i := 0; i < m; i++ { 1697 bi := b[i*ldb : i*ldb+n] 1698 if alpha != 1 { 1699 c64.ScalUnitary(alpha, bi) 1700 } 1701 for j, bij := range bi { 1702 if bij == 0 { 1703 continue 1704 } 1705 if noUnit { 1706 bi[j] /= a[j*lda+j] 1707 } 1708 c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1709 } 1710 } 1711 } else { 1712 for i := 0; i < m; i++ { 1713 bi := b[i*ldb : i*ldb+n] 1714 if alpha != 1 { 1715 c64.ScalUnitary(alpha, bi) 1716 } 1717 for j := n - 1; j >= 0; j-- { 1718 if bi[j] == 0 { 1719 continue 1720 } 1721 if noUnit { 1722 bi[j] /= a[j*lda+j] 1723 } 1724 c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) 1725 } 1726 } 1727 } 1728 } else { 1729 // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). 1730 if uplo == blas.Upper { 1731 for i := 0; i < m; i++ { 1732 bi := b[i*ldb : i*ldb+n] 1733 for j := n - 1; j >= 0; j-- { 1734 bij := alpha * bi[j] 1735 if noConj { 1736 bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1737 if noUnit { 1738 bij /= a[j*lda+j] 1739 } 1740 } else { 1741 bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1742 if noUnit { 1743 bij /= cmplx.Conj(a[j*lda+j]) 1744 } 1745 } 1746 bi[j] = bij 1747 } 1748 } 1749 } else { 1750 for i := 0; i < m; i++ { 1751 bi := b[i*ldb : i*ldb+n] 1752 for j, bij := range bi { 1753 bij *= alpha 1754 if noConj { 1755 bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1756 if noUnit { 1757 bij /= a[j*lda+j] 1758 } 1759 } else { 1760 bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1761 if noUnit { 1762 bij /= cmplx.Conj(a[j*lda+j]) 1763 } 1764 } 1765 bi[j] = bij 1766 } 1767 } 1768 } 1769 } 1770 } 1771 }