github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level3cmplx64.go (about) 1 // Code generated by "go generate github.com/jingcheng-WU/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2019 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 cmplx "github.com/jingcheng-WU/gonum/internal/cmplx64" 11 12 "github.com/jingcheng-WU/gonum/blas" 13 "github.com/jingcheng-WU/gonum/internal/asm/c64" 14 ) 15 16 var _ blas.Complex64Level3 = Implementation{} 17 18 // Cgemm performs one of the matrix-matrix operations 19 // C = alpha * op(A) * op(B) + beta * C 20 // where op(X) is one of 21 // op(X) = X or op(X) = Xᵀ or op(X) = Xᴴ, 22 // alpha and beta are scalars, and A, B and C are matrices, with op(A) an m×k matrix, 23 // op(B) a k×n matrix and C an m×n matrix. 24 // 25 // Complex64 implementations are autogenerated and not directly tested. 26 func (Implementation) Cgemm(tA, tB blas.Transpose, m, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 27 switch tA { 28 default: 29 panic(badTranspose) 30 case blas.NoTrans, blas.Trans, blas.ConjTrans: 31 } 32 switch tB { 33 default: 34 panic(badTranspose) 35 case blas.NoTrans, blas.Trans, blas.ConjTrans: 36 } 37 switch { 38 case m < 0: 39 panic(mLT0) 40 case n < 0: 41 panic(nLT0) 42 case k < 0: 43 panic(kLT0) 44 } 45 rowA, colA := m, k 46 if tA != blas.NoTrans { 47 rowA, colA = k, m 48 } 49 if lda < max(1, colA) { 50 panic(badLdA) 51 } 52 rowB, colB := k, n 53 if tB != blas.NoTrans { 54 rowB, colB = n, k 55 } 56 if ldb < max(1, colB) { 57 panic(badLdB) 58 } 59 if ldc < max(1, n) { 60 panic(badLdC) 61 } 62 63 // Quick return if possible. 64 if m == 0 || n == 0 { 65 return 66 } 67 68 // For zero matrix size the following slice length checks are trivially satisfied. 69 if len(a) < (rowA-1)*lda+colA { 70 panic(shortA) 71 } 72 if len(b) < (rowB-1)*ldb+colB { 73 panic(shortB) 74 } 75 if len(c) < (m-1)*ldc+n { 76 panic(shortC) 77 } 78 79 // Quick return if possible. 80 if (alpha == 0 || k == 0) && beta == 1 { 81 return 82 } 83 84 if alpha == 0 { 85 if beta == 0 { 86 for i := 0; i < m; i++ { 87 for j := 0; j < n; j++ { 88 c[i*ldc+j] = 0 89 } 90 } 91 } else { 92 for i := 0; i < m; i++ { 93 for j := 0; j < n; j++ { 94 c[i*ldc+j] *= beta 95 } 96 } 97 } 98 return 99 } 100 101 switch tA { 102 case blas.NoTrans: 103 switch tB { 104 case blas.NoTrans: 105 // Form C = alpha * A * B + beta * C. 106 for i := 0; i < m; i++ { 107 switch { 108 case beta == 0: 109 for j := 0; j < n; j++ { 110 c[i*ldc+j] = 0 111 } 112 case beta != 1: 113 for j := 0; j < n; j++ { 114 c[i*ldc+j] *= beta 115 } 116 } 117 for l := 0; l < k; l++ { 118 tmp := alpha * a[i*lda+l] 119 for j := 0; j < n; j++ { 120 c[i*ldc+j] += tmp * b[l*ldb+j] 121 } 122 } 123 } 124 case blas.Trans: 125 // Form C = alpha * A * Bᵀ + beta * C. 126 for i := 0; i < m; i++ { 127 switch { 128 case beta == 0: 129 for j := 0; j < n; j++ { 130 c[i*ldc+j] = 0 131 } 132 case beta != 1: 133 for j := 0; j < n; j++ { 134 c[i*ldc+j] *= beta 135 } 136 } 137 for l := 0; l < k; l++ { 138 tmp := alpha * a[i*lda+l] 139 for j := 0; j < n; j++ { 140 c[i*ldc+j] += tmp * b[j*ldb+l] 141 } 142 } 143 } 144 case blas.ConjTrans: 145 // Form C = alpha * A * Bᴴ + beta * C. 146 for i := 0; i < m; i++ { 147 switch { 148 case beta == 0: 149 for j := 0; j < n; j++ { 150 c[i*ldc+j] = 0 151 } 152 case beta != 1: 153 for j := 0; j < n; j++ { 154 c[i*ldc+j] *= beta 155 } 156 } 157 for l := 0; l < k; l++ { 158 tmp := alpha * a[i*lda+l] 159 for j := 0; j < n; j++ { 160 c[i*ldc+j] += tmp * cmplx.Conj(b[j*ldb+l]) 161 } 162 } 163 } 164 } 165 case blas.Trans: 166 switch tB { 167 case blas.NoTrans: 168 // Form C = alpha * Aᵀ * B + beta * C. 169 for i := 0; i < m; i++ { 170 for j := 0; j < n; j++ { 171 var tmp complex64 172 for l := 0; l < k; l++ { 173 tmp += a[l*lda+i] * b[l*ldb+j] 174 } 175 if beta == 0 { 176 c[i*ldc+j] = alpha * tmp 177 } else { 178 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 179 } 180 } 181 } 182 case blas.Trans: 183 // Form C = alpha * Aᵀ * Bᵀ + beta * C. 184 for i := 0; i < m; i++ { 185 for j := 0; j < n; j++ { 186 var tmp complex64 187 for l := 0; l < k; l++ { 188 tmp += a[l*lda+i] * b[j*ldb+l] 189 } 190 if beta == 0 { 191 c[i*ldc+j] = alpha * tmp 192 } else { 193 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 194 } 195 } 196 } 197 case blas.ConjTrans: 198 // Form C = alpha * Aᵀ * Bᴴ + beta * C. 199 for i := 0; i < m; i++ { 200 for j := 0; j < n; j++ { 201 var tmp complex64 202 for l := 0; l < k; l++ { 203 tmp += a[l*lda+i] * cmplx.Conj(b[j*ldb+l]) 204 } 205 if beta == 0 { 206 c[i*ldc+j] = alpha * tmp 207 } else { 208 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 209 } 210 } 211 } 212 } 213 case blas.ConjTrans: 214 switch tB { 215 case blas.NoTrans: 216 // Form C = alpha * Aᴴ * B + beta * C. 217 for i := 0; i < m; i++ { 218 for j := 0; j < n; j++ { 219 var tmp complex64 220 for l := 0; l < k; l++ { 221 tmp += cmplx.Conj(a[l*lda+i]) * b[l*ldb+j] 222 } 223 if beta == 0 { 224 c[i*ldc+j] = alpha * tmp 225 } else { 226 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 227 } 228 } 229 } 230 case blas.Trans: 231 // Form C = alpha * Aᴴ * Bᵀ + beta * C. 232 for i := 0; i < m; i++ { 233 for j := 0; j < n; j++ { 234 var tmp complex64 235 for l := 0; l < k; l++ { 236 tmp += cmplx.Conj(a[l*lda+i]) * b[j*ldb+l] 237 } 238 if beta == 0 { 239 c[i*ldc+j] = alpha * tmp 240 } else { 241 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 242 } 243 } 244 } 245 case blas.ConjTrans: 246 // Form C = alpha * Aᴴ * Bᴴ + beta * C. 247 for i := 0; i < m; i++ { 248 for j := 0; j < n; j++ { 249 var tmp complex64 250 for l := 0; l < k; l++ { 251 tmp += cmplx.Conj(a[l*lda+i]) * cmplx.Conj(b[j*ldb+l]) 252 } 253 if beta == 0 { 254 c[i*ldc+j] = alpha * tmp 255 } else { 256 c[i*ldc+j] = alpha*tmp + beta*c[i*ldc+j] 257 } 258 } 259 } 260 } 261 } 262 } 263 264 // Chemm performs one of the matrix-matrix operations 265 // C = alpha*A*B + beta*C if side == blas.Left 266 // C = alpha*B*A + beta*C if side == blas.Right 267 // where alpha and beta are scalars, A is an m×m or n×n hermitian matrix and B 268 // and C are m×n matrices. The imaginary parts of the diagonal elements of A are 269 // assumed to be zero. 270 // 271 // Complex64 implementations are autogenerated and not directly tested. 272 func (Implementation) Chemm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 273 na := m 274 if side == blas.Right { 275 na = n 276 } 277 switch { 278 case side != blas.Left && side != blas.Right: 279 panic(badSide) 280 case uplo != blas.Lower && uplo != blas.Upper: 281 panic(badUplo) 282 case m < 0: 283 panic(mLT0) 284 case n < 0: 285 panic(nLT0) 286 case lda < max(1, na): 287 panic(badLdA) 288 case ldb < max(1, n): 289 panic(badLdB) 290 case ldc < max(1, n): 291 panic(badLdC) 292 } 293 294 // Quick return if possible. 295 if m == 0 || n == 0 { 296 return 297 } 298 299 // For zero matrix size the following slice length checks are trivially satisfied. 300 if len(a) < lda*(na-1)+na { 301 panic(shortA) 302 } 303 if len(b) < ldb*(m-1)+n { 304 panic(shortB) 305 } 306 if len(c) < ldc*(m-1)+n { 307 panic(shortC) 308 } 309 310 // Quick return if possible. 311 if alpha == 0 && beta == 1 { 312 return 313 } 314 315 if alpha == 0 { 316 if beta == 0 { 317 for i := 0; i < m; i++ { 318 ci := c[i*ldc : i*ldc+n] 319 for j := range ci { 320 ci[j] = 0 321 } 322 } 323 } else { 324 for i := 0; i < m; i++ { 325 ci := c[i*ldc : i*ldc+n] 326 c64.ScalUnitary(beta, ci) 327 } 328 } 329 return 330 } 331 332 if side == blas.Left { 333 // Form C = alpha*A*B + beta*C. 334 for i := 0; i < m; i++ { 335 atmp := alpha * complex(real(a[i*lda+i]), 0) 336 bi := b[i*ldb : i*ldb+n] 337 ci := c[i*ldc : i*ldc+n] 338 if beta == 0 { 339 for j, bij := range bi { 340 ci[j] = atmp * bij 341 } 342 } else { 343 for j, bij := range bi { 344 ci[j] = atmp*bij + beta*ci[j] 345 } 346 } 347 if uplo == blas.Upper { 348 for k := 0; k < i; k++ { 349 atmp = alpha * cmplx.Conj(a[k*lda+i]) 350 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 351 } 352 for k := i + 1; k < m; k++ { 353 atmp = alpha * a[i*lda+k] 354 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 355 } 356 } else { 357 for k := 0; k < i; k++ { 358 atmp = alpha * a[i*lda+k] 359 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 360 } 361 for k := i + 1; k < m; k++ { 362 atmp = alpha * cmplx.Conj(a[k*lda+i]) 363 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 364 } 365 } 366 } 367 } else { 368 // Form C = alpha*B*A + beta*C. 369 if uplo == blas.Upper { 370 for i := 0; i < m; i++ { 371 for j := n - 1; j >= 0; j-- { 372 abij := alpha * b[i*ldb+j] 373 aj := a[j*lda+j+1 : j*lda+n] 374 bi := b[i*ldb+j+1 : i*ldb+n] 375 ci := c[i*ldc+j+1 : i*ldc+n] 376 var tmp complex64 377 for k, ajk := range aj { 378 ci[k] += abij * ajk 379 tmp += bi[k] * cmplx.Conj(ajk) 380 } 381 ajj := complex(real(a[j*lda+j]), 0) 382 if beta == 0 { 383 c[i*ldc+j] = abij*ajj + alpha*tmp 384 } else { 385 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 386 } 387 } 388 } 389 } else { 390 for i := 0; i < m; i++ { 391 for j := 0; j < n; j++ { 392 abij := alpha * b[i*ldb+j] 393 aj := a[j*lda : j*lda+j] 394 bi := b[i*ldb : i*ldb+j] 395 ci := c[i*ldc : i*ldc+j] 396 var tmp complex64 397 for k, ajk := range aj { 398 ci[k] += abij * ajk 399 tmp += bi[k] * cmplx.Conj(ajk) 400 } 401 ajj := complex(real(a[j*lda+j]), 0) 402 if beta == 0 { 403 c[i*ldc+j] = abij*ajj + alpha*tmp 404 } else { 405 c[i*ldc+j] = abij*ajj + alpha*tmp + beta*c[i*ldc+j] 406 } 407 } 408 } 409 } 410 } 411 } 412 413 // Cherk performs one of the hermitian rank-k operations 414 // C = alpha*A*Aᴴ + beta*C if trans == blas.NoTrans 415 // C = alpha*Aᴴ*A + beta*C if trans == blas.ConjTrans 416 // where alpha and beta are real scalars, C is an n×n hermitian matrix and A is 417 // an n×k matrix in the first case and a k×n matrix in the second case. 418 // 419 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 420 // on return they will be set to zero. 421 // 422 // Complex64 implementations are autogenerated and not directly tested. 423 func (Implementation) Cherk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha float32, a []complex64, lda int, beta float32, c []complex64, ldc int) { 424 var rowA, colA int 425 switch trans { 426 default: 427 panic(badTranspose) 428 case blas.NoTrans: 429 rowA, colA = n, k 430 case blas.ConjTrans: 431 rowA, colA = k, n 432 } 433 switch { 434 case uplo != blas.Lower && uplo != blas.Upper: 435 panic(badUplo) 436 case n < 0: 437 panic(nLT0) 438 case k < 0: 439 panic(kLT0) 440 case lda < max(1, colA): 441 panic(badLdA) 442 case ldc < max(1, n): 443 panic(badLdC) 444 } 445 446 // Quick return if possible. 447 if n == 0 { 448 return 449 } 450 451 // For zero matrix size the following slice length checks are trivially satisfied. 452 if len(a) < (rowA-1)*lda+colA { 453 panic(shortA) 454 } 455 if len(c) < (n-1)*ldc+n { 456 panic(shortC) 457 } 458 459 // Quick return if possible. 460 if (alpha == 0 || k == 0) && beta == 1 { 461 return 462 } 463 464 if alpha == 0 { 465 if uplo == blas.Upper { 466 if beta == 0 { 467 for i := 0; i < n; i++ { 468 ci := c[i*ldc+i : i*ldc+n] 469 for j := range ci { 470 ci[j] = 0 471 } 472 } 473 } else { 474 for i := 0; i < n; i++ { 475 ci := c[i*ldc+i : i*ldc+n] 476 ci[0] = complex(beta*real(ci[0]), 0) 477 if i != n-1 { 478 c64.SscalUnitary(beta, ci[1:]) 479 } 480 } 481 } 482 } else { 483 if beta == 0 { 484 for i := 0; i < n; i++ { 485 ci := c[i*ldc : i*ldc+i+1] 486 for j := range ci { 487 ci[j] = 0 488 } 489 } 490 } else { 491 for i := 0; i < n; i++ { 492 ci := c[i*ldc : i*ldc+i+1] 493 if i != 0 { 494 c64.SscalUnitary(beta, ci[:i]) 495 } 496 ci[i] = complex(beta*real(ci[i]), 0) 497 } 498 } 499 } 500 return 501 } 502 503 calpha := complex(alpha, 0) 504 if trans == blas.NoTrans { 505 // Form C = alpha*A*Aᴴ + beta*C. 506 cbeta := complex(beta, 0) 507 if uplo == blas.Upper { 508 for i := 0; i < n; i++ { 509 ci := c[i*ldc+i : i*ldc+n] 510 ai := a[i*lda : i*lda+k] 511 switch { 512 case beta == 0: 513 // Handle the i-th diagonal element of C. 514 ci[0] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) 515 // Handle the remaining elements on the i-th row of C. 516 for jc := range ci[1:] { 517 j := i + 1 + jc 518 ci[jc+1] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) 519 } 520 case beta != 1: 521 cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[0] 522 ci[0] = complex(real(cii), 0) 523 for jc, cij := range ci[1:] { 524 j := i + 1 + jc 525 ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 526 } 527 default: 528 cii := calpha*c64.DotcUnitary(ai, ai) + ci[0] 529 ci[0] = complex(real(cii), 0) 530 for jc, cij := range ci[1:] { 531 j := i + 1 + jc 532 ci[jc+1] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 533 } 534 } 535 } 536 } else { 537 for i := 0; i < n; i++ { 538 ci := c[i*ldc : i*ldc+i+1] 539 ai := a[i*lda : i*lda+k] 540 switch { 541 case beta == 0: 542 // Handle the first i-1 elements on the i-th row of C. 543 for j := range ci[:i] { 544 ci[j] = calpha * c64.DotcUnitary(a[j*lda:j*lda+k], ai) 545 } 546 // Handle the i-th diagonal element of C. 547 ci[i] = complex(alpha*real(c64.DotcUnitary(ai, ai)), 0) 548 case beta != 1: 549 for j, cij := range ci[:i] { 550 ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cbeta*cij 551 } 552 cii := calpha*c64.DotcUnitary(ai, ai) + cbeta*ci[i] 553 ci[i] = complex(real(cii), 0) 554 default: 555 for j, cij := range ci[:i] { 556 ci[j] = calpha*c64.DotcUnitary(a[j*lda:j*lda+k], ai) + cij 557 } 558 cii := calpha*c64.DotcUnitary(ai, ai) + ci[i] 559 ci[i] = complex(real(cii), 0) 560 } 561 } 562 } 563 } else { 564 // Form C = alpha*Aᴴ*A + beta*C. 565 if uplo == blas.Upper { 566 for i := 0; i < n; i++ { 567 ci := c[i*ldc+i : i*ldc+n] 568 switch { 569 case beta == 0: 570 for jc := range ci { 571 ci[jc] = 0 572 } 573 case beta != 1: 574 c64.SscalUnitary(beta, ci) 575 ci[0] = complex(real(ci[0]), 0) 576 default: 577 ci[0] = complex(real(ci[0]), 0) 578 } 579 for j := 0; j < k; j++ { 580 aji := cmplx.Conj(a[j*lda+i]) 581 if aji != 0 { 582 c64.AxpyUnitary(calpha*aji, a[j*lda+i:j*lda+n], ci) 583 } 584 } 585 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 586 } 587 } else { 588 for i := 0; i < n; i++ { 589 ci := c[i*ldc : i*ldc+i+1] 590 switch { 591 case beta == 0: 592 for j := range ci { 593 ci[j] = 0 594 } 595 case beta != 1: 596 c64.SscalUnitary(beta, ci) 597 ci[i] = complex(real(ci[i]), 0) 598 default: 599 ci[i] = complex(real(ci[i]), 0) 600 } 601 for j := 0; j < k; j++ { 602 aji := cmplx.Conj(a[j*lda+i]) 603 if aji != 0 { 604 c64.AxpyUnitary(calpha*aji, a[j*lda:j*lda+i+1], ci) 605 } 606 } 607 c[i*ldc+i] = complex(real(c[i*ldc+i]), 0) 608 } 609 } 610 } 611 } 612 613 // Cher2k performs one of the hermitian rank-2k operations 614 // C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C if trans == blas.NoTrans 615 // C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C if trans == blas.ConjTrans 616 // where alpha and beta are scalars with beta real, C is an n×n hermitian matrix 617 // and A and B are n×k matrices in the first case and k×n matrices in the second case. 618 // 619 // The imaginary parts of the diagonal elements of C are assumed to be zero, and 620 // on return they will be set to zero. 621 // 622 // Complex64 implementations are autogenerated and not directly tested. 623 func (Implementation) Cher2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta float32, c []complex64, ldc int) { 624 var row, col int 625 switch trans { 626 default: 627 panic(badTranspose) 628 case blas.NoTrans: 629 row, col = n, k 630 case blas.ConjTrans: 631 row, col = k, n 632 } 633 switch { 634 case uplo != blas.Lower && uplo != blas.Upper: 635 panic(badUplo) 636 case n < 0: 637 panic(nLT0) 638 case k < 0: 639 panic(kLT0) 640 case lda < max(1, col): 641 panic(badLdA) 642 case ldb < max(1, col): 643 panic(badLdB) 644 case ldc < max(1, n): 645 panic(badLdC) 646 } 647 648 // Quick return if possible. 649 if n == 0 { 650 return 651 } 652 653 // For zero matrix size the following slice length checks are trivially satisfied. 654 if len(a) < (row-1)*lda+col { 655 panic(shortA) 656 } 657 if len(b) < (row-1)*ldb+col { 658 panic(shortB) 659 } 660 if len(c) < (n-1)*ldc+n { 661 panic(shortC) 662 } 663 664 // Quick return if possible. 665 if (alpha == 0 || k == 0) && beta == 1 { 666 return 667 } 668 669 if alpha == 0 { 670 if uplo == blas.Upper { 671 if beta == 0 { 672 for i := 0; i < n; i++ { 673 ci := c[i*ldc+i : i*ldc+n] 674 for j := range ci { 675 ci[j] = 0 676 } 677 } 678 } else { 679 for i := 0; i < n; i++ { 680 ci := c[i*ldc+i : i*ldc+n] 681 ci[0] = complex(beta*real(ci[0]), 0) 682 if i != n-1 { 683 c64.SscalUnitary(beta, ci[1:]) 684 } 685 } 686 } 687 } else { 688 if beta == 0 { 689 for i := 0; i < n; i++ { 690 ci := c[i*ldc : i*ldc+i+1] 691 for j := range ci { 692 ci[j] = 0 693 } 694 } 695 } else { 696 for i := 0; i < n; i++ { 697 ci := c[i*ldc : i*ldc+i+1] 698 if i != 0 { 699 c64.SscalUnitary(beta, ci[:i]) 700 } 701 ci[i] = complex(beta*real(ci[i]), 0) 702 } 703 } 704 } 705 return 706 } 707 708 conjalpha := cmplx.Conj(alpha) 709 cbeta := complex(beta, 0) 710 if trans == blas.NoTrans { 711 // Form C = alpha*A*Bᴴ + conj(alpha)*B*Aᴴ + beta*C. 712 if uplo == blas.Upper { 713 for i := 0; i < n; i++ { 714 ci := c[i*ldc+i+1 : i*ldc+n] 715 ai := a[i*lda : i*lda+k] 716 bi := b[i*ldb : i*ldb+k] 717 if beta == 0 { 718 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) 719 c[i*ldc+i] = complex(real(cii), 0) 720 for jc := range ci { 721 j := i + 1 + jc 722 ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) 723 } 724 } else { 725 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 726 c[i*ldc+i] = complex(real(cii), 0) 727 for jc, cij := range ci { 728 j := i + 1 + jc 729 ci[jc] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 730 } 731 } 732 } 733 } else { 734 for i := 0; i < n; i++ { 735 ci := c[i*ldc : i*ldc+i] 736 ai := a[i*lda : i*lda+k] 737 bi := b[i*ldb : i*ldb+k] 738 if beta == 0 { 739 for j := range ci { 740 ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) 741 } 742 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) 743 c[i*ldc+i] = complex(real(cii), 0) 744 } else { 745 for j, cij := range ci { 746 ci[j] = alpha*c64.DotcUnitary(b[j*ldb:j*ldb+k], ai) + conjalpha*c64.DotcUnitary(a[j*lda:j*lda+k], bi) + cbeta*cij 747 } 748 cii := alpha*c64.DotcUnitary(bi, ai) + conjalpha*c64.DotcUnitary(ai, bi) + cbeta*c[i*ldc+i] 749 c[i*ldc+i] = complex(real(cii), 0) 750 } 751 } 752 } 753 } else { 754 // Form C = alpha*Aᴴ*B + conj(alpha)*Bᴴ*A + beta*C. 755 if uplo == blas.Upper { 756 for i := 0; i < n; i++ { 757 ci := c[i*ldc+i : i*ldc+n] 758 switch { 759 case beta == 0: 760 for jc := range ci { 761 ci[jc] = 0 762 } 763 case beta != 1: 764 c64.SscalUnitary(beta, ci) 765 ci[0] = complex(real(ci[0]), 0) 766 default: 767 ci[0] = complex(real(ci[0]), 0) 768 } 769 for j := 0; j < k; j++ { 770 aji := a[j*lda+i] 771 bji := b[j*ldb+i] 772 if aji != 0 { 773 c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb+i:j*ldb+n], ci) 774 } 775 if bji != 0 { 776 c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda+i:j*lda+n], ci) 777 } 778 } 779 ci[0] = complex(real(ci[0]), 0) 780 } 781 } else { 782 for i := 0; i < n; i++ { 783 ci := c[i*ldc : i*ldc+i+1] 784 switch { 785 case beta == 0: 786 for j := range ci { 787 ci[j] = 0 788 } 789 case beta != 1: 790 c64.SscalUnitary(beta, ci) 791 ci[i] = complex(real(ci[i]), 0) 792 default: 793 ci[i] = complex(real(ci[i]), 0) 794 } 795 for j := 0; j < k; j++ { 796 aji := a[j*lda+i] 797 bji := b[j*ldb+i] 798 if aji != 0 { 799 c64.AxpyUnitary(alpha*cmplx.Conj(aji), b[j*ldb:j*ldb+i+1], ci) 800 } 801 if bji != 0 { 802 c64.AxpyUnitary(conjalpha*cmplx.Conj(bji), a[j*lda:j*lda+i+1], ci) 803 } 804 } 805 ci[i] = complex(real(ci[i]), 0) 806 } 807 } 808 } 809 } 810 811 // Csymm performs one of the matrix-matrix operations 812 // C = alpha*A*B + beta*C if side == blas.Left 813 // C = alpha*B*A + beta*C if side == blas.Right 814 // where alpha and beta are scalars, A is an m×m or n×n symmetric matrix and B 815 // and C are m×n matrices. 816 // 817 // Complex64 implementations are autogenerated and not directly tested. 818 func (Implementation) Csymm(side blas.Side, uplo blas.Uplo, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 819 na := m 820 if side == blas.Right { 821 na = n 822 } 823 switch { 824 case side != blas.Left && side != blas.Right: 825 panic(badSide) 826 case uplo != blas.Lower && uplo != blas.Upper: 827 panic(badUplo) 828 case m < 0: 829 panic(mLT0) 830 case n < 0: 831 panic(nLT0) 832 case lda < max(1, na): 833 panic(badLdA) 834 case ldb < max(1, n): 835 panic(badLdB) 836 case ldc < max(1, n): 837 panic(badLdC) 838 } 839 840 // Quick return if possible. 841 if m == 0 || n == 0 { 842 return 843 } 844 845 // For zero matrix size the following slice length checks are trivially satisfied. 846 if len(a) < lda*(na-1)+na { 847 panic(shortA) 848 } 849 if len(b) < ldb*(m-1)+n { 850 panic(shortB) 851 } 852 if len(c) < ldc*(m-1)+n { 853 panic(shortC) 854 } 855 856 // Quick return if possible. 857 if alpha == 0 && beta == 1 { 858 return 859 } 860 861 if alpha == 0 { 862 if beta == 0 { 863 for i := 0; i < m; i++ { 864 ci := c[i*ldc : i*ldc+n] 865 for j := range ci { 866 ci[j] = 0 867 } 868 } 869 } else { 870 for i := 0; i < m; i++ { 871 ci := c[i*ldc : i*ldc+n] 872 c64.ScalUnitary(beta, ci) 873 } 874 } 875 return 876 } 877 878 if side == blas.Left { 879 // Form C = alpha*A*B + beta*C. 880 for i := 0; i < m; i++ { 881 atmp := alpha * a[i*lda+i] 882 bi := b[i*ldb : i*ldb+n] 883 ci := c[i*ldc : i*ldc+n] 884 if beta == 0 { 885 for j, bij := range bi { 886 ci[j] = atmp * bij 887 } 888 } else { 889 for j, bij := range bi { 890 ci[j] = atmp*bij + beta*ci[j] 891 } 892 } 893 if uplo == blas.Upper { 894 for k := 0; k < i; k++ { 895 atmp = alpha * a[k*lda+i] 896 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 897 } 898 for k := i + 1; k < m; k++ { 899 atmp = alpha * a[i*lda+k] 900 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 901 } 902 } else { 903 for k := 0; k < i; k++ { 904 atmp = alpha * a[i*lda+k] 905 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 906 } 907 for k := i + 1; k < m; k++ { 908 atmp = alpha * a[k*lda+i] 909 c64.AxpyUnitary(atmp, b[k*ldb:k*ldb+n], ci) 910 } 911 } 912 } 913 } else { 914 // Form C = alpha*B*A + beta*C. 915 if uplo == blas.Upper { 916 for i := 0; i < m; i++ { 917 for j := n - 1; j >= 0; j-- { 918 abij := alpha * b[i*ldb+j] 919 aj := a[j*lda+j+1 : j*lda+n] 920 bi := b[i*ldb+j+1 : i*ldb+n] 921 ci := c[i*ldc+j+1 : i*ldc+n] 922 var tmp complex64 923 for k, ajk := range aj { 924 ci[k] += abij * ajk 925 tmp += bi[k] * ajk 926 } 927 if beta == 0 { 928 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 929 } else { 930 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 931 } 932 } 933 } 934 } else { 935 for i := 0; i < m; i++ { 936 for j := 0; j < n; j++ { 937 abij := alpha * b[i*ldb+j] 938 aj := a[j*lda : j*lda+j] 939 bi := b[i*ldb : i*ldb+j] 940 ci := c[i*ldc : i*ldc+j] 941 var tmp complex64 942 for k, ajk := range aj { 943 ci[k] += abij * ajk 944 tmp += bi[k] * ajk 945 } 946 if beta == 0 { 947 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp 948 } else { 949 c[i*ldc+j] = abij*a[j*lda+j] + alpha*tmp + beta*c[i*ldc+j] 950 } 951 } 952 } 953 } 954 } 955 } 956 957 // Csyrk performs one of the symmetric rank-k operations 958 // C = alpha*A*Aᵀ + beta*C if trans == blas.NoTrans 959 // C = alpha*Aᵀ*A + beta*C if trans == blas.Trans 960 // where alpha and beta are scalars, C is an n×n symmetric matrix and A is 961 // an n×k matrix in the first case and a k×n matrix in the second case. 962 // 963 // Complex64 implementations are autogenerated and not directly tested. 964 func (Implementation) Csyrk(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, beta complex64, c []complex64, ldc int) { 965 var rowA, colA int 966 switch trans { 967 default: 968 panic(badTranspose) 969 case blas.NoTrans: 970 rowA, colA = n, k 971 case blas.Trans: 972 rowA, colA = k, n 973 } 974 switch { 975 case uplo != blas.Lower && uplo != blas.Upper: 976 panic(badUplo) 977 case n < 0: 978 panic(nLT0) 979 case k < 0: 980 panic(kLT0) 981 case lda < max(1, colA): 982 panic(badLdA) 983 case ldc < max(1, n): 984 panic(badLdC) 985 } 986 987 // Quick return if possible. 988 if n == 0 { 989 return 990 } 991 992 // For zero matrix size the following slice length checks are trivially satisfied. 993 if len(a) < (rowA-1)*lda+colA { 994 panic(shortA) 995 } 996 if len(c) < (n-1)*ldc+n { 997 panic(shortC) 998 } 999 1000 // Quick return if possible. 1001 if (alpha == 0 || k == 0) && beta == 1 { 1002 return 1003 } 1004 1005 if alpha == 0 { 1006 if uplo == blas.Upper { 1007 if beta == 0 { 1008 for i := 0; i < n; i++ { 1009 ci := c[i*ldc+i : i*ldc+n] 1010 for j := range ci { 1011 ci[j] = 0 1012 } 1013 } 1014 } else { 1015 for i := 0; i < n; i++ { 1016 ci := c[i*ldc+i : i*ldc+n] 1017 c64.ScalUnitary(beta, ci) 1018 } 1019 } 1020 } else { 1021 if beta == 0 { 1022 for i := 0; i < n; i++ { 1023 ci := c[i*ldc : i*ldc+i+1] 1024 for j := range ci { 1025 ci[j] = 0 1026 } 1027 } 1028 } else { 1029 for i := 0; i < n; i++ { 1030 ci := c[i*ldc : i*ldc+i+1] 1031 c64.ScalUnitary(beta, ci) 1032 } 1033 } 1034 } 1035 return 1036 } 1037 1038 if trans == blas.NoTrans { 1039 // Form C = alpha*A*Aᵀ + beta*C. 1040 if uplo == blas.Upper { 1041 for i := 0; i < n; i++ { 1042 ci := c[i*ldc+i : i*ldc+n] 1043 ai := a[i*lda : i*lda+k] 1044 if beta == 0 { 1045 for jc := range ci { 1046 j := i + jc 1047 ci[jc] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1048 } 1049 } else { 1050 for jc, cij := range ci { 1051 j := i + jc 1052 ci[jc] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1053 } 1054 } 1055 } 1056 } else { 1057 for i := 0; i < n; i++ { 1058 ci := c[i*ldc : i*ldc+i+1] 1059 ai := a[i*lda : i*lda+k] 1060 if beta == 0 { 1061 for j := range ci { 1062 ci[j] = alpha * c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1063 } 1064 } else { 1065 for j, cij := range ci { 1066 ci[j] = beta*cij + alpha*c64.DotuUnitary(ai, a[j*lda:j*lda+k]) 1067 } 1068 } 1069 } 1070 } 1071 } else { 1072 // Form C = alpha*Aᵀ*A + beta*C. 1073 if uplo == blas.Upper { 1074 for i := 0; i < n; i++ { 1075 ci := c[i*ldc+i : i*ldc+n] 1076 switch { 1077 case beta == 0: 1078 for jc := range ci { 1079 ci[jc] = 0 1080 } 1081 case beta != 1: 1082 for jc := range ci { 1083 ci[jc] *= beta 1084 } 1085 } 1086 for j := 0; j < k; j++ { 1087 aji := a[j*lda+i] 1088 if aji != 0 { 1089 c64.AxpyUnitary(alpha*aji, a[j*lda+i:j*lda+n], ci) 1090 } 1091 } 1092 } 1093 } else { 1094 for i := 0; i < n; i++ { 1095 ci := c[i*ldc : i*ldc+i+1] 1096 switch { 1097 case beta == 0: 1098 for j := range ci { 1099 ci[j] = 0 1100 } 1101 case beta != 1: 1102 for j := range ci { 1103 ci[j] *= beta 1104 } 1105 } 1106 for j := 0; j < k; j++ { 1107 aji := a[j*lda+i] 1108 if aji != 0 { 1109 c64.AxpyUnitary(alpha*aji, a[j*lda:j*lda+i+1], ci) 1110 } 1111 } 1112 } 1113 } 1114 } 1115 } 1116 1117 // Csyr2k performs one of the symmetric rank-2k operations 1118 // C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C if trans == blas.NoTrans 1119 // C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C if trans == blas.Trans 1120 // where alpha and beta are scalars, C is an n×n symmetric matrix and A and B 1121 // are n×k matrices in the first case and k×n matrices in the second case. 1122 // 1123 // Complex64 implementations are autogenerated and not directly tested. 1124 func (Implementation) Csyr2k(uplo blas.Uplo, trans blas.Transpose, n, k int, alpha complex64, a []complex64, lda int, b []complex64, ldb int, beta complex64, c []complex64, ldc int) { 1125 var row, col int 1126 switch trans { 1127 default: 1128 panic(badTranspose) 1129 case blas.NoTrans: 1130 row, col = n, k 1131 case blas.Trans: 1132 row, col = k, n 1133 } 1134 switch { 1135 case uplo != blas.Lower && uplo != blas.Upper: 1136 panic(badUplo) 1137 case n < 0: 1138 panic(nLT0) 1139 case k < 0: 1140 panic(kLT0) 1141 case lda < max(1, col): 1142 panic(badLdA) 1143 case ldb < max(1, col): 1144 panic(badLdB) 1145 case ldc < max(1, n): 1146 panic(badLdC) 1147 } 1148 1149 // Quick return if possible. 1150 if n == 0 { 1151 return 1152 } 1153 1154 // For zero matrix size the following slice length checks are trivially satisfied. 1155 if len(a) < (row-1)*lda+col { 1156 panic(shortA) 1157 } 1158 if len(b) < (row-1)*ldb+col { 1159 panic(shortB) 1160 } 1161 if len(c) < (n-1)*ldc+n { 1162 panic(shortC) 1163 } 1164 1165 // Quick return if possible. 1166 if (alpha == 0 || k == 0) && beta == 1 { 1167 return 1168 } 1169 1170 if alpha == 0 { 1171 if uplo == blas.Upper { 1172 if beta == 0 { 1173 for i := 0; i < n; i++ { 1174 ci := c[i*ldc+i : i*ldc+n] 1175 for j := range ci { 1176 ci[j] = 0 1177 } 1178 } 1179 } else { 1180 for i := 0; i < n; i++ { 1181 ci := c[i*ldc+i : i*ldc+n] 1182 c64.ScalUnitary(beta, ci) 1183 } 1184 } 1185 } else { 1186 if beta == 0 { 1187 for i := 0; i < n; i++ { 1188 ci := c[i*ldc : i*ldc+i+1] 1189 for j := range ci { 1190 ci[j] = 0 1191 } 1192 } 1193 } else { 1194 for i := 0; i < n; i++ { 1195 ci := c[i*ldc : i*ldc+i+1] 1196 c64.ScalUnitary(beta, ci) 1197 } 1198 } 1199 } 1200 return 1201 } 1202 1203 if trans == blas.NoTrans { 1204 // Form C = alpha*A*Bᵀ + alpha*B*Aᵀ + beta*C. 1205 if uplo == blas.Upper { 1206 for i := 0; i < n; i++ { 1207 ci := c[i*ldc+i : i*ldc+n] 1208 ai := a[i*lda : i*lda+k] 1209 bi := b[i*ldb : i*ldb+k] 1210 if beta == 0 { 1211 for jc := range ci { 1212 j := i + jc 1213 ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) 1214 } 1215 } else { 1216 for jc, cij := range ci { 1217 j := i + jc 1218 ci[jc] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1219 } 1220 } 1221 } 1222 } else { 1223 for i := 0; i < n; i++ { 1224 ci := c[i*ldc : i*ldc+i+1] 1225 ai := a[i*lda : i*lda+k] 1226 bi := b[i*ldb : i*ldb+k] 1227 if beta == 0 { 1228 for j := range ci { 1229 ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) 1230 } 1231 } else { 1232 for j, cij := range ci { 1233 ci[j] = alpha*c64.DotuUnitary(ai, b[j*ldb:j*ldb+k]) + alpha*c64.DotuUnitary(bi, a[j*lda:j*lda+k]) + beta*cij 1234 } 1235 } 1236 } 1237 } 1238 } else { 1239 // Form C = alpha*Aᵀ*B + alpha*Bᵀ*A + beta*C. 1240 if uplo == blas.Upper { 1241 for i := 0; i < n; i++ { 1242 ci := c[i*ldc+i : i*ldc+n] 1243 switch { 1244 case beta == 0: 1245 for jc := range ci { 1246 ci[jc] = 0 1247 } 1248 case beta != 1: 1249 for jc := range ci { 1250 ci[jc] *= beta 1251 } 1252 } 1253 for j := 0; j < k; j++ { 1254 aji := a[j*lda+i] 1255 bji := b[j*ldb+i] 1256 if aji != 0 { 1257 c64.AxpyUnitary(alpha*aji, b[j*ldb+i:j*ldb+n], ci) 1258 } 1259 if bji != 0 { 1260 c64.AxpyUnitary(alpha*bji, a[j*lda+i:j*lda+n], ci) 1261 } 1262 } 1263 } 1264 } else { 1265 for i := 0; i < n; i++ { 1266 ci := c[i*ldc : i*ldc+i+1] 1267 switch { 1268 case beta == 0: 1269 for j := range ci { 1270 ci[j] = 0 1271 } 1272 case beta != 1: 1273 for j := range ci { 1274 ci[j] *= beta 1275 } 1276 } 1277 for j := 0; j < k; j++ { 1278 aji := a[j*lda+i] 1279 bji := b[j*ldb+i] 1280 if aji != 0 { 1281 c64.AxpyUnitary(alpha*aji, b[j*ldb:j*ldb+i+1], ci) 1282 } 1283 if bji != 0 { 1284 c64.AxpyUnitary(alpha*bji, a[j*lda:j*lda+i+1], ci) 1285 } 1286 } 1287 } 1288 } 1289 } 1290 } 1291 1292 // Ctrmm performs one of the matrix-matrix operations 1293 // B = alpha * op(A) * B if side == blas.Left, 1294 // B = alpha * B * op(A) if side == blas.Right, 1295 // where alpha is a scalar, B is an m×n matrix, A is a unit, or non-unit, 1296 // upper or lower triangular matrix and op(A) is one of 1297 // op(A) = A if trans == blas.NoTrans, 1298 // op(A) = Aᵀ if trans == blas.Trans, 1299 // op(A) = Aᴴ if trans == blas.ConjTrans. 1300 // 1301 // Complex64 implementations are autogenerated and not directly tested. 1302 func (Implementation) Ctrmm(side blas.Side, uplo blas.Uplo, trans blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { 1303 na := m 1304 if side == blas.Right { 1305 na = n 1306 } 1307 switch { 1308 case side != blas.Left && side != blas.Right: 1309 panic(badSide) 1310 case uplo != blas.Lower && uplo != blas.Upper: 1311 panic(badUplo) 1312 case trans != blas.NoTrans && trans != blas.Trans && trans != blas.ConjTrans: 1313 panic(badTranspose) 1314 case diag != blas.Unit && diag != blas.NonUnit: 1315 panic(badDiag) 1316 case m < 0: 1317 panic(mLT0) 1318 case n < 0: 1319 panic(nLT0) 1320 case lda < max(1, na): 1321 panic(badLdA) 1322 case ldb < max(1, n): 1323 panic(badLdB) 1324 } 1325 1326 // Quick return if possible. 1327 if m == 0 || n == 0 { 1328 return 1329 } 1330 1331 // For zero matrix size the following slice length checks are trivially satisfied. 1332 if len(a) < (na-1)*lda+na { 1333 panic(shortA) 1334 } 1335 if len(b) < (m-1)*ldb+n { 1336 panic(shortB) 1337 } 1338 1339 // Quick return if possible. 1340 if alpha == 0 { 1341 for i := 0; i < m; i++ { 1342 bi := b[i*ldb : i*ldb+n] 1343 for j := range bi { 1344 bi[j] = 0 1345 } 1346 } 1347 return 1348 } 1349 1350 noConj := trans != blas.ConjTrans 1351 noUnit := diag == blas.NonUnit 1352 if side == blas.Left { 1353 if trans == blas.NoTrans { 1354 // Form B = alpha*A*B. 1355 if uplo == blas.Upper { 1356 for i := 0; i < m; i++ { 1357 aii := alpha 1358 if noUnit { 1359 aii *= a[i*lda+i] 1360 } 1361 bi := b[i*ldb : i*ldb+n] 1362 for j := range bi { 1363 bi[j] *= aii 1364 } 1365 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1366 j := ja + i + 1 1367 if aij != 0 { 1368 c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1369 } 1370 } 1371 } 1372 } else { 1373 for i := m - 1; i >= 0; i-- { 1374 aii := alpha 1375 if noUnit { 1376 aii *= a[i*lda+i] 1377 } 1378 bi := b[i*ldb : i*ldb+n] 1379 for j := range bi { 1380 bi[j] *= aii 1381 } 1382 for j, aij := range a[i*lda : i*lda+i] { 1383 if aij != 0 { 1384 c64.AxpyUnitary(alpha*aij, b[j*ldb:j*ldb+n], bi) 1385 } 1386 } 1387 } 1388 } 1389 } else { 1390 // Form B = alpha*Aᵀ*B or B = alpha*Aᴴ*B. 1391 if uplo == blas.Upper { 1392 for k := m - 1; k >= 0; k-- { 1393 bk := b[k*ldb : k*ldb+n] 1394 for ja, ajk := range a[k*lda+k+1 : k*lda+m] { 1395 if ajk == 0 { 1396 continue 1397 } 1398 j := k + 1 + ja 1399 if noConj { 1400 c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1401 } else { 1402 c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1403 } 1404 } 1405 akk := alpha 1406 if noUnit { 1407 if noConj { 1408 akk *= a[k*lda+k] 1409 } else { 1410 akk *= cmplx.Conj(a[k*lda+k]) 1411 } 1412 } 1413 if akk != 1 { 1414 c64.ScalUnitary(akk, bk) 1415 } 1416 } 1417 } else { 1418 for k := 0; k < m; k++ { 1419 bk := b[k*ldb : k*ldb+n] 1420 for j, ajk := range a[k*lda : k*lda+k] { 1421 if ajk == 0 { 1422 continue 1423 } 1424 if noConj { 1425 c64.AxpyUnitary(alpha*ajk, bk, b[j*ldb:j*ldb+n]) 1426 } else { 1427 c64.AxpyUnitary(alpha*cmplx.Conj(ajk), bk, b[j*ldb:j*ldb+n]) 1428 } 1429 } 1430 akk := alpha 1431 if noUnit { 1432 if noConj { 1433 akk *= a[k*lda+k] 1434 } else { 1435 akk *= cmplx.Conj(a[k*lda+k]) 1436 } 1437 } 1438 if akk != 1 { 1439 c64.ScalUnitary(akk, bk) 1440 } 1441 } 1442 } 1443 } 1444 } else { 1445 if trans == blas.NoTrans { 1446 // Form B = alpha*B*A. 1447 if uplo == blas.Upper { 1448 for i := 0; i < m; i++ { 1449 bi := b[i*ldb : i*ldb+n] 1450 for k := n - 1; k >= 0; k-- { 1451 abik := alpha * bi[k] 1452 if abik == 0 { 1453 continue 1454 } 1455 bi[k] = abik 1456 if noUnit { 1457 bi[k] *= a[k*lda+k] 1458 } 1459 c64.AxpyUnitary(abik, a[k*lda+k+1:k*lda+n], bi[k+1:]) 1460 } 1461 } 1462 } else { 1463 for i := 0; i < m; i++ { 1464 bi := b[i*ldb : i*ldb+n] 1465 for k := 0; k < n; k++ { 1466 abik := alpha * bi[k] 1467 if abik == 0 { 1468 continue 1469 } 1470 bi[k] = abik 1471 if noUnit { 1472 bi[k] *= a[k*lda+k] 1473 } 1474 c64.AxpyUnitary(abik, a[k*lda:k*lda+k], bi[:k]) 1475 } 1476 } 1477 } 1478 } else { 1479 // Form B = alpha*B*Aᵀ or B = alpha*B*Aᴴ. 1480 if uplo == blas.Upper { 1481 for i := 0; i < m; i++ { 1482 bi := b[i*ldb : i*ldb+n] 1483 for j, bij := range bi { 1484 if noConj { 1485 if noUnit { 1486 bij *= a[j*lda+j] 1487 } 1488 bij += c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1489 } else { 1490 if noUnit { 1491 bij *= cmplx.Conj(a[j*lda+j]) 1492 } 1493 bij += c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1494 } 1495 bi[j] = alpha * bij 1496 } 1497 } 1498 } else { 1499 for i := 0; i < m; i++ { 1500 bi := b[i*ldb : i*ldb+n] 1501 for j := n - 1; j >= 0; j-- { 1502 bij := bi[j] 1503 if noConj { 1504 if noUnit { 1505 bij *= a[j*lda+j] 1506 } 1507 bij += c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1508 } else { 1509 if noUnit { 1510 bij *= cmplx.Conj(a[j*lda+j]) 1511 } 1512 bij += c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1513 } 1514 bi[j] = alpha * bij 1515 } 1516 } 1517 } 1518 } 1519 } 1520 } 1521 1522 // Ctrsm solves one of the matrix equations 1523 // op(A) * X = alpha * B if side == blas.Left, 1524 // X * op(A) = alpha * B if side == blas.Right, 1525 // where alpha is a scalar, X and B are m×n matrices, A is a unit or 1526 // non-unit, upper or lower triangular matrix and op(A) is one of 1527 // op(A) = A if transA == blas.NoTrans, 1528 // op(A) = Aᵀ if transA == blas.Trans, 1529 // op(A) = Aᴴ if transA == blas.ConjTrans. 1530 // On return the matrix X is overwritten on B. 1531 // 1532 // Complex64 implementations are autogenerated and not directly tested. 1533 func (Implementation) Ctrsm(side blas.Side, uplo blas.Uplo, transA blas.Transpose, diag blas.Diag, m, n int, alpha complex64, a []complex64, lda int, b []complex64, ldb int) { 1534 na := m 1535 if side == blas.Right { 1536 na = n 1537 } 1538 switch { 1539 case side != blas.Left && side != blas.Right: 1540 panic(badSide) 1541 case uplo != blas.Lower && uplo != blas.Upper: 1542 panic(badUplo) 1543 case transA != blas.NoTrans && transA != blas.Trans && transA != blas.ConjTrans: 1544 panic(badTranspose) 1545 case diag != blas.Unit && diag != blas.NonUnit: 1546 panic(badDiag) 1547 case m < 0: 1548 panic(mLT0) 1549 case n < 0: 1550 panic(nLT0) 1551 case lda < max(1, na): 1552 panic(badLdA) 1553 case ldb < max(1, n): 1554 panic(badLdB) 1555 } 1556 1557 // Quick return if possible. 1558 if m == 0 || n == 0 { 1559 return 1560 } 1561 1562 // For zero matrix size the following slice length checks are trivially satisfied. 1563 if len(a) < (na-1)*lda+na { 1564 panic(shortA) 1565 } 1566 if len(b) < (m-1)*ldb+n { 1567 panic(shortB) 1568 } 1569 1570 if alpha == 0 { 1571 for i := 0; i < m; i++ { 1572 for j := 0; j < n; j++ { 1573 b[i*ldb+j] = 0 1574 } 1575 } 1576 return 1577 } 1578 1579 noConj := transA != blas.ConjTrans 1580 noUnit := diag == blas.NonUnit 1581 if side == blas.Left { 1582 if transA == blas.NoTrans { 1583 // Form B = alpha*inv(A)*B. 1584 if uplo == blas.Upper { 1585 for i := m - 1; i >= 0; i-- { 1586 bi := b[i*ldb : i*ldb+n] 1587 if alpha != 1 { 1588 c64.ScalUnitary(alpha, bi) 1589 } 1590 for ka, aik := range a[i*lda+i+1 : i*lda+m] { 1591 k := i + 1 + ka 1592 if aik != 0 { 1593 c64.AxpyUnitary(-aik, b[k*ldb:k*ldb+n], bi) 1594 } 1595 } 1596 if noUnit { 1597 c64.ScalUnitary(1/a[i*lda+i], bi) 1598 } 1599 } 1600 } else { 1601 for i := 0; i < m; i++ { 1602 bi := b[i*ldb : i*ldb+n] 1603 if alpha != 1 { 1604 c64.ScalUnitary(alpha, bi) 1605 } 1606 for j, aij := range a[i*lda : i*lda+i] { 1607 if aij != 0 { 1608 c64.AxpyUnitary(-aij, b[j*ldb:j*ldb+n], bi) 1609 } 1610 } 1611 if noUnit { 1612 c64.ScalUnitary(1/a[i*lda+i], bi) 1613 } 1614 } 1615 } 1616 } else { 1617 // Form B = alpha*inv(Aᵀ)*B or B = alpha*inv(Aᴴ)*B. 1618 if uplo == blas.Upper { 1619 for i := 0; i < m; i++ { 1620 bi := b[i*ldb : i*ldb+n] 1621 if noUnit { 1622 if noConj { 1623 c64.ScalUnitary(1/a[i*lda+i], bi) 1624 } else { 1625 c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1626 } 1627 } 1628 for ja, aij := range a[i*lda+i+1 : i*lda+m] { 1629 if aij == 0 { 1630 continue 1631 } 1632 j := i + 1 + ja 1633 if noConj { 1634 c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1635 } else { 1636 c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1637 } 1638 } 1639 if alpha != 1 { 1640 c64.ScalUnitary(alpha, bi) 1641 } 1642 } 1643 } else { 1644 for i := m - 1; i >= 0; i-- { 1645 bi := b[i*ldb : i*ldb+n] 1646 if noUnit { 1647 if noConj { 1648 c64.ScalUnitary(1/a[i*lda+i], bi) 1649 } else { 1650 c64.ScalUnitary(1/cmplx.Conj(a[i*lda+i]), bi) 1651 } 1652 } 1653 for j, aij := range a[i*lda : i*lda+i] { 1654 if aij == 0 { 1655 continue 1656 } 1657 if noConj { 1658 c64.AxpyUnitary(-aij, bi, b[j*ldb:j*ldb+n]) 1659 } else { 1660 c64.AxpyUnitary(-cmplx.Conj(aij), bi, b[j*ldb:j*ldb+n]) 1661 } 1662 } 1663 if alpha != 1 { 1664 c64.ScalUnitary(alpha, bi) 1665 } 1666 } 1667 } 1668 } 1669 } else { 1670 if transA == blas.NoTrans { 1671 // Form B = alpha*B*inv(A). 1672 if uplo == blas.Upper { 1673 for i := 0; i < m; i++ { 1674 bi := b[i*ldb : i*ldb+n] 1675 if alpha != 1 { 1676 c64.ScalUnitary(alpha, bi) 1677 } 1678 for j, bij := range bi { 1679 if bij == 0 { 1680 continue 1681 } 1682 if noUnit { 1683 bi[j] /= a[j*lda+j] 1684 } 1685 c64.AxpyUnitary(-bi[j], a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1686 } 1687 } 1688 } else { 1689 for i := 0; i < m; i++ { 1690 bi := b[i*ldb : i*ldb+n] 1691 if alpha != 1 { 1692 c64.ScalUnitary(alpha, bi) 1693 } 1694 for j := n - 1; j >= 0; j-- { 1695 if bi[j] == 0 { 1696 continue 1697 } 1698 if noUnit { 1699 bi[j] /= a[j*lda+j] 1700 } 1701 c64.AxpyUnitary(-bi[j], a[j*lda:j*lda+j], bi[:j]) 1702 } 1703 } 1704 } 1705 } else { 1706 // Form B = alpha*B*inv(Aᵀ) or B = alpha*B*inv(Aᴴ). 1707 if uplo == blas.Upper { 1708 for i := 0; i < m; i++ { 1709 bi := b[i*ldb : i*ldb+n] 1710 for j := n - 1; j >= 0; j-- { 1711 bij := alpha * bi[j] 1712 if noConj { 1713 bij -= c64.DotuUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1714 if noUnit { 1715 bij /= a[j*lda+j] 1716 } 1717 } else { 1718 bij -= c64.DotcUnitary(a[j*lda+j+1:j*lda+n], bi[j+1:n]) 1719 if noUnit { 1720 bij /= cmplx.Conj(a[j*lda+j]) 1721 } 1722 } 1723 bi[j] = bij 1724 } 1725 } 1726 } else { 1727 for i := 0; i < m; i++ { 1728 bi := b[i*ldb : i*ldb+n] 1729 for j, bij := range bi { 1730 bij *= alpha 1731 if noConj { 1732 bij -= c64.DotuUnitary(a[j*lda:j*lda+j], bi[:j]) 1733 if noUnit { 1734 bij /= a[j*lda+j] 1735 } 1736 } else { 1737 bij -= c64.DotcUnitary(a[j*lda:j*lda+j], bi[:j]) 1738 if noUnit { 1739 bij /= cmplx.Conj(a[j*lda+j]) 1740 } 1741 } 1742 bi[j] = bij 1743 } 1744 } 1745 } 1746 } 1747 } 1748 }