gonum.org/v1/gonum@v0.14.0/blas/gonum/level2float64.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "gonum.org/v1/gonum/blas" 9 "gonum.org/v1/gonum/internal/asm/f64" 10 ) 11 12 var _ blas.Float64Level2 = Implementation{} 13 14 // Dger performs the rank-one operation 15 // 16 // A += alpha * x * yᵀ 17 // 18 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 19 func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 20 if m < 0 { 21 panic(mLT0) 22 } 23 if n < 0 { 24 panic(nLT0) 25 } 26 if lda < max(1, n) { 27 panic(badLdA) 28 } 29 if incX == 0 { 30 panic(zeroIncX) 31 } 32 if incY == 0 { 33 panic(zeroIncY) 34 } 35 36 // Quick return if possible. 37 if m == 0 || n == 0 { 38 return 39 } 40 41 // For zero matrix size the following slice length checks are trivially satisfied. 42 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 43 panic(shortX) 44 } 45 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 46 panic(shortY) 47 } 48 if len(a) < lda*(m-1)+n { 49 panic(shortA) 50 } 51 52 // Quick return if possible. 53 if alpha == 0 { 54 return 55 } 56 f64.Ger(uintptr(m), uintptr(n), 57 alpha, 58 x, uintptr(incX), 59 y, uintptr(incY), 60 a, uintptr(lda)) 61 } 62 63 // Dgbmv performs one of the matrix-vector operations 64 // 65 // y = alpha * A * x + beta * y if tA == blas.NoTrans 66 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 67 // 68 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 69 // x and y are vectors, and alpha and beta are scalars. 70 func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 71 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 72 panic(badTranspose) 73 } 74 if m < 0 { 75 panic(mLT0) 76 } 77 if n < 0 { 78 panic(nLT0) 79 } 80 if kL < 0 { 81 panic(kLLT0) 82 } 83 if kU < 0 { 84 panic(kULT0) 85 } 86 if lda < kL+kU+1 { 87 panic(badLdA) 88 } 89 if incX == 0 { 90 panic(zeroIncX) 91 } 92 if incY == 0 { 93 panic(zeroIncY) 94 } 95 96 // Quick return if possible. 97 if m == 0 || n == 0 { 98 return 99 } 100 101 // For zero matrix size the following slice length checks are trivially satisfied. 102 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 103 panic(shortA) 104 } 105 lenX := m 106 lenY := n 107 if tA == blas.NoTrans { 108 lenX = n 109 lenY = m 110 } 111 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 112 panic(shortX) 113 } 114 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 115 panic(shortY) 116 } 117 118 // Quick return if possible. 119 if alpha == 0 && beta == 1 { 120 return 121 } 122 123 var kx, ky int 124 if incX < 0 { 125 kx = -(lenX - 1) * incX 126 } 127 if incY < 0 { 128 ky = -(lenY - 1) * incY 129 } 130 131 // Form y = beta * y. 132 if beta != 1 { 133 if incY == 1 { 134 if beta == 0 { 135 for i := range y[:lenY] { 136 y[i] = 0 137 } 138 } else { 139 f64.ScalUnitary(beta, y[:lenY]) 140 } 141 } else { 142 iy := ky 143 if beta == 0 { 144 for i := 0; i < lenY; i++ { 145 y[iy] = 0 146 iy += incY 147 } 148 } else { 149 if incY > 0 { 150 f64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 151 } else { 152 f64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 153 } 154 } 155 } 156 } 157 158 if alpha == 0 { 159 return 160 } 161 162 // i and j are indices of the compacted banded matrix. 163 // off is the offset into the dense matrix (off + j = densej) 164 nCol := kU + 1 + kL 165 if tA == blas.NoTrans { 166 iy := ky 167 if incX == 1 { 168 for i := 0; i < min(m, n+kL); i++ { 169 l := max(0, kL-i) 170 u := min(nCol, n+kL-i) 171 off := max(0, i-kL) 172 atmp := a[i*lda+l : i*lda+u] 173 xtmp := x[off : off+u-l] 174 var sum float64 175 for j, v := range atmp { 176 sum += xtmp[j] * v 177 } 178 y[iy] += sum * alpha 179 iy += incY 180 } 181 return 182 } 183 for i := 0; i < min(m, n+kL); i++ { 184 l := max(0, kL-i) 185 u := min(nCol, n+kL-i) 186 off := max(0, i-kL) 187 atmp := a[i*lda+l : i*lda+u] 188 jx := kx 189 var sum float64 190 for _, v := range atmp { 191 sum += x[off*incX+jx] * v 192 jx += incX 193 } 194 y[iy] += sum * alpha 195 iy += incY 196 } 197 return 198 } 199 if incX == 1 { 200 for i := 0; i < min(m, n+kL); i++ { 201 l := max(0, kL-i) 202 u := min(nCol, n+kL-i) 203 off := max(0, i-kL) 204 atmp := a[i*lda+l : i*lda+u] 205 tmp := alpha * x[i] 206 jy := ky 207 for _, v := range atmp { 208 y[jy+off*incY] += tmp * v 209 jy += incY 210 } 211 } 212 return 213 } 214 ix := kx 215 for i := 0; i < min(m, n+kL); i++ { 216 l := max(0, kL-i) 217 u := min(nCol, n+kL-i) 218 off := max(0, i-kL) 219 atmp := a[i*lda+l : i*lda+u] 220 tmp := alpha * x[ix] 221 jy := ky 222 for _, v := range atmp { 223 y[jy+off*incY] += tmp * v 224 jy += incY 225 } 226 ix += incX 227 } 228 } 229 230 // Dgemv computes 231 // 232 // y = alpha * A * x + beta * y if tA = blas.NoTrans 233 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 234 // 235 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 236 func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 237 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 238 panic(badTranspose) 239 } 240 if m < 0 { 241 panic(mLT0) 242 } 243 if n < 0 { 244 panic(nLT0) 245 } 246 if lda < max(1, n) { 247 panic(badLdA) 248 } 249 if incX == 0 { 250 panic(zeroIncX) 251 } 252 if incY == 0 { 253 panic(zeroIncY) 254 } 255 // Set up indexes 256 lenX := m 257 lenY := n 258 if tA == blas.NoTrans { 259 lenX = n 260 lenY = m 261 } 262 263 // Quick return if possible 264 if m == 0 || n == 0 { 265 return 266 } 267 268 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 269 panic(shortX) 270 } 271 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 272 panic(shortY) 273 } 274 if len(a) < lda*(m-1)+n { 275 panic(shortA) 276 } 277 278 // Quick return if possible 279 if alpha == 0 && beta == 1 { 280 return 281 } 282 283 if alpha == 0 { 284 // First form y = beta * y 285 if incY > 0 { 286 Implementation{}.Dscal(lenY, beta, y, incY) 287 } else { 288 Implementation{}.Dscal(lenY, beta, y, -incY) 289 } 290 return 291 } 292 293 // Form y = alpha * A * x + y 294 if tA == blas.NoTrans { 295 f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 296 return 297 } 298 // Cases where a is transposed. 299 f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 300 } 301 302 // Dtrmv performs one of the matrix-vector operations 303 // 304 // x = A * x if tA == blas.NoTrans 305 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 306 // 307 // where A is an n×n triangular matrix, and x is a vector. 308 func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 309 if ul != blas.Lower && ul != blas.Upper { 310 panic(badUplo) 311 } 312 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 313 panic(badTranspose) 314 } 315 if d != blas.NonUnit && d != blas.Unit { 316 panic(badDiag) 317 } 318 if n < 0 { 319 panic(nLT0) 320 } 321 if lda < max(1, n) { 322 panic(badLdA) 323 } 324 if incX == 0 { 325 panic(zeroIncX) 326 } 327 328 // Quick return if possible. 329 if n == 0 { 330 return 331 } 332 333 // For zero matrix size the following slice length checks are trivially satisfied. 334 if len(a) < lda*(n-1)+n { 335 panic(shortA) 336 } 337 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 338 panic(shortX) 339 } 340 341 nonUnit := d != blas.Unit 342 if n == 1 { 343 if nonUnit { 344 x[0] *= a[0] 345 } 346 return 347 } 348 var kx int 349 if incX <= 0 { 350 kx = -(n - 1) * incX 351 } 352 if tA == blas.NoTrans { 353 if ul == blas.Upper { 354 if incX == 1 { 355 for i := 0; i < n; i++ { 356 ilda := i * lda 357 var tmp float64 358 if nonUnit { 359 tmp = a[ilda+i] * x[i] 360 } else { 361 tmp = x[i] 362 } 363 x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 364 } 365 return 366 } 367 ix := kx 368 for i := 0; i < n; i++ { 369 ilda := i * lda 370 var tmp float64 371 if nonUnit { 372 tmp = a[ilda+i] * x[ix] 373 } else { 374 tmp = x[ix] 375 } 376 x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 377 ix += incX 378 } 379 return 380 } 381 if incX == 1 { 382 for i := n - 1; i >= 0; i-- { 383 ilda := i * lda 384 var tmp float64 385 if nonUnit { 386 tmp += a[ilda+i] * x[i] 387 } else { 388 tmp = x[i] 389 } 390 x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x[:i]) 391 } 392 return 393 } 394 ix := kx + (n-1)*incX 395 for i := n - 1; i >= 0; i-- { 396 ilda := i * lda 397 var tmp float64 398 if nonUnit { 399 tmp = a[ilda+i] * x[ix] 400 } else { 401 tmp = x[ix] 402 } 403 x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 404 ix -= incX 405 } 406 return 407 } 408 // Cases where a is transposed. 409 if ul == blas.Upper { 410 if incX == 1 { 411 for i := n - 1; i >= 0; i-- { 412 ilda := i * lda 413 xi := x[i] 414 f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 415 if nonUnit { 416 x[i] *= a[ilda+i] 417 } 418 } 419 return 420 } 421 ix := kx + (n-1)*incX 422 for i := n - 1; i >= 0; i-- { 423 ilda := i * lda 424 xi := x[ix] 425 f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 426 if nonUnit { 427 x[ix] *= a[ilda+i] 428 } 429 ix -= incX 430 } 431 return 432 } 433 if incX == 1 { 434 for i := 0; i < n; i++ { 435 ilda := i * lda 436 xi := x[i] 437 f64.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 438 if nonUnit { 439 x[i] *= a[i*lda+i] 440 } 441 } 442 return 443 } 444 ix := kx 445 for i := 0; i < n; i++ { 446 ilda := i * lda 447 xi := x[ix] 448 f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 449 if nonUnit { 450 x[ix] *= a[ilda+i] 451 } 452 ix += incX 453 } 454 } 455 456 // Dtrsv solves one of the systems of equations 457 // 458 // A * x = b if tA == blas.NoTrans 459 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 460 // 461 // where A is an n×n triangular matrix, and x and b are vectors. 462 // 463 // At entry to the function, x contains the values of b, and the result is 464 // stored in-place into x. 465 // 466 // No test for singularity or near-singularity is included in this 467 // routine. Such tests must be performed before calling this routine. 468 func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 469 if ul != blas.Lower && ul != blas.Upper { 470 panic(badUplo) 471 } 472 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 473 panic(badTranspose) 474 } 475 if d != blas.NonUnit && d != blas.Unit { 476 panic(badDiag) 477 } 478 if n < 0 { 479 panic(nLT0) 480 } 481 if lda < max(1, n) { 482 panic(badLdA) 483 } 484 if incX == 0 { 485 panic(zeroIncX) 486 } 487 488 // Quick return if possible. 489 if n == 0 { 490 return 491 } 492 493 // For zero matrix size the following slice length checks are trivially satisfied. 494 if len(a) < lda*(n-1)+n { 495 panic(shortA) 496 } 497 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 498 panic(shortX) 499 } 500 501 if n == 1 { 502 if d == blas.NonUnit { 503 x[0] /= a[0] 504 } 505 return 506 } 507 508 var kx int 509 if incX < 0 { 510 kx = -(n - 1) * incX 511 } 512 nonUnit := d == blas.NonUnit 513 if tA == blas.NoTrans { 514 if ul == blas.Upper { 515 if incX == 1 { 516 for i := n - 1; i >= 0; i-- { 517 var sum float64 518 atmp := a[i*lda+i+1 : i*lda+n] 519 for j, v := range atmp { 520 jv := i + j + 1 521 sum += x[jv] * v 522 } 523 x[i] -= sum 524 if nonUnit { 525 x[i] /= a[i*lda+i] 526 } 527 } 528 return 529 } 530 ix := kx + (n-1)*incX 531 for i := n - 1; i >= 0; i-- { 532 var sum float64 533 jx := ix + incX 534 atmp := a[i*lda+i+1 : i*lda+n] 535 for _, v := range atmp { 536 sum += x[jx] * v 537 jx += incX 538 } 539 x[ix] -= sum 540 if nonUnit { 541 x[ix] /= a[i*lda+i] 542 } 543 ix -= incX 544 } 545 return 546 } 547 if incX == 1 { 548 for i := 0; i < n; i++ { 549 var sum float64 550 atmp := a[i*lda : i*lda+i] 551 for j, v := range atmp { 552 sum += x[j] * v 553 } 554 x[i] -= sum 555 if nonUnit { 556 x[i] /= a[i*lda+i] 557 } 558 } 559 return 560 } 561 ix := kx 562 for i := 0; i < n; i++ { 563 jx := kx 564 var sum float64 565 atmp := a[i*lda : i*lda+i] 566 for _, v := range atmp { 567 sum += x[jx] * v 568 jx += incX 569 } 570 x[ix] -= sum 571 if nonUnit { 572 x[ix] /= a[i*lda+i] 573 } 574 ix += incX 575 } 576 return 577 } 578 // Cases where a is transposed. 579 if ul == blas.Upper { 580 if incX == 1 { 581 for i := 0; i < n; i++ { 582 if nonUnit { 583 x[i] /= a[i*lda+i] 584 } 585 xi := x[i] 586 atmp := a[i*lda+i+1 : i*lda+n] 587 for j, v := range atmp { 588 jv := j + i + 1 589 x[jv] -= v * xi 590 } 591 } 592 return 593 } 594 ix := kx 595 for i := 0; i < n; i++ { 596 if nonUnit { 597 x[ix] /= a[i*lda+i] 598 } 599 xi := x[ix] 600 jx := kx + (i+1)*incX 601 atmp := a[i*lda+i+1 : i*lda+n] 602 for _, v := range atmp { 603 x[jx] -= v * xi 604 jx += incX 605 } 606 ix += incX 607 } 608 return 609 } 610 if incX == 1 { 611 for i := n - 1; i >= 0; i-- { 612 if nonUnit { 613 x[i] /= a[i*lda+i] 614 } 615 xi := x[i] 616 atmp := a[i*lda : i*lda+i] 617 for j, v := range atmp { 618 x[j] -= v * xi 619 } 620 } 621 return 622 } 623 ix := kx + (n-1)*incX 624 for i := n - 1; i >= 0; i-- { 625 if nonUnit { 626 x[ix] /= a[i*lda+i] 627 } 628 xi := x[ix] 629 jx := kx 630 atmp := a[i*lda : i*lda+i] 631 for _, v := range atmp { 632 x[jx] -= v * xi 633 jx += incX 634 } 635 ix -= incX 636 } 637 } 638 639 // Dsymv performs the matrix-vector operation 640 // 641 // y = alpha * A * x + beta * y 642 // 643 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 644 // beta are scalars. 645 func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 646 if ul != blas.Lower && ul != blas.Upper { 647 panic(badUplo) 648 } 649 if n < 0 { 650 panic(nLT0) 651 } 652 if lda < max(1, n) { 653 panic(badLdA) 654 } 655 if incX == 0 { 656 panic(zeroIncX) 657 } 658 if incY == 0 { 659 panic(zeroIncY) 660 } 661 662 // Quick return if possible. 663 if n == 0 { 664 return 665 } 666 667 // For zero matrix size the following slice length checks are trivially satisfied. 668 if len(a) < lda*(n-1)+n { 669 panic(shortA) 670 } 671 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 672 panic(shortX) 673 } 674 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 675 panic(shortY) 676 } 677 678 // Quick return if possible. 679 if alpha == 0 && beta == 1 { 680 return 681 } 682 683 // Set up start points 684 var kx, ky int 685 if incX < 0 { 686 kx = -(n - 1) * incX 687 } 688 if incY < 0 { 689 ky = -(n - 1) * incY 690 } 691 692 // Form y = beta * y 693 if beta != 1 { 694 if incY == 1 { 695 if beta == 0 { 696 for i := range y[:n] { 697 y[i] = 0 698 } 699 } else { 700 f64.ScalUnitary(beta, y[:n]) 701 } 702 } else { 703 iy := ky 704 if beta == 0 { 705 for i := 0; i < n; i++ { 706 y[iy] = 0 707 iy += incY 708 } 709 } else { 710 if incY > 0 { 711 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 712 } else { 713 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 714 } 715 } 716 } 717 } 718 719 if alpha == 0 { 720 return 721 } 722 723 if n == 1 { 724 y[0] += alpha * a[0] * x[0] 725 return 726 } 727 728 if ul == blas.Upper { 729 if incX == 1 { 730 iy := ky 731 for i := 0; i < n; i++ { 732 xv := x[i] * alpha 733 sum := x[i] * a[i*lda+i] 734 jy := ky + (i+1)*incY 735 atmp := a[i*lda+i+1 : i*lda+n] 736 for j, v := range atmp { 737 jp := j + i + 1 738 sum += x[jp] * v 739 y[jy] += xv * v 740 jy += incY 741 } 742 y[iy] += alpha * sum 743 iy += incY 744 } 745 return 746 } 747 ix := kx 748 iy := ky 749 for i := 0; i < n; i++ { 750 xv := x[ix] * alpha 751 sum := x[ix] * a[i*lda+i] 752 jx := kx + (i+1)*incX 753 jy := ky + (i+1)*incY 754 atmp := a[i*lda+i+1 : i*lda+n] 755 for _, v := range atmp { 756 sum += x[jx] * v 757 y[jy] += xv * v 758 jx += incX 759 jy += incY 760 } 761 y[iy] += alpha * sum 762 ix += incX 763 iy += incY 764 } 765 return 766 } 767 // Cases where a is lower triangular. 768 if incX == 1 { 769 iy := ky 770 for i := 0; i < n; i++ { 771 jy := ky 772 xv := alpha * x[i] 773 atmp := a[i*lda : i*lda+i] 774 var sum float64 775 for j, v := range atmp { 776 sum += x[j] * v 777 y[jy] += xv * v 778 jy += incY 779 } 780 sum += x[i] * a[i*lda+i] 781 sum *= alpha 782 y[iy] += sum 783 iy += incY 784 } 785 return 786 } 787 ix := kx 788 iy := ky 789 for i := 0; i < n; i++ { 790 jx := kx 791 jy := ky 792 xv := alpha * x[ix] 793 atmp := a[i*lda : i*lda+i] 794 var sum float64 795 for _, v := range atmp { 796 sum += x[jx] * v 797 y[jy] += xv * v 798 jx += incX 799 jy += incY 800 } 801 sum += x[ix] * a[i*lda+i] 802 sum *= alpha 803 y[iy] += sum 804 ix += incX 805 iy += incY 806 } 807 } 808 809 // Dtbmv performs one of the matrix-vector operations 810 // 811 // x = A * x if tA == blas.NoTrans 812 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 813 // 814 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 815 func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 816 if ul != blas.Lower && ul != blas.Upper { 817 panic(badUplo) 818 } 819 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 820 panic(badTranspose) 821 } 822 if d != blas.NonUnit && d != blas.Unit { 823 panic(badDiag) 824 } 825 if n < 0 { 826 panic(nLT0) 827 } 828 if k < 0 { 829 panic(kLT0) 830 } 831 if lda < k+1 { 832 panic(badLdA) 833 } 834 if incX == 0 { 835 panic(zeroIncX) 836 } 837 838 // Quick return if possible. 839 if n == 0 { 840 return 841 } 842 843 // For zero matrix size the following slice length checks are trivially satisfied. 844 if len(a) < lda*(n-1)+k+1 { 845 panic(shortA) 846 } 847 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 848 panic(shortX) 849 } 850 851 var kx int 852 if incX < 0 { 853 kx = -(n - 1) * incX 854 } 855 856 nonunit := d != blas.Unit 857 858 if tA == blas.NoTrans { 859 if ul == blas.Upper { 860 if incX == 1 { 861 for i := 0; i < n; i++ { 862 u := min(1+k, n-i) 863 var sum float64 864 atmp := a[i*lda:] 865 xtmp := x[i:] 866 for j := 1; j < u; j++ { 867 sum += xtmp[j] * atmp[j] 868 } 869 if nonunit { 870 sum += xtmp[0] * atmp[0] 871 } else { 872 sum += xtmp[0] 873 } 874 x[i] = sum 875 } 876 return 877 } 878 ix := kx 879 for i := 0; i < n; i++ { 880 u := min(1+k, n-i) 881 var sum float64 882 atmp := a[i*lda:] 883 jx := incX 884 for j := 1; j < u; j++ { 885 sum += x[ix+jx] * atmp[j] 886 jx += incX 887 } 888 if nonunit { 889 sum += x[ix] * atmp[0] 890 } else { 891 sum += x[ix] 892 } 893 x[ix] = sum 894 ix += incX 895 } 896 return 897 } 898 if incX == 1 { 899 for i := n - 1; i >= 0; i-- { 900 l := max(0, k-i) 901 atmp := a[i*lda:] 902 var sum float64 903 for j := l; j < k; j++ { 904 sum += x[i-k+j] * atmp[j] 905 } 906 if nonunit { 907 sum += x[i] * atmp[k] 908 } else { 909 sum += x[i] 910 } 911 x[i] = sum 912 } 913 return 914 } 915 ix := kx + (n-1)*incX 916 for i := n - 1; i >= 0; i-- { 917 l := max(0, k-i) 918 atmp := a[i*lda:] 919 var sum float64 920 jx := l * incX 921 for j := l; j < k; j++ { 922 sum += x[ix-k*incX+jx] * atmp[j] 923 jx += incX 924 } 925 if nonunit { 926 sum += x[ix] * atmp[k] 927 } else { 928 sum += x[ix] 929 } 930 x[ix] = sum 931 ix -= incX 932 } 933 return 934 } 935 if ul == blas.Upper { 936 if incX == 1 { 937 for i := n - 1; i >= 0; i-- { 938 u := k + 1 939 if i < u { 940 u = i + 1 941 } 942 var sum float64 943 for j := 1; j < u; j++ { 944 sum += x[i-j] * a[(i-j)*lda+j] 945 } 946 if nonunit { 947 sum += x[i] * a[i*lda] 948 } else { 949 sum += x[i] 950 } 951 x[i] = sum 952 } 953 return 954 } 955 ix := kx + (n-1)*incX 956 for i := n - 1; i >= 0; i-- { 957 u := k + 1 958 if i < u { 959 u = i + 1 960 } 961 var sum float64 962 jx := incX 963 for j := 1; j < u; j++ { 964 sum += x[ix-jx] * a[(i-j)*lda+j] 965 jx += incX 966 } 967 if nonunit { 968 sum += x[ix] * a[i*lda] 969 } else { 970 sum += x[ix] 971 } 972 x[ix] = sum 973 ix -= incX 974 } 975 return 976 } 977 if incX == 1 { 978 for i := 0; i < n; i++ { 979 u := k 980 if i+k >= n { 981 u = n - i - 1 982 } 983 var sum float64 984 for j := 0; j < u; j++ { 985 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 986 } 987 if nonunit { 988 sum += x[i] * a[i*lda+k] 989 } else { 990 sum += x[i] 991 } 992 x[i] = sum 993 } 994 return 995 } 996 ix := kx 997 for i := 0; i < n; i++ { 998 u := k 999 if i+k >= n { 1000 u = n - i - 1 1001 } 1002 var ( 1003 sum float64 1004 jx int 1005 ) 1006 for j := 0; j < u; j++ { 1007 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1008 jx += incX 1009 } 1010 if nonunit { 1011 sum += x[ix] * a[i*lda+k] 1012 } else { 1013 sum += x[ix] 1014 } 1015 x[ix] = sum 1016 ix += incX 1017 } 1018 } 1019 1020 // Dtpmv performs one of the matrix-vector operations 1021 // 1022 // x = A * x if tA == blas.NoTrans 1023 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 1024 // 1025 // where A is an n×n triangular matrix in packed format, and x is a vector. 1026 func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 1027 if ul != blas.Lower && ul != blas.Upper { 1028 panic(badUplo) 1029 } 1030 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1031 panic(badTranspose) 1032 } 1033 if d != blas.NonUnit && d != blas.Unit { 1034 panic(badDiag) 1035 } 1036 if n < 0 { 1037 panic(nLT0) 1038 } 1039 if incX == 0 { 1040 panic(zeroIncX) 1041 } 1042 1043 // Quick return if possible. 1044 if n == 0 { 1045 return 1046 } 1047 1048 // For zero matrix size the following slice length checks are trivially satisfied. 1049 if len(ap) < n*(n+1)/2 { 1050 panic(shortAP) 1051 } 1052 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1053 panic(shortX) 1054 } 1055 1056 var kx int 1057 if incX < 0 { 1058 kx = -(n - 1) * incX 1059 } 1060 1061 nonUnit := d == blas.NonUnit 1062 var offset int // Offset is the index of (i,i) 1063 if tA == blas.NoTrans { 1064 if ul == blas.Upper { 1065 if incX == 1 { 1066 for i := 0; i < n; i++ { 1067 xi := x[i] 1068 if nonUnit { 1069 xi *= ap[offset] 1070 } 1071 atmp := ap[offset+1 : offset+n-i] 1072 xtmp := x[i+1:] 1073 for j, v := range atmp { 1074 xi += v * xtmp[j] 1075 } 1076 x[i] = xi 1077 offset += n - i 1078 } 1079 return 1080 } 1081 ix := kx 1082 for i := 0; i < n; i++ { 1083 xix := x[ix] 1084 if nonUnit { 1085 xix *= ap[offset] 1086 } 1087 atmp := ap[offset+1 : offset+n-i] 1088 jx := kx + (i+1)*incX 1089 for _, v := range atmp { 1090 xix += v * x[jx] 1091 jx += incX 1092 } 1093 x[ix] = xix 1094 offset += n - i 1095 ix += incX 1096 } 1097 return 1098 } 1099 if incX == 1 { 1100 offset = n*(n+1)/2 - 1 1101 for i := n - 1; i >= 0; i-- { 1102 xi := x[i] 1103 if nonUnit { 1104 xi *= ap[offset] 1105 } 1106 atmp := ap[offset-i : offset] 1107 for j, v := range atmp { 1108 xi += v * x[j] 1109 } 1110 x[i] = xi 1111 offset -= i + 1 1112 } 1113 return 1114 } 1115 ix := kx + (n-1)*incX 1116 offset = n*(n+1)/2 - 1 1117 for i := n - 1; i >= 0; i-- { 1118 xix := x[ix] 1119 if nonUnit { 1120 xix *= ap[offset] 1121 } 1122 atmp := ap[offset-i : offset] 1123 jx := kx 1124 for _, v := range atmp { 1125 xix += v * x[jx] 1126 jx += incX 1127 } 1128 x[ix] = xix 1129 offset -= i + 1 1130 ix -= incX 1131 } 1132 return 1133 } 1134 // Cases where ap is transposed. 1135 if ul == blas.Upper { 1136 if incX == 1 { 1137 offset = n*(n+1)/2 - 1 1138 for i := n - 1; i >= 0; i-- { 1139 xi := x[i] 1140 atmp := ap[offset+1 : offset+n-i] 1141 xtmp := x[i+1:] 1142 for j, v := range atmp { 1143 xtmp[j] += v * xi 1144 } 1145 if nonUnit { 1146 x[i] *= ap[offset] 1147 } 1148 offset -= n - i + 1 1149 } 1150 return 1151 } 1152 ix := kx + (n-1)*incX 1153 offset = n*(n+1)/2 - 1 1154 for i := n - 1; i >= 0; i-- { 1155 xix := x[ix] 1156 jx := kx + (i+1)*incX 1157 atmp := ap[offset+1 : offset+n-i] 1158 for _, v := range atmp { 1159 x[jx] += v * xix 1160 jx += incX 1161 } 1162 if nonUnit { 1163 x[ix] *= ap[offset] 1164 } 1165 offset -= n - i + 1 1166 ix -= incX 1167 } 1168 return 1169 } 1170 if incX == 1 { 1171 for i := 0; i < n; i++ { 1172 xi := x[i] 1173 atmp := ap[offset-i : offset] 1174 for j, v := range atmp { 1175 x[j] += v * xi 1176 } 1177 if nonUnit { 1178 x[i] *= ap[offset] 1179 } 1180 offset += i + 2 1181 } 1182 return 1183 } 1184 ix := kx 1185 for i := 0; i < n; i++ { 1186 xix := x[ix] 1187 jx := kx 1188 atmp := ap[offset-i : offset] 1189 for _, v := range atmp { 1190 x[jx] += v * xix 1191 jx += incX 1192 } 1193 if nonUnit { 1194 x[ix] *= ap[offset] 1195 } 1196 ix += incX 1197 offset += i + 2 1198 } 1199 } 1200 1201 // Dtbsv solves one of the systems of equations 1202 // 1203 // A * x = b if tA == blas.NoTrans 1204 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1205 // 1206 // where A is an n×n triangular band matrix with k+1 diagonals, 1207 // and x and b are vectors. 1208 // 1209 // At entry to the function, x contains the values of b, and the result is 1210 // stored in-place into x. 1211 // 1212 // No test for singularity or near-singularity is included in this 1213 // routine. Such tests must be performed before calling this routine. 1214 func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 1215 if ul != blas.Lower && ul != blas.Upper { 1216 panic(badUplo) 1217 } 1218 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1219 panic(badTranspose) 1220 } 1221 if d != blas.NonUnit && d != blas.Unit { 1222 panic(badDiag) 1223 } 1224 if n < 0 { 1225 panic(nLT0) 1226 } 1227 if k < 0 { 1228 panic(kLT0) 1229 } 1230 if lda < k+1 { 1231 panic(badLdA) 1232 } 1233 if incX == 0 { 1234 panic(zeroIncX) 1235 } 1236 1237 // Quick return if possible. 1238 if n == 0 { 1239 return 1240 } 1241 1242 // For zero matrix size the following slice length checks are trivially satisfied. 1243 if len(a) < lda*(n-1)+k+1 { 1244 panic(shortA) 1245 } 1246 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1247 panic(shortX) 1248 } 1249 1250 var kx int 1251 if incX < 0 { 1252 kx = -(n - 1) * incX 1253 } 1254 nonUnit := d == blas.NonUnit 1255 // Form x = A^-1 x. 1256 // Several cases below use subslices for speed improvement. 1257 // The incX != 1 cases usually do not because incX may be negative. 1258 if tA == blas.NoTrans { 1259 if ul == blas.Upper { 1260 if incX == 1 { 1261 for i := n - 1; i >= 0; i-- { 1262 bands := k 1263 if i+bands >= n { 1264 bands = n - i - 1 1265 } 1266 atmp := a[i*lda+1:] 1267 xtmp := x[i+1 : i+bands+1] 1268 var sum float64 1269 for j, v := range xtmp { 1270 sum += v * atmp[j] 1271 } 1272 x[i] -= sum 1273 if nonUnit { 1274 x[i] /= a[i*lda] 1275 } 1276 } 1277 return 1278 } 1279 ix := kx + (n-1)*incX 1280 for i := n - 1; i >= 0; i-- { 1281 max := k + 1 1282 if i+max > n { 1283 max = n - i 1284 } 1285 atmp := a[i*lda:] 1286 var ( 1287 jx int 1288 sum float64 1289 ) 1290 for j := 1; j < max; j++ { 1291 jx += incX 1292 sum += x[ix+jx] * atmp[j] 1293 } 1294 x[ix] -= sum 1295 if nonUnit { 1296 x[ix] /= atmp[0] 1297 } 1298 ix -= incX 1299 } 1300 return 1301 } 1302 if incX == 1 { 1303 for i := 0; i < n; i++ { 1304 bands := k 1305 if i-k < 0 { 1306 bands = i 1307 } 1308 atmp := a[i*lda+k-bands:] 1309 xtmp := x[i-bands : i] 1310 var sum float64 1311 for j, v := range xtmp { 1312 sum += v * atmp[j] 1313 } 1314 x[i] -= sum 1315 if nonUnit { 1316 x[i] /= atmp[bands] 1317 } 1318 } 1319 return 1320 } 1321 ix := kx 1322 for i := 0; i < n; i++ { 1323 bands := k 1324 if i-k < 0 { 1325 bands = i 1326 } 1327 atmp := a[i*lda+k-bands:] 1328 var ( 1329 sum float64 1330 jx int 1331 ) 1332 for j := 0; j < bands; j++ { 1333 sum += x[ix-bands*incX+jx] * atmp[j] 1334 jx += incX 1335 } 1336 x[ix] -= sum 1337 if nonUnit { 1338 x[ix] /= atmp[bands] 1339 } 1340 ix += incX 1341 } 1342 return 1343 } 1344 // Cases where a is transposed. 1345 if ul == blas.Upper { 1346 if incX == 1 { 1347 for i := 0; i < n; i++ { 1348 bands := k 1349 if i-k < 0 { 1350 bands = i 1351 } 1352 var sum float64 1353 for j := 0; j < bands; j++ { 1354 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1355 } 1356 x[i] -= sum 1357 if nonUnit { 1358 x[i] /= a[i*lda] 1359 } 1360 } 1361 return 1362 } 1363 ix := kx 1364 for i := 0; i < n; i++ { 1365 bands := k 1366 if i-k < 0 { 1367 bands = i 1368 } 1369 var ( 1370 sum float64 1371 jx int 1372 ) 1373 for j := 0; j < bands; j++ { 1374 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1375 jx += incX 1376 } 1377 x[ix] -= sum 1378 if nonUnit { 1379 x[ix] /= a[i*lda] 1380 } 1381 ix += incX 1382 } 1383 return 1384 } 1385 if incX == 1 { 1386 for i := n - 1; i >= 0; i-- { 1387 bands := k 1388 if i+bands >= n { 1389 bands = n - i - 1 1390 } 1391 var sum float64 1392 xtmp := x[i+1 : i+1+bands] 1393 for j, v := range xtmp { 1394 sum += v * a[(i+j+1)*lda+k-j-1] 1395 } 1396 x[i] -= sum 1397 if nonUnit { 1398 x[i] /= a[i*lda+k] 1399 } 1400 } 1401 return 1402 } 1403 ix := kx + (n-1)*incX 1404 for i := n - 1; i >= 0; i-- { 1405 bands := k 1406 if i+bands >= n { 1407 bands = n - i - 1 1408 } 1409 var ( 1410 sum float64 1411 jx int 1412 ) 1413 for j := 0; j < bands; j++ { 1414 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1415 jx += incX 1416 } 1417 x[ix] -= sum 1418 if nonUnit { 1419 x[ix] /= a[i*lda+k] 1420 } 1421 ix -= incX 1422 } 1423 } 1424 1425 // Dsbmv performs the matrix-vector operation 1426 // 1427 // y = alpha * A * x + beta * y 1428 // 1429 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1430 // vectors, and alpha and beta are scalars. 1431 func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 1432 if ul != blas.Lower && ul != blas.Upper { 1433 panic(badUplo) 1434 } 1435 if n < 0 { 1436 panic(nLT0) 1437 } 1438 if k < 0 { 1439 panic(kLT0) 1440 } 1441 if lda < k+1 { 1442 panic(badLdA) 1443 } 1444 if incX == 0 { 1445 panic(zeroIncX) 1446 } 1447 if incY == 0 { 1448 panic(zeroIncY) 1449 } 1450 1451 // Quick return if possible. 1452 if n == 0 { 1453 return 1454 } 1455 1456 // For zero matrix size the following slice length checks are trivially satisfied. 1457 if len(a) < lda*(n-1)+k+1 { 1458 panic(shortA) 1459 } 1460 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1461 panic(shortX) 1462 } 1463 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1464 panic(shortY) 1465 } 1466 1467 // Quick return if possible. 1468 if alpha == 0 && beta == 1 { 1469 return 1470 } 1471 1472 // Set up indexes 1473 lenX := n 1474 lenY := n 1475 var kx, ky int 1476 if incX < 0 { 1477 kx = -(lenX - 1) * incX 1478 } 1479 if incY < 0 { 1480 ky = -(lenY - 1) * incY 1481 } 1482 1483 // Form y = beta * y. 1484 if beta != 1 { 1485 if incY == 1 { 1486 if beta == 0 { 1487 for i := range y[:n] { 1488 y[i] = 0 1489 } 1490 } else { 1491 f64.ScalUnitary(beta, y[:n]) 1492 } 1493 } else { 1494 iy := ky 1495 if beta == 0 { 1496 for i := 0; i < n; i++ { 1497 y[iy] = 0 1498 iy += incY 1499 } 1500 } else { 1501 if incY > 0 { 1502 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1503 } else { 1504 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1505 } 1506 } 1507 } 1508 } 1509 1510 if alpha == 0 { 1511 return 1512 } 1513 1514 if ul == blas.Upper { 1515 if incX == 1 { 1516 iy := ky 1517 for i := 0; i < n; i++ { 1518 atmp := a[i*lda:] 1519 tmp := alpha * x[i] 1520 sum := tmp * atmp[0] 1521 u := min(k, n-i-1) 1522 jy := incY 1523 for j := 1; j <= u; j++ { 1524 v := atmp[j] 1525 sum += alpha * x[i+j] * v 1526 y[iy+jy] += tmp * v 1527 jy += incY 1528 } 1529 y[iy] += sum 1530 iy += incY 1531 } 1532 return 1533 } 1534 ix := kx 1535 iy := ky 1536 for i := 0; i < n; i++ { 1537 atmp := a[i*lda:] 1538 tmp := alpha * x[ix] 1539 sum := tmp * atmp[0] 1540 u := min(k, n-i-1) 1541 jx := incX 1542 jy := incY 1543 for j := 1; j <= u; j++ { 1544 v := atmp[j] 1545 sum += alpha * x[ix+jx] * v 1546 y[iy+jy] += tmp * v 1547 jx += incX 1548 jy += incY 1549 } 1550 y[iy] += sum 1551 ix += incX 1552 iy += incY 1553 } 1554 return 1555 } 1556 1557 // Cases where a has bands below the diagonal. 1558 if incX == 1 { 1559 iy := ky 1560 for i := 0; i < n; i++ { 1561 l := max(0, k-i) 1562 tmp := alpha * x[i] 1563 jy := l * incY 1564 atmp := a[i*lda:] 1565 for j := l; j < k; j++ { 1566 v := atmp[j] 1567 y[iy] += alpha * v * x[i-k+j] 1568 y[iy-k*incY+jy] += tmp * v 1569 jy += incY 1570 } 1571 y[iy] += tmp * atmp[k] 1572 iy += incY 1573 } 1574 return 1575 } 1576 ix := kx 1577 iy := ky 1578 for i := 0; i < n; i++ { 1579 l := max(0, k-i) 1580 tmp := alpha * x[ix] 1581 jx := l * incX 1582 jy := l * incY 1583 atmp := a[i*lda:] 1584 for j := l; j < k; j++ { 1585 v := atmp[j] 1586 y[iy] += alpha * v * x[ix-k*incX+jx] 1587 y[iy-k*incY+jy] += tmp * v 1588 jx += incX 1589 jy += incY 1590 } 1591 y[iy] += tmp * atmp[k] 1592 ix += incX 1593 iy += incY 1594 } 1595 } 1596 1597 // Dsyr performs the symmetric rank-one update 1598 // 1599 // A += alpha * x * xᵀ 1600 // 1601 // where A is an n×n symmetric matrix, and x is a vector. 1602 func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { 1603 if ul != blas.Lower && ul != blas.Upper { 1604 panic(badUplo) 1605 } 1606 if n < 0 { 1607 panic(nLT0) 1608 } 1609 if lda < max(1, n) { 1610 panic(badLdA) 1611 } 1612 if incX == 0 { 1613 panic(zeroIncX) 1614 } 1615 1616 // Quick return if possible. 1617 if n == 0 { 1618 return 1619 } 1620 1621 // For zero matrix size the following slice length checks are trivially satisfied. 1622 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1623 panic(shortX) 1624 } 1625 if len(a) < lda*(n-1)+n { 1626 panic(shortA) 1627 } 1628 1629 // Quick return if possible. 1630 if alpha == 0 { 1631 return 1632 } 1633 1634 lenX := n 1635 var kx int 1636 if incX < 0 { 1637 kx = -(lenX - 1) * incX 1638 } 1639 if ul == blas.Upper { 1640 if incX == 1 { 1641 for i := 0; i < n; i++ { 1642 tmp := x[i] * alpha 1643 if tmp != 0 { 1644 atmp := a[i*lda+i : i*lda+n] 1645 xtmp := x[i:n] 1646 for j, v := range xtmp { 1647 atmp[j] += v * tmp 1648 } 1649 } 1650 } 1651 return 1652 } 1653 ix := kx 1654 for i := 0; i < n; i++ { 1655 tmp := x[ix] * alpha 1656 if tmp != 0 { 1657 jx := ix 1658 atmp := a[i*lda:] 1659 for j := i; j < n; j++ { 1660 atmp[j] += x[jx] * tmp 1661 jx += incX 1662 } 1663 } 1664 ix += incX 1665 } 1666 return 1667 } 1668 // Cases where a is lower triangular. 1669 if incX == 1 { 1670 for i := 0; i < n; i++ { 1671 tmp := x[i] * alpha 1672 if tmp != 0 { 1673 atmp := a[i*lda:] 1674 xtmp := x[:i+1] 1675 for j, v := range xtmp { 1676 atmp[j] += tmp * v 1677 } 1678 } 1679 } 1680 return 1681 } 1682 ix := kx 1683 for i := 0; i < n; i++ { 1684 tmp := x[ix] * alpha 1685 if tmp != 0 { 1686 atmp := a[i*lda:] 1687 jx := kx 1688 for j := 0; j < i+1; j++ { 1689 atmp[j] += tmp * x[jx] 1690 jx += incX 1691 } 1692 } 1693 ix += incX 1694 } 1695 } 1696 1697 // Dsyr2 performs the symmetric rank-two update 1698 // 1699 // A += alpha * x * yᵀ + alpha * y * xᵀ 1700 // 1701 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1702 func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 1703 if ul != blas.Lower && ul != blas.Upper { 1704 panic(badUplo) 1705 } 1706 if n < 0 { 1707 panic(nLT0) 1708 } 1709 if lda < max(1, n) { 1710 panic(badLdA) 1711 } 1712 if incX == 0 { 1713 panic(zeroIncX) 1714 } 1715 if incY == 0 { 1716 panic(zeroIncY) 1717 } 1718 1719 // Quick return if possible. 1720 if n == 0 { 1721 return 1722 } 1723 1724 // For zero matrix size the following slice length checks are trivially satisfied. 1725 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1726 panic(shortX) 1727 } 1728 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1729 panic(shortY) 1730 } 1731 if len(a) < lda*(n-1)+n { 1732 panic(shortA) 1733 } 1734 1735 // Quick return if possible. 1736 if alpha == 0 { 1737 return 1738 } 1739 1740 var ky, kx int 1741 if incY < 0 { 1742 ky = -(n - 1) * incY 1743 } 1744 if incX < 0 { 1745 kx = -(n - 1) * incX 1746 } 1747 if ul == blas.Upper { 1748 if incX == 1 && incY == 1 { 1749 for i := 0; i < n; i++ { 1750 xi := x[i] 1751 yi := y[i] 1752 atmp := a[i*lda:] 1753 for j := i; j < n; j++ { 1754 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1755 } 1756 } 1757 return 1758 } 1759 ix := kx 1760 iy := ky 1761 for i := 0; i < n; i++ { 1762 jx := kx + i*incX 1763 jy := ky + i*incY 1764 xi := x[ix] 1765 yi := y[iy] 1766 atmp := a[i*lda:] 1767 for j := i; j < n; j++ { 1768 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1769 jx += incX 1770 jy += incY 1771 } 1772 ix += incX 1773 iy += incY 1774 } 1775 return 1776 } 1777 if incX == 1 && incY == 1 { 1778 for i := 0; i < n; i++ { 1779 xi := x[i] 1780 yi := y[i] 1781 atmp := a[i*lda:] 1782 for j := 0; j <= i; j++ { 1783 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1784 } 1785 } 1786 return 1787 } 1788 ix := kx 1789 iy := ky 1790 for i := 0; i < n; i++ { 1791 jx := kx 1792 jy := ky 1793 xi := x[ix] 1794 yi := y[iy] 1795 atmp := a[i*lda:] 1796 for j := 0; j <= i; j++ { 1797 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1798 jx += incX 1799 jy += incY 1800 } 1801 ix += incX 1802 iy += incY 1803 } 1804 } 1805 1806 // Dtpsv solves one of the systems of equations 1807 // 1808 // A * x = b if tA == blas.NoTrans 1809 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1810 // 1811 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1812 // 1813 // At entry to the function, x contains the values of b, and the result is 1814 // stored in-place into x. 1815 // 1816 // No test for singularity or near-singularity is included in this 1817 // routine. Such tests must be performed before calling this routine. 1818 func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 1819 if ul != blas.Lower && ul != blas.Upper { 1820 panic(badUplo) 1821 } 1822 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1823 panic(badTranspose) 1824 } 1825 if d != blas.NonUnit && d != blas.Unit { 1826 panic(badDiag) 1827 } 1828 if n < 0 { 1829 panic(nLT0) 1830 } 1831 if incX == 0 { 1832 panic(zeroIncX) 1833 } 1834 1835 // Quick return if possible. 1836 if n == 0 { 1837 return 1838 } 1839 1840 // For zero matrix size the following slice length checks are trivially satisfied. 1841 if len(ap) < n*(n+1)/2 { 1842 panic(shortAP) 1843 } 1844 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1845 panic(shortX) 1846 } 1847 1848 var kx int 1849 if incX < 0 { 1850 kx = -(n - 1) * incX 1851 } 1852 1853 nonUnit := d == blas.NonUnit 1854 var offset int // Offset is the index of (i,i) 1855 if tA == blas.NoTrans { 1856 if ul == blas.Upper { 1857 offset = n*(n+1)/2 - 1 1858 if incX == 1 { 1859 for i := n - 1; i >= 0; i-- { 1860 atmp := ap[offset+1 : offset+n-i] 1861 xtmp := x[i+1:] 1862 var sum float64 1863 for j, v := range atmp { 1864 sum += v * xtmp[j] 1865 } 1866 x[i] -= sum 1867 if nonUnit { 1868 x[i] /= ap[offset] 1869 } 1870 offset -= n - i + 1 1871 } 1872 return 1873 } 1874 ix := kx + (n-1)*incX 1875 for i := n - 1; i >= 0; i-- { 1876 atmp := ap[offset+1 : offset+n-i] 1877 jx := kx + (i+1)*incX 1878 var sum float64 1879 for _, v := range atmp { 1880 sum += v * x[jx] 1881 jx += incX 1882 } 1883 x[ix] -= sum 1884 if nonUnit { 1885 x[ix] /= ap[offset] 1886 } 1887 ix -= incX 1888 offset -= n - i + 1 1889 } 1890 return 1891 } 1892 if incX == 1 { 1893 for i := 0; i < n; i++ { 1894 atmp := ap[offset-i : offset] 1895 var sum float64 1896 for j, v := range atmp { 1897 sum += v * x[j] 1898 } 1899 x[i] -= sum 1900 if nonUnit { 1901 x[i] /= ap[offset] 1902 } 1903 offset += i + 2 1904 } 1905 return 1906 } 1907 ix := kx 1908 for i := 0; i < n; i++ { 1909 jx := kx 1910 atmp := ap[offset-i : offset] 1911 var sum float64 1912 for _, v := range atmp { 1913 sum += v * x[jx] 1914 jx += incX 1915 } 1916 x[ix] -= sum 1917 if nonUnit { 1918 x[ix] /= ap[offset] 1919 } 1920 ix += incX 1921 offset += i + 2 1922 } 1923 return 1924 } 1925 // Cases where ap is transposed. 1926 if ul == blas.Upper { 1927 if incX == 1 { 1928 for i := 0; i < n; i++ { 1929 if nonUnit { 1930 x[i] /= ap[offset] 1931 } 1932 xi := x[i] 1933 atmp := ap[offset+1 : offset+n-i] 1934 xtmp := x[i+1:] 1935 for j, v := range atmp { 1936 xtmp[j] -= v * xi 1937 } 1938 offset += n - i 1939 } 1940 return 1941 } 1942 ix := kx 1943 for i := 0; i < n; i++ { 1944 if nonUnit { 1945 x[ix] /= ap[offset] 1946 } 1947 xix := x[ix] 1948 atmp := ap[offset+1 : offset+n-i] 1949 jx := kx + (i+1)*incX 1950 for _, v := range atmp { 1951 x[jx] -= v * xix 1952 jx += incX 1953 } 1954 ix += incX 1955 offset += n - i 1956 } 1957 return 1958 } 1959 if incX == 1 { 1960 offset = n*(n+1)/2 - 1 1961 for i := n - 1; i >= 0; i-- { 1962 if nonUnit { 1963 x[i] /= ap[offset] 1964 } 1965 xi := x[i] 1966 atmp := ap[offset-i : offset] 1967 for j, v := range atmp { 1968 x[j] -= v * xi 1969 } 1970 offset -= i + 1 1971 } 1972 return 1973 } 1974 ix := kx + (n-1)*incX 1975 offset = n*(n+1)/2 - 1 1976 for i := n - 1; i >= 0; i-- { 1977 if nonUnit { 1978 x[ix] /= ap[offset] 1979 } 1980 xix := x[ix] 1981 atmp := ap[offset-i : offset] 1982 jx := kx 1983 for _, v := range atmp { 1984 x[jx] -= v * xix 1985 jx += incX 1986 } 1987 ix -= incX 1988 offset -= i + 1 1989 } 1990 } 1991 1992 // Dspmv performs the matrix-vector operation 1993 // 1994 // y = alpha * A * x + beta * y 1995 // 1996 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 1997 // and alpha and beta are scalars. 1998 func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) { 1999 if ul != blas.Lower && ul != blas.Upper { 2000 panic(badUplo) 2001 } 2002 if n < 0 { 2003 panic(nLT0) 2004 } 2005 if incX == 0 { 2006 panic(zeroIncX) 2007 } 2008 if incY == 0 { 2009 panic(zeroIncY) 2010 } 2011 2012 // Quick return if possible. 2013 if n == 0 { 2014 return 2015 } 2016 2017 // For zero matrix size the following slice length checks are trivially satisfied. 2018 if len(ap) < n*(n+1)/2 { 2019 panic(shortAP) 2020 } 2021 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2022 panic(shortX) 2023 } 2024 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2025 panic(shortY) 2026 } 2027 2028 // Quick return if possible. 2029 if alpha == 0 && beta == 1 { 2030 return 2031 } 2032 2033 // Set up start points 2034 var kx, ky int 2035 if incX < 0 { 2036 kx = -(n - 1) * incX 2037 } 2038 if incY < 0 { 2039 ky = -(n - 1) * incY 2040 } 2041 2042 // Form y = beta * y. 2043 if beta != 1 { 2044 if incY == 1 { 2045 if beta == 0 { 2046 for i := range y[:n] { 2047 y[i] = 0 2048 } 2049 } else { 2050 f64.ScalUnitary(beta, y[:n]) 2051 } 2052 } else { 2053 iy := ky 2054 if beta == 0 { 2055 for i := 0; i < n; i++ { 2056 y[iy] = 0 2057 iy += incY 2058 } 2059 } else { 2060 if incY > 0 { 2061 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 2062 } else { 2063 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 2064 } 2065 } 2066 } 2067 } 2068 2069 if alpha == 0 { 2070 return 2071 } 2072 2073 if n == 1 { 2074 y[0] += alpha * ap[0] * x[0] 2075 return 2076 } 2077 var offset int // Offset is the index of (i,i). 2078 if ul == blas.Upper { 2079 if incX == 1 { 2080 iy := ky 2081 for i := 0; i < n; i++ { 2082 xv := x[i] * alpha 2083 sum := ap[offset] * x[i] 2084 atmp := ap[offset+1 : offset+n-i] 2085 xtmp := x[i+1:] 2086 jy := ky + (i+1)*incY 2087 for j, v := range atmp { 2088 sum += v * xtmp[j] 2089 y[jy] += v * xv 2090 jy += incY 2091 } 2092 y[iy] += alpha * sum 2093 iy += incY 2094 offset += n - i 2095 } 2096 return 2097 } 2098 ix := kx 2099 iy := ky 2100 for i := 0; i < n; i++ { 2101 xv := x[ix] * alpha 2102 sum := ap[offset] * x[ix] 2103 atmp := ap[offset+1 : offset+n-i] 2104 jx := kx + (i+1)*incX 2105 jy := ky + (i+1)*incY 2106 for _, v := range atmp { 2107 sum += v * x[jx] 2108 y[jy] += v * xv 2109 jx += incX 2110 jy += incY 2111 } 2112 y[iy] += alpha * sum 2113 ix += incX 2114 iy += incY 2115 offset += n - i 2116 } 2117 return 2118 } 2119 if incX == 1 { 2120 iy := ky 2121 for i := 0; i < n; i++ { 2122 xv := x[i] * alpha 2123 atmp := ap[offset-i : offset] 2124 jy := ky 2125 var sum float64 2126 for j, v := range atmp { 2127 sum += v * x[j] 2128 y[jy] += v * xv 2129 jy += incY 2130 } 2131 sum += ap[offset] * x[i] 2132 y[iy] += alpha * sum 2133 iy += incY 2134 offset += i + 2 2135 } 2136 return 2137 } 2138 ix := kx 2139 iy := ky 2140 for i := 0; i < n; i++ { 2141 xv := x[ix] * alpha 2142 atmp := ap[offset-i : offset] 2143 jx := kx 2144 jy := ky 2145 var sum float64 2146 for _, v := range atmp { 2147 sum += v * x[jx] 2148 y[jy] += v * xv 2149 jx += incX 2150 jy += incY 2151 } 2152 2153 sum += ap[offset] * x[ix] 2154 y[iy] += alpha * sum 2155 ix += incX 2156 iy += incY 2157 offset += i + 2 2158 } 2159 } 2160 2161 // Dspr performs the symmetric rank-one operation 2162 // 2163 // A += alpha * x * xᵀ 2164 // 2165 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2166 // alpha is a scalar. 2167 func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) { 2168 if ul != blas.Lower && ul != blas.Upper { 2169 panic(badUplo) 2170 } 2171 if n < 0 { 2172 panic(nLT0) 2173 } 2174 if incX == 0 { 2175 panic(zeroIncX) 2176 } 2177 2178 // Quick return if possible. 2179 if n == 0 { 2180 return 2181 } 2182 2183 // For zero matrix size the following slice length checks are trivially satisfied. 2184 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2185 panic(shortX) 2186 } 2187 if len(ap) < n*(n+1)/2 { 2188 panic(shortAP) 2189 } 2190 2191 // Quick return if possible. 2192 if alpha == 0 { 2193 return 2194 } 2195 2196 lenX := n 2197 var kx int 2198 if incX < 0 { 2199 kx = -(lenX - 1) * incX 2200 } 2201 var offset int // Offset is the index of (i,i). 2202 if ul == blas.Upper { 2203 if incX == 1 { 2204 for i := 0; i < n; i++ { 2205 atmp := ap[offset:] 2206 xv := alpha * x[i] 2207 xtmp := x[i:n] 2208 for j, v := range xtmp { 2209 atmp[j] += xv * v 2210 } 2211 offset += n - i 2212 } 2213 return 2214 } 2215 ix := kx 2216 for i := 0; i < n; i++ { 2217 jx := kx + i*incX 2218 atmp := ap[offset:] 2219 xv := alpha * x[ix] 2220 for j := 0; j < n-i; j++ { 2221 atmp[j] += xv * x[jx] 2222 jx += incX 2223 } 2224 ix += incX 2225 offset += n - i 2226 } 2227 return 2228 } 2229 if incX == 1 { 2230 for i := 0; i < n; i++ { 2231 atmp := ap[offset-i:] 2232 xv := alpha * x[i] 2233 xtmp := x[:i+1] 2234 for j, v := range xtmp { 2235 atmp[j] += xv * v 2236 } 2237 offset += i + 2 2238 } 2239 return 2240 } 2241 ix := kx 2242 for i := 0; i < n; i++ { 2243 jx := kx 2244 atmp := ap[offset-i:] 2245 xv := alpha * x[ix] 2246 for j := 0; j <= i; j++ { 2247 atmp[j] += xv * x[jx] 2248 jx += incX 2249 } 2250 ix += incX 2251 offset += i + 2 2252 } 2253 } 2254 2255 // Dspr2 performs the symmetric rank-2 update 2256 // 2257 // A += alpha * x * yᵀ + alpha * y * xᵀ 2258 // 2259 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2260 // and alpha is a scalar. 2261 func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) { 2262 if ul != blas.Lower && ul != blas.Upper { 2263 panic(badUplo) 2264 } 2265 if n < 0 { 2266 panic(nLT0) 2267 } 2268 if incX == 0 { 2269 panic(zeroIncX) 2270 } 2271 if incY == 0 { 2272 panic(zeroIncY) 2273 } 2274 2275 // Quick return if possible. 2276 if n == 0 { 2277 return 2278 } 2279 2280 // For zero matrix size the following slice length checks are trivially satisfied. 2281 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2282 panic(shortX) 2283 } 2284 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2285 panic(shortY) 2286 } 2287 if len(ap) < n*(n+1)/2 { 2288 panic(shortAP) 2289 } 2290 2291 // Quick return if possible. 2292 if alpha == 0 { 2293 return 2294 } 2295 2296 var ky, kx int 2297 if incY < 0 { 2298 ky = -(n - 1) * incY 2299 } 2300 if incX < 0 { 2301 kx = -(n - 1) * incX 2302 } 2303 var offset int // Offset is the index of (i,i). 2304 if ul == blas.Upper { 2305 if incX == 1 && incY == 1 { 2306 for i := 0; i < n; i++ { 2307 atmp := ap[offset:] 2308 xi := x[i] 2309 yi := y[i] 2310 xtmp := x[i:n] 2311 ytmp := y[i:n] 2312 for j, v := range xtmp { 2313 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2314 } 2315 offset += n - i 2316 } 2317 return 2318 } 2319 ix := kx 2320 iy := ky 2321 for i := 0; i < n; i++ { 2322 jx := kx + i*incX 2323 jy := ky + i*incY 2324 atmp := ap[offset:] 2325 xi := x[ix] 2326 yi := y[iy] 2327 for j := 0; j < n-i; j++ { 2328 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2329 jx += incX 2330 jy += incY 2331 } 2332 ix += incX 2333 iy += incY 2334 offset += n - i 2335 } 2336 return 2337 } 2338 if incX == 1 && incY == 1 { 2339 for i := 0; i < n; i++ { 2340 atmp := ap[offset-i:] 2341 xi := x[i] 2342 yi := y[i] 2343 xtmp := x[:i+1] 2344 for j, v := range xtmp { 2345 atmp[j] += alpha * (xi*y[j] + v*yi) 2346 } 2347 offset += i + 2 2348 } 2349 return 2350 } 2351 ix := kx 2352 iy := ky 2353 for i := 0; i < n; i++ { 2354 jx := kx 2355 jy := ky 2356 atmp := ap[offset-i:] 2357 for j := 0; j <= i; j++ { 2358 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2359 jx += incX 2360 jy += incY 2361 } 2362 ix += incX 2363 iy += incY 2364 offset += i + 2 2365 } 2366 }