github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level2float64.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/jingcheng-WU/gonum/blas" 9 "github.com/jingcheng-WU/gonum/internal/asm/f64" 10 ) 11 12 var _ blas.Float64Level2 = Implementation{} 13 14 // Dger performs the rank-one operation 15 // A += alpha * x * yᵀ 16 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 17 func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 18 if m < 0 { 19 panic(mLT0) 20 } 21 if n < 0 { 22 panic(nLT0) 23 } 24 if lda < max(1, n) { 25 panic(badLdA) 26 } 27 if incX == 0 { 28 panic(zeroIncX) 29 } 30 if incY == 0 { 31 panic(zeroIncY) 32 } 33 34 // Quick return if possible. 35 if m == 0 || n == 0 { 36 return 37 } 38 39 // For zero matrix size the following slice length checks are trivially satisfied. 40 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 41 panic(shortX) 42 } 43 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 44 panic(shortY) 45 } 46 if len(a) < lda*(m-1)+n { 47 panic(shortA) 48 } 49 50 // Quick return if possible. 51 if alpha == 0 { 52 return 53 } 54 f64.Ger(uintptr(m), uintptr(n), 55 alpha, 56 x, uintptr(incX), 57 y, uintptr(incY), 58 a, uintptr(lda)) 59 } 60 61 // Dgbmv performs one of the matrix-vector operations 62 // y = alpha * A * x + beta * y if tA == blas.NoTrans 63 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 64 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 65 // x and y are vectors, and alpha and beta are scalars. 66 func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 67 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 68 panic(badTranspose) 69 } 70 if m < 0 { 71 panic(mLT0) 72 } 73 if n < 0 { 74 panic(nLT0) 75 } 76 if kL < 0 { 77 panic(kLLT0) 78 } 79 if kU < 0 { 80 panic(kULT0) 81 } 82 if lda < kL+kU+1 { 83 panic(badLdA) 84 } 85 if incX == 0 { 86 panic(zeroIncX) 87 } 88 if incY == 0 { 89 panic(zeroIncY) 90 } 91 92 // Quick return if possible. 93 if m == 0 || n == 0 { 94 return 95 } 96 97 // For zero matrix size the following slice length checks are trivially satisfied. 98 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 99 panic(shortA) 100 } 101 lenX := m 102 lenY := n 103 if tA == blas.NoTrans { 104 lenX = n 105 lenY = m 106 } 107 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 108 panic(shortX) 109 } 110 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 111 panic(shortY) 112 } 113 114 // Quick return if possible. 115 if alpha == 0 && beta == 1 { 116 return 117 } 118 119 var kx, ky int 120 if incX < 0 { 121 kx = -(lenX - 1) * incX 122 } 123 if incY < 0 { 124 ky = -(lenY - 1) * incY 125 } 126 127 // Form y = beta * y. 128 if beta != 1 { 129 if incY == 1 { 130 if beta == 0 { 131 for i := range y[:lenY] { 132 y[i] = 0 133 } 134 } else { 135 f64.ScalUnitary(beta, y[:lenY]) 136 } 137 } else { 138 iy := ky 139 if beta == 0 { 140 for i := 0; i < lenY; i++ { 141 y[iy] = 0 142 iy += incY 143 } 144 } else { 145 if incY > 0 { 146 f64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 147 } else { 148 f64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 149 } 150 } 151 } 152 } 153 154 if alpha == 0 { 155 return 156 } 157 158 // i and j are indices of the compacted banded matrix. 159 // off is the offset into the dense matrix (off + j = densej) 160 nCol := kU + 1 + kL 161 if tA == blas.NoTrans { 162 iy := ky 163 if incX == 1 { 164 for i := 0; i < min(m, n+kL); i++ { 165 l := max(0, kL-i) 166 u := min(nCol, n+kL-i) 167 off := max(0, i-kL) 168 atmp := a[i*lda+l : i*lda+u] 169 xtmp := x[off : off+u-l] 170 var sum float64 171 for j, v := range atmp { 172 sum += xtmp[j] * v 173 } 174 y[iy] += sum * alpha 175 iy += incY 176 } 177 return 178 } 179 for i := 0; i < min(m, n+kL); i++ { 180 l := max(0, kL-i) 181 u := min(nCol, n+kL-i) 182 off := max(0, i-kL) 183 atmp := a[i*lda+l : i*lda+u] 184 jx := kx 185 var sum float64 186 for _, v := range atmp { 187 sum += x[off*incX+jx] * v 188 jx += incX 189 } 190 y[iy] += sum * alpha 191 iy += incY 192 } 193 return 194 } 195 if incX == 1 { 196 for i := 0; i < min(m, n+kL); i++ { 197 l := max(0, kL-i) 198 u := min(nCol, n+kL-i) 199 off := max(0, i-kL) 200 atmp := a[i*lda+l : i*lda+u] 201 tmp := alpha * x[i] 202 jy := ky 203 for _, v := range atmp { 204 y[jy+off*incY] += tmp * v 205 jy += incY 206 } 207 } 208 return 209 } 210 ix := kx 211 for i := 0; i < min(m, n+kL); i++ { 212 l := max(0, kL-i) 213 u := min(nCol, n+kL-i) 214 off := max(0, i-kL) 215 atmp := a[i*lda+l : i*lda+u] 216 tmp := alpha * x[ix] 217 jy := ky 218 for _, v := range atmp { 219 y[jy+off*incY] += tmp * v 220 jy += incY 221 } 222 ix += incX 223 } 224 } 225 226 // Dtrmv performs one of the matrix-vector operations 227 // x = A * x if tA == blas.NoTrans 228 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 229 // where A is an n×n triangular matrix, and x is a vector. 230 func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 231 if ul != blas.Lower && ul != blas.Upper { 232 panic(badUplo) 233 } 234 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 235 panic(badTranspose) 236 } 237 if d != blas.NonUnit && d != blas.Unit { 238 panic(badDiag) 239 } 240 if n < 0 { 241 panic(nLT0) 242 } 243 if lda < max(1, n) { 244 panic(badLdA) 245 } 246 if incX == 0 { 247 panic(zeroIncX) 248 } 249 250 // Quick return if possible. 251 if n == 0 { 252 return 253 } 254 255 // For zero matrix size the following slice length checks are trivially satisfied. 256 if len(a) < lda*(n-1)+n { 257 panic(shortA) 258 } 259 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 260 panic(shortX) 261 } 262 263 nonUnit := d != blas.Unit 264 if n == 1 { 265 if nonUnit { 266 x[0] *= a[0] 267 } 268 return 269 } 270 var kx int 271 if incX <= 0 { 272 kx = -(n - 1) * incX 273 } 274 if tA == blas.NoTrans { 275 if ul == blas.Upper { 276 if incX == 1 { 277 for i := 0; i < n; i++ { 278 ilda := i * lda 279 var tmp float64 280 if nonUnit { 281 tmp = a[ilda+i] * x[i] 282 } else { 283 tmp = x[i] 284 } 285 x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 286 } 287 return 288 } 289 ix := kx 290 for i := 0; i < n; i++ { 291 ilda := i * lda 292 var tmp float64 293 if nonUnit { 294 tmp = a[ilda+i] * x[ix] 295 } else { 296 tmp = x[ix] 297 } 298 x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 299 ix += incX 300 } 301 return 302 } 303 if incX == 1 { 304 for i := n - 1; i >= 0; i-- { 305 ilda := i * lda 306 var tmp float64 307 if nonUnit { 308 tmp += a[ilda+i] * x[i] 309 } else { 310 tmp = x[i] 311 } 312 x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x[:i]) 313 } 314 return 315 } 316 ix := kx + (n-1)*incX 317 for i := n - 1; i >= 0; i-- { 318 ilda := i * lda 319 var tmp float64 320 if nonUnit { 321 tmp = a[ilda+i] * x[ix] 322 } else { 323 tmp = x[ix] 324 } 325 x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 326 ix -= incX 327 } 328 return 329 } 330 // Cases where a is transposed. 331 if ul == blas.Upper { 332 if incX == 1 { 333 for i := n - 1; i >= 0; i-- { 334 ilda := i * lda 335 xi := x[i] 336 f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 337 if nonUnit { 338 x[i] *= a[ilda+i] 339 } 340 } 341 return 342 } 343 ix := kx + (n-1)*incX 344 for i := n - 1; i >= 0; i-- { 345 ilda := i * lda 346 xi := x[ix] 347 f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 348 if nonUnit { 349 x[ix] *= a[ilda+i] 350 } 351 ix -= incX 352 } 353 return 354 } 355 if incX == 1 { 356 for i := 0; i < n; i++ { 357 ilda := i * lda 358 xi := x[i] 359 f64.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 360 if nonUnit { 361 x[i] *= a[i*lda+i] 362 } 363 } 364 return 365 } 366 ix := kx 367 for i := 0; i < n; i++ { 368 ilda := i * lda 369 xi := x[ix] 370 f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 371 if nonUnit { 372 x[ix] *= a[ilda+i] 373 } 374 ix += incX 375 } 376 } 377 378 // Dtrsv solves one of the systems of equations 379 // A * x = b if tA == blas.NoTrans 380 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 381 // where A is an n×n triangular matrix, and x and b are vectors. 382 // 383 // At entry to the function, x contains the values of b, and the result is 384 // stored in-place into x. 385 // 386 // No test for singularity or near-singularity is included in this 387 // routine. Such tests must be performed before calling this routine. 388 func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 389 if ul != blas.Lower && ul != blas.Upper { 390 panic(badUplo) 391 } 392 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 393 panic(badTranspose) 394 } 395 if d != blas.NonUnit && d != blas.Unit { 396 panic(badDiag) 397 } 398 if n < 0 { 399 panic(nLT0) 400 } 401 if lda < max(1, n) { 402 panic(badLdA) 403 } 404 if incX == 0 { 405 panic(zeroIncX) 406 } 407 408 // Quick return if possible. 409 if n == 0 { 410 return 411 } 412 413 // For zero matrix size the following slice length checks are trivially satisfied. 414 if len(a) < lda*(n-1)+n { 415 panic(shortA) 416 } 417 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 418 panic(shortX) 419 } 420 421 if n == 1 { 422 if d == blas.NonUnit { 423 x[0] /= a[0] 424 } 425 return 426 } 427 428 var kx int 429 if incX < 0 { 430 kx = -(n - 1) * incX 431 } 432 nonUnit := d == blas.NonUnit 433 if tA == blas.NoTrans { 434 if ul == blas.Upper { 435 if incX == 1 { 436 for i := n - 1; i >= 0; i-- { 437 var sum float64 438 atmp := a[i*lda+i+1 : i*lda+n] 439 for j, v := range atmp { 440 jv := i + j + 1 441 sum += x[jv] * v 442 } 443 x[i] -= sum 444 if nonUnit { 445 x[i] /= a[i*lda+i] 446 } 447 } 448 return 449 } 450 ix := kx + (n-1)*incX 451 for i := n - 1; i >= 0; i-- { 452 var sum float64 453 jx := ix + incX 454 atmp := a[i*lda+i+1 : i*lda+n] 455 for _, v := range atmp { 456 sum += x[jx] * v 457 jx += incX 458 } 459 x[ix] -= sum 460 if nonUnit { 461 x[ix] /= a[i*lda+i] 462 } 463 ix -= incX 464 } 465 return 466 } 467 if incX == 1 { 468 for i := 0; i < n; i++ { 469 var sum float64 470 atmp := a[i*lda : i*lda+i] 471 for j, v := range atmp { 472 sum += x[j] * v 473 } 474 x[i] -= sum 475 if nonUnit { 476 x[i] /= a[i*lda+i] 477 } 478 } 479 return 480 } 481 ix := kx 482 for i := 0; i < n; i++ { 483 jx := kx 484 var sum float64 485 atmp := a[i*lda : i*lda+i] 486 for _, v := range atmp { 487 sum += x[jx] * v 488 jx += incX 489 } 490 x[ix] -= sum 491 if nonUnit { 492 x[ix] /= a[i*lda+i] 493 } 494 ix += incX 495 } 496 return 497 } 498 // Cases where a is transposed. 499 if ul == blas.Upper { 500 if incX == 1 { 501 for i := 0; i < n; i++ { 502 if nonUnit { 503 x[i] /= a[i*lda+i] 504 } 505 xi := x[i] 506 atmp := a[i*lda+i+1 : i*lda+n] 507 for j, v := range atmp { 508 jv := j + i + 1 509 x[jv] -= v * xi 510 } 511 } 512 return 513 } 514 ix := kx 515 for i := 0; i < n; i++ { 516 if nonUnit { 517 x[ix] /= a[i*lda+i] 518 } 519 xi := x[ix] 520 jx := kx + (i+1)*incX 521 atmp := a[i*lda+i+1 : i*lda+n] 522 for _, v := range atmp { 523 x[jx] -= v * xi 524 jx += incX 525 } 526 ix += incX 527 } 528 return 529 } 530 if incX == 1 { 531 for i := n - 1; i >= 0; i-- { 532 if nonUnit { 533 x[i] /= a[i*lda+i] 534 } 535 xi := x[i] 536 atmp := a[i*lda : i*lda+i] 537 for j, v := range atmp { 538 x[j] -= v * xi 539 } 540 } 541 return 542 } 543 ix := kx + (n-1)*incX 544 for i := n - 1; i >= 0; i-- { 545 if nonUnit { 546 x[ix] /= a[i*lda+i] 547 } 548 xi := x[ix] 549 jx := kx 550 atmp := a[i*lda : i*lda+i] 551 for _, v := range atmp { 552 x[jx] -= v * xi 553 jx += incX 554 } 555 ix -= incX 556 } 557 } 558 559 // Dsymv performs the matrix-vector operation 560 // y = alpha * A * x + beta * y 561 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 562 // beta are scalars. 563 func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 564 if ul != blas.Lower && ul != blas.Upper { 565 panic(badUplo) 566 } 567 if n < 0 { 568 panic(nLT0) 569 } 570 if lda < max(1, n) { 571 panic(badLdA) 572 } 573 if incX == 0 { 574 panic(zeroIncX) 575 } 576 if incY == 0 { 577 panic(zeroIncY) 578 } 579 580 // Quick return if possible. 581 if n == 0 { 582 return 583 } 584 585 // For zero matrix size the following slice length checks are trivially satisfied. 586 if len(a) < lda*(n-1)+n { 587 panic(shortA) 588 } 589 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 590 panic(shortX) 591 } 592 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 593 panic(shortY) 594 } 595 596 // Quick return if possible. 597 if alpha == 0 && beta == 1 { 598 return 599 } 600 601 // Set up start points 602 var kx, ky int 603 if incX < 0 { 604 kx = -(n - 1) * incX 605 } 606 if incY < 0 { 607 ky = -(n - 1) * incY 608 } 609 610 // Form y = beta * y 611 if beta != 1 { 612 if incY == 1 { 613 if beta == 0 { 614 for i := range y[:n] { 615 y[i] = 0 616 } 617 } else { 618 f64.ScalUnitary(beta, y[:n]) 619 } 620 } else { 621 iy := ky 622 if beta == 0 { 623 for i := 0; i < n; i++ { 624 y[iy] = 0 625 iy += incY 626 } 627 } else { 628 if incY > 0 { 629 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 630 } else { 631 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 632 } 633 } 634 } 635 } 636 637 if alpha == 0 { 638 return 639 } 640 641 if n == 1 { 642 y[0] += alpha * a[0] * x[0] 643 return 644 } 645 646 if ul == blas.Upper { 647 if incX == 1 { 648 iy := ky 649 for i := 0; i < n; i++ { 650 xv := x[i] * alpha 651 sum := x[i] * a[i*lda+i] 652 jy := ky + (i+1)*incY 653 atmp := a[i*lda+i+1 : i*lda+n] 654 for j, v := range atmp { 655 jp := j + i + 1 656 sum += x[jp] * v 657 y[jy] += xv * v 658 jy += incY 659 } 660 y[iy] += alpha * sum 661 iy += incY 662 } 663 return 664 } 665 ix := kx 666 iy := ky 667 for i := 0; i < n; i++ { 668 xv := x[ix] * alpha 669 sum := x[ix] * a[i*lda+i] 670 jx := kx + (i+1)*incX 671 jy := ky + (i+1)*incY 672 atmp := a[i*lda+i+1 : i*lda+n] 673 for _, v := range atmp { 674 sum += x[jx] * v 675 y[jy] += xv * v 676 jx += incX 677 jy += incY 678 } 679 y[iy] += alpha * sum 680 ix += incX 681 iy += incY 682 } 683 return 684 } 685 // Cases where a is lower triangular. 686 if incX == 1 { 687 iy := ky 688 for i := 0; i < n; i++ { 689 jy := ky 690 xv := alpha * x[i] 691 atmp := a[i*lda : i*lda+i] 692 var sum float64 693 for j, v := range atmp { 694 sum += x[j] * v 695 y[jy] += xv * v 696 jy += incY 697 } 698 sum += x[i] * a[i*lda+i] 699 sum *= alpha 700 y[iy] += sum 701 iy += incY 702 } 703 return 704 } 705 ix := kx 706 iy := ky 707 for i := 0; i < n; i++ { 708 jx := kx 709 jy := ky 710 xv := alpha * x[ix] 711 atmp := a[i*lda : i*lda+i] 712 var sum float64 713 for _, v := range atmp { 714 sum += x[jx] * v 715 y[jy] += xv * v 716 jx += incX 717 jy += incY 718 } 719 sum += x[ix] * a[i*lda+i] 720 sum *= alpha 721 y[iy] += sum 722 ix += incX 723 iy += incY 724 } 725 } 726 727 // Dtbmv performs one of the matrix-vector operations 728 // x = A * x if tA == blas.NoTrans 729 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 730 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 731 func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 732 if ul != blas.Lower && ul != blas.Upper { 733 panic(badUplo) 734 } 735 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 736 panic(badTranspose) 737 } 738 if d != blas.NonUnit && d != blas.Unit { 739 panic(badDiag) 740 } 741 if n < 0 { 742 panic(nLT0) 743 } 744 if k < 0 { 745 panic(kLT0) 746 } 747 if lda < k+1 { 748 panic(badLdA) 749 } 750 if incX == 0 { 751 panic(zeroIncX) 752 } 753 754 // Quick return if possible. 755 if n == 0 { 756 return 757 } 758 759 // For zero matrix size the following slice length checks are trivially satisfied. 760 if len(a) < lda*(n-1)+k+1 { 761 panic(shortA) 762 } 763 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 764 panic(shortX) 765 } 766 767 var kx int 768 if incX < 0 { 769 kx = -(n - 1) * incX 770 } 771 772 nonunit := d != blas.Unit 773 774 if tA == blas.NoTrans { 775 if ul == blas.Upper { 776 if incX == 1 { 777 for i := 0; i < n; i++ { 778 u := min(1+k, n-i) 779 var sum float64 780 atmp := a[i*lda:] 781 xtmp := x[i:] 782 for j := 1; j < u; j++ { 783 sum += xtmp[j] * atmp[j] 784 } 785 if nonunit { 786 sum += xtmp[0] * atmp[0] 787 } else { 788 sum += xtmp[0] 789 } 790 x[i] = sum 791 } 792 return 793 } 794 ix := kx 795 for i := 0; i < n; i++ { 796 u := min(1+k, n-i) 797 var sum float64 798 atmp := a[i*lda:] 799 jx := incX 800 for j := 1; j < u; j++ { 801 sum += x[ix+jx] * atmp[j] 802 jx += incX 803 } 804 if nonunit { 805 sum += x[ix] * atmp[0] 806 } else { 807 sum += x[ix] 808 } 809 x[ix] = sum 810 ix += incX 811 } 812 return 813 } 814 if incX == 1 { 815 for i := n - 1; i >= 0; i-- { 816 l := max(0, k-i) 817 atmp := a[i*lda:] 818 var sum float64 819 for j := l; j < k; j++ { 820 sum += x[i-k+j] * atmp[j] 821 } 822 if nonunit { 823 sum += x[i] * atmp[k] 824 } else { 825 sum += x[i] 826 } 827 x[i] = sum 828 } 829 return 830 } 831 ix := kx + (n-1)*incX 832 for i := n - 1; i >= 0; i-- { 833 l := max(0, k-i) 834 atmp := a[i*lda:] 835 var sum float64 836 jx := l * incX 837 for j := l; j < k; j++ { 838 sum += x[ix-k*incX+jx] * atmp[j] 839 jx += incX 840 } 841 if nonunit { 842 sum += x[ix] * atmp[k] 843 } else { 844 sum += x[ix] 845 } 846 x[ix] = sum 847 ix -= incX 848 } 849 return 850 } 851 if ul == blas.Upper { 852 if incX == 1 { 853 for i := n - 1; i >= 0; i-- { 854 u := k + 1 855 if i < u { 856 u = i + 1 857 } 858 var sum float64 859 for j := 1; j < u; j++ { 860 sum += x[i-j] * a[(i-j)*lda+j] 861 } 862 if nonunit { 863 sum += x[i] * a[i*lda] 864 } else { 865 sum += x[i] 866 } 867 x[i] = sum 868 } 869 return 870 } 871 ix := kx + (n-1)*incX 872 for i := n - 1; i >= 0; i-- { 873 u := k + 1 874 if i < u { 875 u = i + 1 876 } 877 var sum float64 878 jx := incX 879 for j := 1; j < u; j++ { 880 sum += x[ix-jx] * a[(i-j)*lda+j] 881 jx += incX 882 } 883 if nonunit { 884 sum += x[ix] * a[i*lda] 885 } else { 886 sum += x[ix] 887 } 888 x[ix] = sum 889 ix -= incX 890 } 891 return 892 } 893 if incX == 1 { 894 for i := 0; i < n; i++ { 895 u := k 896 if i+k >= n { 897 u = n - i - 1 898 } 899 var sum float64 900 for j := 0; j < u; j++ { 901 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 902 } 903 if nonunit { 904 sum += x[i] * a[i*lda+k] 905 } else { 906 sum += x[i] 907 } 908 x[i] = sum 909 } 910 return 911 } 912 ix := kx 913 for i := 0; i < n; i++ { 914 u := k 915 if i+k >= n { 916 u = n - i - 1 917 } 918 var ( 919 sum float64 920 jx int 921 ) 922 for j := 0; j < u; j++ { 923 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 924 jx += incX 925 } 926 if nonunit { 927 sum += x[ix] * a[i*lda+k] 928 } else { 929 sum += x[ix] 930 } 931 x[ix] = sum 932 ix += incX 933 } 934 } 935 936 // Dtpmv performs one of the matrix-vector operations 937 // x = A * x if tA == blas.NoTrans 938 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 939 // where A is an n×n triangular matrix in packed format, and x is a vector. 940 func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 941 if ul != blas.Lower && ul != blas.Upper { 942 panic(badUplo) 943 } 944 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 945 panic(badTranspose) 946 } 947 if d != blas.NonUnit && d != blas.Unit { 948 panic(badDiag) 949 } 950 if n < 0 { 951 panic(nLT0) 952 } 953 if incX == 0 { 954 panic(zeroIncX) 955 } 956 957 // Quick return if possible. 958 if n == 0 { 959 return 960 } 961 962 // For zero matrix size the following slice length checks are trivially satisfied. 963 if len(ap) < n*(n+1)/2 { 964 panic(shortAP) 965 } 966 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 967 panic(shortX) 968 } 969 970 var kx int 971 if incX < 0 { 972 kx = -(n - 1) * incX 973 } 974 975 nonUnit := d == blas.NonUnit 976 var offset int // Offset is the index of (i,i) 977 if tA == blas.NoTrans { 978 if ul == blas.Upper { 979 if incX == 1 { 980 for i := 0; i < n; i++ { 981 xi := x[i] 982 if nonUnit { 983 xi *= ap[offset] 984 } 985 atmp := ap[offset+1 : offset+n-i] 986 xtmp := x[i+1:] 987 for j, v := range atmp { 988 xi += v * xtmp[j] 989 } 990 x[i] = xi 991 offset += n - i 992 } 993 return 994 } 995 ix := kx 996 for i := 0; i < n; i++ { 997 xix := x[ix] 998 if nonUnit { 999 xix *= ap[offset] 1000 } 1001 atmp := ap[offset+1 : offset+n-i] 1002 jx := kx + (i+1)*incX 1003 for _, v := range atmp { 1004 xix += v * x[jx] 1005 jx += incX 1006 } 1007 x[ix] = xix 1008 offset += n - i 1009 ix += incX 1010 } 1011 return 1012 } 1013 if incX == 1 { 1014 offset = n*(n+1)/2 - 1 1015 for i := n - 1; i >= 0; i-- { 1016 xi := x[i] 1017 if nonUnit { 1018 xi *= ap[offset] 1019 } 1020 atmp := ap[offset-i : offset] 1021 for j, v := range atmp { 1022 xi += v * x[j] 1023 } 1024 x[i] = xi 1025 offset -= i + 1 1026 } 1027 return 1028 } 1029 ix := kx + (n-1)*incX 1030 offset = n*(n+1)/2 - 1 1031 for i := n - 1; i >= 0; i-- { 1032 xix := x[ix] 1033 if nonUnit { 1034 xix *= ap[offset] 1035 } 1036 atmp := ap[offset-i : offset] 1037 jx := kx 1038 for _, v := range atmp { 1039 xix += v * x[jx] 1040 jx += incX 1041 } 1042 x[ix] = xix 1043 offset -= i + 1 1044 ix -= incX 1045 } 1046 return 1047 } 1048 // Cases where ap is transposed. 1049 if ul == blas.Upper { 1050 if incX == 1 { 1051 offset = n*(n+1)/2 - 1 1052 for i := n - 1; i >= 0; i-- { 1053 xi := x[i] 1054 atmp := ap[offset+1 : offset+n-i] 1055 xtmp := x[i+1:] 1056 for j, v := range atmp { 1057 xtmp[j] += v * xi 1058 } 1059 if nonUnit { 1060 x[i] *= ap[offset] 1061 } 1062 offset -= n - i + 1 1063 } 1064 return 1065 } 1066 ix := kx + (n-1)*incX 1067 offset = n*(n+1)/2 - 1 1068 for i := n - 1; i >= 0; i-- { 1069 xix := x[ix] 1070 jx := kx + (i+1)*incX 1071 atmp := ap[offset+1 : offset+n-i] 1072 for _, v := range atmp { 1073 x[jx] += v * xix 1074 jx += incX 1075 } 1076 if nonUnit { 1077 x[ix] *= ap[offset] 1078 } 1079 offset -= n - i + 1 1080 ix -= incX 1081 } 1082 return 1083 } 1084 if incX == 1 { 1085 for i := 0; i < n; i++ { 1086 xi := x[i] 1087 atmp := ap[offset-i : offset] 1088 for j, v := range atmp { 1089 x[j] += v * xi 1090 } 1091 if nonUnit { 1092 x[i] *= ap[offset] 1093 } 1094 offset += i + 2 1095 } 1096 return 1097 } 1098 ix := kx 1099 for i := 0; i < n; i++ { 1100 xix := x[ix] 1101 jx := kx 1102 atmp := ap[offset-i : offset] 1103 for _, v := range atmp { 1104 x[jx] += v * xix 1105 jx += incX 1106 } 1107 if nonUnit { 1108 x[ix] *= ap[offset] 1109 } 1110 ix += incX 1111 offset += i + 2 1112 } 1113 } 1114 1115 // Dtbsv solves one of the systems of equations 1116 // A * x = b if tA == blas.NoTrans 1117 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1118 // where A is an n×n triangular band matrix with k+1 diagonals, 1119 // and x and b are vectors. 1120 // 1121 // At entry to the function, x contains the values of b, and the result is 1122 // stored in-place into x. 1123 // 1124 // No test for singularity or near-singularity is included in this 1125 // routine. Such tests must be performed before calling this routine. 1126 func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 1127 if ul != blas.Lower && ul != blas.Upper { 1128 panic(badUplo) 1129 } 1130 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1131 panic(badTranspose) 1132 } 1133 if d != blas.NonUnit && d != blas.Unit { 1134 panic(badDiag) 1135 } 1136 if n < 0 { 1137 panic(nLT0) 1138 } 1139 if k < 0 { 1140 panic(kLT0) 1141 } 1142 if lda < k+1 { 1143 panic(badLdA) 1144 } 1145 if incX == 0 { 1146 panic(zeroIncX) 1147 } 1148 1149 // Quick return if possible. 1150 if n == 0 { 1151 return 1152 } 1153 1154 // For zero matrix size the following slice length checks are trivially satisfied. 1155 if len(a) < lda*(n-1)+k+1 { 1156 panic(shortA) 1157 } 1158 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1159 panic(shortX) 1160 } 1161 1162 var kx int 1163 if incX < 0 { 1164 kx = -(n - 1) * incX 1165 } 1166 nonUnit := d == blas.NonUnit 1167 // Form x = A^-1 x. 1168 // Several cases below use subslices for speed improvement. 1169 // The incX != 1 cases usually do not because incX may be negative. 1170 if tA == blas.NoTrans { 1171 if ul == blas.Upper { 1172 if incX == 1 { 1173 for i := n - 1; i >= 0; i-- { 1174 bands := k 1175 if i+bands >= n { 1176 bands = n - i - 1 1177 } 1178 atmp := a[i*lda+1:] 1179 xtmp := x[i+1 : i+bands+1] 1180 var sum float64 1181 for j, v := range xtmp { 1182 sum += v * atmp[j] 1183 } 1184 x[i] -= sum 1185 if nonUnit { 1186 x[i] /= a[i*lda] 1187 } 1188 } 1189 return 1190 } 1191 ix := kx + (n-1)*incX 1192 for i := n - 1; i >= 0; i-- { 1193 max := k + 1 1194 if i+max > n { 1195 max = n - i 1196 } 1197 atmp := a[i*lda:] 1198 var ( 1199 jx int 1200 sum float64 1201 ) 1202 for j := 1; j < max; j++ { 1203 jx += incX 1204 sum += x[ix+jx] * atmp[j] 1205 } 1206 x[ix] -= sum 1207 if nonUnit { 1208 x[ix] /= atmp[0] 1209 } 1210 ix -= incX 1211 } 1212 return 1213 } 1214 if incX == 1 { 1215 for i := 0; i < n; i++ { 1216 bands := k 1217 if i-k < 0 { 1218 bands = i 1219 } 1220 atmp := a[i*lda+k-bands:] 1221 xtmp := x[i-bands : i] 1222 var sum float64 1223 for j, v := range xtmp { 1224 sum += v * atmp[j] 1225 } 1226 x[i] -= sum 1227 if nonUnit { 1228 x[i] /= atmp[bands] 1229 } 1230 } 1231 return 1232 } 1233 ix := kx 1234 for i := 0; i < n; i++ { 1235 bands := k 1236 if i-k < 0 { 1237 bands = i 1238 } 1239 atmp := a[i*lda+k-bands:] 1240 var ( 1241 sum float64 1242 jx int 1243 ) 1244 for j := 0; j < bands; j++ { 1245 sum += x[ix-bands*incX+jx] * atmp[j] 1246 jx += incX 1247 } 1248 x[ix] -= sum 1249 if nonUnit { 1250 x[ix] /= atmp[bands] 1251 } 1252 ix += incX 1253 } 1254 return 1255 } 1256 // Cases where a is transposed. 1257 if ul == blas.Upper { 1258 if incX == 1 { 1259 for i := 0; i < n; i++ { 1260 bands := k 1261 if i-k < 0 { 1262 bands = i 1263 } 1264 var sum float64 1265 for j := 0; j < bands; j++ { 1266 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1267 } 1268 x[i] -= sum 1269 if nonUnit { 1270 x[i] /= a[i*lda] 1271 } 1272 } 1273 return 1274 } 1275 ix := kx 1276 for i := 0; i < n; i++ { 1277 bands := k 1278 if i-k < 0 { 1279 bands = i 1280 } 1281 var ( 1282 sum float64 1283 jx int 1284 ) 1285 for j := 0; j < bands; j++ { 1286 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1287 jx += incX 1288 } 1289 x[ix] -= sum 1290 if nonUnit { 1291 x[ix] /= a[i*lda] 1292 } 1293 ix += incX 1294 } 1295 return 1296 } 1297 if incX == 1 { 1298 for i := n - 1; i >= 0; i-- { 1299 bands := k 1300 if i+bands >= n { 1301 bands = n - i - 1 1302 } 1303 var sum float64 1304 xtmp := x[i+1 : i+1+bands] 1305 for j, v := range xtmp { 1306 sum += v * a[(i+j+1)*lda+k-j-1] 1307 } 1308 x[i] -= sum 1309 if nonUnit { 1310 x[i] /= a[i*lda+k] 1311 } 1312 } 1313 return 1314 } 1315 ix := kx + (n-1)*incX 1316 for i := n - 1; i >= 0; i-- { 1317 bands := k 1318 if i+bands >= n { 1319 bands = n - i - 1 1320 } 1321 var ( 1322 sum float64 1323 jx int 1324 ) 1325 for j := 0; j < bands; j++ { 1326 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1327 jx += incX 1328 } 1329 x[ix] -= sum 1330 if nonUnit { 1331 x[ix] /= a[i*lda+k] 1332 } 1333 ix -= incX 1334 } 1335 } 1336 1337 // Dsbmv performs the matrix-vector operation 1338 // y = alpha * A * x + beta * y 1339 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1340 // vectors, and alpha and beta are scalars. 1341 func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 1342 if ul != blas.Lower && ul != blas.Upper { 1343 panic(badUplo) 1344 } 1345 if n < 0 { 1346 panic(nLT0) 1347 } 1348 if k < 0 { 1349 panic(kLT0) 1350 } 1351 if lda < k+1 { 1352 panic(badLdA) 1353 } 1354 if incX == 0 { 1355 panic(zeroIncX) 1356 } 1357 if incY == 0 { 1358 panic(zeroIncY) 1359 } 1360 1361 // Quick return if possible. 1362 if n == 0 { 1363 return 1364 } 1365 1366 // For zero matrix size the following slice length checks are trivially satisfied. 1367 if len(a) < lda*(n-1)+k+1 { 1368 panic(shortA) 1369 } 1370 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1371 panic(shortX) 1372 } 1373 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1374 panic(shortY) 1375 } 1376 1377 // Quick return if possible. 1378 if alpha == 0 && beta == 1 { 1379 return 1380 } 1381 1382 // Set up indexes 1383 lenX := n 1384 lenY := n 1385 var kx, ky int 1386 if incX < 0 { 1387 kx = -(lenX - 1) * incX 1388 } 1389 if incY < 0 { 1390 ky = -(lenY - 1) * incY 1391 } 1392 1393 // Form y = beta * y. 1394 if beta != 1 { 1395 if incY == 1 { 1396 if beta == 0 { 1397 for i := range y[:n] { 1398 y[i] = 0 1399 } 1400 } else { 1401 f64.ScalUnitary(beta, y[:n]) 1402 } 1403 } else { 1404 iy := ky 1405 if beta == 0 { 1406 for i := 0; i < n; i++ { 1407 y[iy] = 0 1408 iy += incY 1409 } 1410 } else { 1411 if incY > 0 { 1412 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1413 } else { 1414 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1415 } 1416 } 1417 } 1418 } 1419 1420 if alpha == 0 { 1421 return 1422 } 1423 1424 if ul == blas.Upper { 1425 if incX == 1 { 1426 iy := ky 1427 for i := 0; i < n; i++ { 1428 atmp := a[i*lda:] 1429 tmp := alpha * x[i] 1430 sum := tmp * atmp[0] 1431 u := min(k, n-i-1) 1432 jy := incY 1433 for j := 1; j <= u; j++ { 1434 v := atmp[j] 1435 sum += alpha * x[i+j] * v 1436 y[iy+jy] += tmp * v 1437 jy += incY 1438 } 1439 y[iy] += sum 1440 iy += incY 1441 } 1442 return 1443 } 1444 ix := kx 1445 iy := ky 1446 for i := 0; i < n; i++ { 1447 atmp := a[i*lda:] 1448 tmp := alpha * x[ix] 1449 sum := tmp * atmp[0] 1450 u := min(k, n-i-1) 1451 jx := incX 1452 jy := incY 1453 for j := 1; j <= u; j++ { 1454 v := atmp[j] 1455 sum += alpha * x[ix+jx] * v 1456 y[iy+jy] += tmp * v 1457 jx += incX 1458 jy += incY 1459 } 1460 y[iy] += sum 1461 ix += incX 1462 iy += incY 1463 } 1464 return 1465 } 1466 1467 // Casses where a has bands below the diagonal. 1468 if incX == 1 { 1469 iy := ky 1470 for i := 0; i < n; i++ { 1471 l := max(0, k-i) 1472 tmp := alpha * x[i] 1473 jy := l * incY 1474 atmp := a[i*lda:] 1475 for j := l; j < k; j++ { 1476 v := atmp[j] 1477 y[iy] += alpha * v * x[i-k+j] 1478 y[iy-k*incY+jy] += tmp * v 1479 jy += incY 1480 } 1481 y[iy] += tmp * atmp[k] 1482 iy += incY 1483 } 1484 return 1485 } 1486 ix := kx 1487 iy := ky 1488 for i := 0; i < n; i++ { 1489 l := max(0, k-i) 1490 tmp := alpha * x[ix] 1491 jx := l * incX 1492 jy := l * incY 1493 atmp := a[i*lda:] 1494 for j := l; j < k; j++ { 1495 v := atmp[j] 1496 y[iy] += alpha * v * x[ix-k*incX+jx] 1497 y[iy-k*incY+jy] += tmp * v 1498 jx += incX 1499 jy += incY 1500 } 1501 y[iy] += tmp * atmp[k] 1502 ix += incX 1503 iy += incY 1504 } 1505 } 1506 1507 // Dsyr performs the symmetric rank-one update 1508 // A += alpha * x * xᵀ 1509 // where A is an n×n symmetric matrix, and x is a vector. 1510 func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { 1511 if ul != blas.Lower && ul != blas.Upper { 1512 panic(badUplo) 1513 } 1514 if n < 0 { 1515 panic(nLT0) 1516 } 1517 if lda < max(1, n) { 1518 panic(badLdA) 1519 } 1520 if incX == 0 { 1521 panic(zeroIncX) 1522 } 1523 1524 // Quick return if possible. 1525 if n == 0 { 1526 return 1527 } 1528 1529 // For zero matrix size the following slice length checks are trivially satisfied. 1530 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1531 panic(shortX) 1532 } 1533 if len(a) < lda*(n-1)+n { 1534 panic(shortA) 1535 } 1536 1537 // Quick return if possible. 1538 if alpha == 0 { 1539 return 1540 } 1541 1542 lenX := n 1543 var kx int 1544 if incX < 0 { 1545 kx = -(lenX - 1) * incX 1546 } 1547 if ul == blas.Upper { 1548 if incX == 1 { 1549 for i := 0; i < n; i++ { 1550 tmp := x[i] * alpha 1551 if tmp != 0 { 1552 atmp := a[i*lda+i : i*lda+n] 1553 xtmp := x[i:n] 1554 for j, v := range xtmp { 1555 atmp[j] += v * tmp 1556 } 1557 } 1558 } 1559 return 1560 } 1561 ix := kx 1562 for i := 0; i < n; i++ { 1563 tmp := x[ix] * alpha 1564 if tmp != 0 { 1565 jx := ix 1566 atmp := a[i*lda:] 1567 for j := i; j < n; j++ { 1568 atmp[j] += x[jx] * tmp 1569 jx += incX 1570 } 1571 } 1572 ix += incX 1573 } 1574 return 1575 } 1576 // Cases where a is lower triangular. 1577 if incX == 1 { 1578 for i := 0; i < n; i++ { 1579 tmp := x[i] * alpha 1580 if tmp != 0 { 1581 atmp := a[i*lda:] 1582 xtmp := x[:i+1] 1583 for j, v := range xtmp { 1584 atmp[j] += tmp * v 1585 } 1586 } 1587 } 1588 return 1589 } 1590 ix := kx 1591 for i := 0; i < n; i++ { 1592 tmp := x[ix] * alpha 1593 if tmp != 0 { 1594 atmp := a[i*lda:] 1595 jx := kx 1596 for j := 0; j < i+1; j++ { 1597 atmp[j] += tmp * x[jx] 1598 jx += incX 1599 } 1600 } 1601 ix += incX 1602 } 1603 } 1604 1605 // Dsyr2 performs the symmetric rank-two update 1606 // A += alpha * x * yᵀ + alpha * y * xᵀ 1607 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1608 func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 1609 if ul != blas.Lower && ul != blas.Upper { 1610 panic(badUplo) 1611 } 1612 if n < 0 { 1613 panic(nLT0) 1614 } 1615 if lda < max(1, n) { 1616 panic(badLdA) 1617 } 1618 if incX == 0 { 1619 panic(zeroIncX) 1620 } 1621 if incY == 0 { 1622 panic(zeroIncY) 1623 } 1624 1625 // Quick return if possible. 1626 if n == 0 { 1627 return 1628 } 1629 1630 // For zero matrix size the following slice length checks are trivially satisfied. 1631 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1632 panic(shortX) 1633 } 1634 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1635 panic(shortY) 1636 } 1637 if len(a) < lda*(n-1)+n { 1638 panic(shortA) 1639 } 1640 1641 // Quick return if possible. 1642 if alpha == 0 { 1643 return 1644 } 1645 1646 var ky, kx int 1647 if incY < 0 { 1648 ky = -(n - 1) * incY 1649 } 1650 if incX < 0 { 1651 kx = -(n - 1) * incX 1652 } 1653 if ul == blas.Upper { 1654 if incX == 1 && incY == 1 { 1655 for i := 0; i < n; i++ { 1656 xi := x[i] 1657 yi := y[i] 1658 atmp := a[i*lda:] 1659 for j := i; j < n; j++ { 1660 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1661 } 1662 } 1663 return 1664 } 1665 ix := kx 1666 iy := ky 1667 for i := 0; i < n; i++ { 1668 jx := kx + i*incX 1669 jy := ky + i*incY 1670 xi := x[ix] 1671 yi := y[iy] 1672 atmp := a[i*lda:] 1673 for j := i; j < n; j++ { 1674 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1675 jx += incX 1676 jy += incY 1677 } 1678 ix += incX 1679 iy += incY 1680 } 1681 return 1682 } 1683 if incX == 1 && incY == 1 { 1684 for i := 0; i < n; i++ { 1685 xi := x[i] 1686 yi := y[i] 1687 atmp := a[i*lda:] 1688 for j := 0; j <= i; j++ { 1689 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1690 } 1691 } 1692 return 1693 } 1694 ix := kx 1695 iy := ky 1696 for i := 0; i < n; i++ { 1697 jx := kx 1698 jy := ky 1699 xi := x[ix] 1700 yi := y[iy] 1701 atmp := a[i*lda:] 1702 for j := 0; j <= i; j++ { 1703 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1704 jx += incX 1705 jy += incY 1706 } 1707 ix += incX 1708 iy += incY 1709 } 1710 } 1711 1712 // Dtpsv solves one of the systems of equations 1713 // A * x = b if tA == blas.NoTrans 1714 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1715 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1716 // 1717 // At entry to the function, x contains the values of b, and the result is 1718 // stored in-place into x. 1719 // 1720 // No test for singularity or near-singularity is included in this 1721 // routine. Such tests must be performed before calling this routine. 1722 func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 1723 if ul != blas.Lower && ul != blas.Upper { 1724 panic(badUplo) 1725 } 1726 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1727 panic(badTranspose) 1728 } 1729 if d != blas.NonUnit && d != blas.Unit { 1730 panic(badDiag) 1731 } 1732 if n < 0 { 1733 panic(nLT0) 1734 } 1735 if incX == 0 { 1736 panic(zeroIncX) 1737 } 1738 1739 // Quick return if possible. 1740 if n == 0 { 1741 return 1742 } 1743 1744 // For zero matrix size the following slice length checks are trivially satisfied. 1745 if len(ap) < n*(n+1)/2 { 1746 panic(shortAP) 1747 } 1748 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1749 panic(shortX) 1750 } 1751 1752 var kx int 1753 if incX < 0 { 1754 kx = -(n - 1) * incX 1755 } 1756 1757 nonUnit := d == blas.NonUnit 1758 var offset int // Offset is the index of (i,i) 1759 if tA == blas.NoTrans { 1760 if ul == blas.Upper { 1761 offset = n*(n+1)/2 - 1 1762 if incX == 1 { 1763 for i := n - 1; i >= 0; i-- { 1764 atmp := ap[offset+1 : offset+n-i] 1765 xtmp := x[i+1:] 1766 var sum float64 1767 for j, v := range atmp { 1768 sum += v * xtmp[j] 1769 } 1770 x[i] -= sum 1771 if nonUnit { 1772 x[i] /= ap[offset] 1773 } 1774 offset -= n - i + 1 1775 } 1776 return 1777 } 1778 ix := kx + (n-1)*incX 1779 for i := n - 1; i >= 0; i-- { 1780 atmp := ap[offset+1 : offset+n-i] 1781 jx := kx + (i+1)*incX 1782 var sum float64 1783 for _, v := range atmp { 1784 sum += v * x[jx] 1785 jx += incX 1786 } 1787 x[ix] -= sum 1788 if nonUnit { 1789 x[ix] /= ap[offset] 1790 } 1791 ix -= incX 1792 offset -= n - i + 1 1793 } 1794 return 1795 } 1796 if incX == 1 { 1797 for i := 0; i < n; i++ { 1798 atmp := ap[offset-i : offset] 1799 var sum float64 1800 for j, v := range atmp { 1801 sum += v * x[j] 1802 } 1803 x[i] -= sum 1804 if nonUnit { 1805 x[i] /= ap[offset] 1806 } 1807 offset += i + 2 1808 } 1809 return 1810 } 1811 ix := kx 1812 for i := 0; i < n; i++ { 1813 jx := kx 1814 atmp := ap[offset-i : offset] 1815 var sum float64 1816 for _, v := range atmp { 1817 sum += v * x[jx] 1818 jx += incX 1819 } 1820 x[ix] -= sum 1821 if nonUnit { 1822 x[ix] /= ap[offset] 1823 } 1824 ix += incX 1825 offset += i + 2 1826 } 1827 return 1828 } 1829 // Cases where ap is transposed. 1830 if ul == blas.Upper { 1831 if incX == 1 { 1832 for i := 0; i < n; i++ { 1833 if nonUnit { 1834 x[i] /= ap[offset] 1835 } 1836 xi := x[i] 1837 atmp := ap[offset+1 : offset+n-i] 1838 xtmp := x[i+1:] 1839 for j, v := range atmp { 1840 xtmp[j] -= v * xi 1841 } 1842 offset += n - i 1843 } 1844 return 1845 } 1846 ix := kx 1847 for i := 0; i < n; i++ { 1848 if nonUnit { 1849 x[ix] /= ap[offset] 1850 } 1851 xix := x[ix] 1852 atmp := ap[offset+1 : offset+n-i] 1853 jx := kx + (i+1)*incX 1854 for _, v := range atmp { 1855 x[jx] -= v * xix 1856 jx += incX 1857 } 1858 ix += incX 1859 offset += n - i 1860 } 1861 return 1862 } 1863 if incX == 1 { 1864 offset = n*(n+1)/2 - 1 1865 for i := n - 1; i >= 0; i-- { 1866 if nonUnit { 1867 x[i] /= ap[offset] 1868 } 1869 xi := x[i] 1870 atmp := ap[offset-i : offset] 1871 for j, v := range atmp { 1872 x[j] -= v * xi 1873 } 1874 offset -= i + 1 1875 } 1876 return 1877 } 1878 ix := kx + (n-1)*incX 1879 offset = n*(n+1)/2 - 1 1880 for i := n - 1; i >= 0; i-- { 1881 if nonUnit { 1882 x[ix] /= ap[offset] 1883 } 1884 xix := x[ix] 1885 atmp := ap[offset-i : offset] 1886 jx := kx 1887 for _, v := range atmp { 1888 x[jx] -= v * xix 1889 jx += incX 1890 } 1891 ix -= incX 1892 offset -= i + 1 1893 } 1894 } 1895 1896 // Dspmv performs the matrix-vector operation 1897 // y = alpha * A * x + beta * y 1898 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 1899 // and alpha and beta are scalars. 1900 func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) { 1901 if ul != blas.Lower && ul != blas.Upper { 1902 panic(badUplo) 1903 } 1904 if n < 0 { 1905 panic(nLT0) 1906 } 1907 if incX == 0 { 1908 panic(zeroIncX) 1909 } 1910 if incY == 0 { 1911 panic(zeroIncY) 1912 } 1913 1914 // Quick return if possible. 1915 if n == 0 { 1916 return 1917 } 1918 1919 // For zero matrix size the following slice length checks are trivially satisfied. 1920 if len(ap) < n*(n+1)/2 { 1921 panic(shortAP) 1922 } 1923 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1924 panic(shortX) 1925 } 1926 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1927 panic(shortY) 1928 } 1929 1930 // Quick return if possible. 1931 if alpha == 0 && beta == 1 { 1932 return 1933 } 1934 1935 // Set up start points 1936 var kx, ky int 1937 if incX < 0 { 1938 kx = -(n - 1) * incX 1939 } 1940 if incY < 0 { 1941 ky = -(n - 1) * incY 1942 } 1943 1944 // Form y = beta * y. 1945 if beta != 1 { 1946 if incY == 1 { 1947 if beta == 0 { 1948 for i := range y[:n] { 1949 y[i] = 0 1950 } 1951 } else { 1952 f64.ScalUnitary(beta, y[:n]) 1953 } 1954 } else { 1955 iy := ky 1956 if beta == 0 { 1957 for i := 0; i < n; i++ { 1958 y[iy] = 0 1959 iy += incY 1960 } 1961 } else { 1962 if incY > 0 { 1963 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1964 } else { 1965 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1966 } 1967 } 1968 } 1969 } 1970 1971 if alpha == 0 { 1972 return 1973 } 1974 1975 if n == 1 { 1976 y[0] += alpha * ap[0] * x[0] 1977 return 1978 } 1979 var offset int // Offset is the index of (i,i). 1980 if ul == blas.Upper { 1981 if incX == 1 { 1982 iy := ky 1983 for i := 0; i < n; i++ { 1984 xv := x[i] * alpha 1985 sum := ap[offset] * x[i] 1986 atmp := ap[offset+1 : offset+n-i] 1987 xtmp := x[i+1:] 1988 jy := ky + (i+1)*incY 1989 for j, v := range atmp { 1990 sum += v * xtmp[j] 1991 y[jy] += v * xv 1992 jy += incY 1993 } 1994 y[iy] += alpha * sum 1995 iy += incY 1996 offset += n - i 1997 } 1998 return 1999 } 2000 ix := kx 2001 iy := ky 2002 for i := 0; i < n; i++ { 2003 xv := x[ix] * alpha 2004 sum := ap[offset] * x[ix] 2005 atmp := ap[offset+1 : offset+n-i] 2006 jx := kx + (i+1)*incX 2007 jy := ky + (i+1)*incY 2008 for _, v := range atmp { 2009 sum += v * x[jx] 2010 y[jy] += v * xv 2011 jx += incX 2012 jy += incY 2013 } 2014 y[iy] += alpha * sum 2015 ix += incX 2016 iy += incY 2017 offset += n - i 2018 } 2019 return 2020 } 2021 if incX == 1 { 2022 iy := ky 2023 for i := 0; i < n; i++ { 2024 xv := x[i] * alpha 2025 atmp := ap[offset-i : offset] 2026 jy := ky 2027 var sum float64 2028 for j, v := range atmp { 2029 sum += v * x[j] 2030 y[jy] += v * xv 2031 jy += incY 2032 } 2033 sum += ap[offset] * x[i] 2034 y[iy] += alpha * sum 2035 iy += incY 2036 offset += i + 2 2037 } 2038 return 2039 } 2040 ix := kx 2041 iy := ky 2042 for i := 0; i < n; i++ { 2043 xv := x[ix] * alpha 2044 atmp := ap[offset-i : offset] 2045 jx := kx 2046 jy := ky 2047 var sum float64 2048 for _, v := range atmp { 2049 sum += v * x[jx] 2050 y[jy] += v * xv 2051 jx += incX 2052 jy += incY 2053 } 2054 2055 sum += ap[offset] * x[ix] 2056 y[iy] += alpha * sum 2057 ix += incX 2058 iy += incY 2059 offset += i + 2 2060 } 2061 } 2062 2063 // Dspr performs the symmetric rank-one operation 2064 // A += alpha * x * xᵀ 2065 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2066 // alpha is a scalar. 2067 func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) { 2068 if ul != blas.Lower && ul != blas.Upper { 2069 panic(badUplo) 2070 } 2071 if n < 0 { 2072 panic(nLT0) 2073 } 2074 if incX == 0 { 2075 panic(zeroIncX) 2076 } 2077 2078 // Quick return if possible. 2079 if n == 0 { 2080 return 2081 } 2082 2083 // For zero matrix size the following slice length checks are trivially satisfied. 2084 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2085 panic(shortX) 2086 } 2087 if len(ap) < n*(n+1)/2 { 2088 panic(shortAP) 2089 } 2090 2091 // Quick return if possible. 2092 if alpha == 0 { 2093 return 2094 } 2095 2096 lenX := n 2097 var kx int 2098 if incX < 0 { 2099 kx = -(lenX - 1) * incX 2100 } 2101 var offset int // Offset is the index of (i,i). 2102 if ul == blas.Upper { 2103 if incX == 1 { 2104 for i := 0; i < n; i++ { 2105 atmp := ap[offset:] 2106 xv := alpha * x[i] 2107 xtmp := x[i:n] 2108 for j, v := range xtmp { 2109 atmp[j] += xv * v 2110 } 2111 offset += n - i 2112 } 2113 return 2114 } 2115 ix := kx 2116 for i := 0; i < n; i++ { 2117 jx := kx + i*incX 2118 atmp := ap[offset:] 2119 xv := alpha * x[ix] 2120 for j := 0; j < n-i; j++ { 2121 atmp[j] += xv * x[jx] 2122 jx += incX 2123 } 2124 ix += incX 2125 offset += n - i 2126 } 2127 return 2128 } 2129 if incX == 1 { 2130 for i := 0; i < n; i++ { 2131 atmp := ap[offset-i:] 2132 xv := alpha * x[i] 2133 xtmp := x[:i+1] 2134 for j, v := range xtmp { 2135 atmp[j] += xv * v 2136 } 2137 offset += i + 2 2138 } 2139 return 2140 } 2141 ix := kx 2142 for i := 0; i < n; i++ { 2143 jx := kx 2144 atmp := ap[offset-i:] 2145 xv := alpha * x[ix] 2146 for j := 0; j <= i; j++ { 2147 atmp[j] += xv * x[jx] 2148 jx += incX 2149 } 2150 ix += incX 2151 offset += i + 2 2152 } 2153 } 2154 2155 // Dspr2 performs the symmetric rank-2 update 2156 // A += alpha * x * yᵀ + alpha * y * xᵀ 2157 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2158 // and alpha is a scalar. 2159 func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) { 2160 if ul != blas.Lower && ul != blas.Upper { 2161 panic(badUplo) 2162 } 2163 if n < 0 { 2164 panic(nLT0) 2165 } 2166 if incX == 0 { 2167 panic(zeroIncX) 2168 } 2169 if incY == 0 { 2170 panic(zeroIncY) 2171 } 2172 2173 // Quick return if possible. 2174 if n == 0 { 2175 return 2176 } 2177 2178 // For zero matrix size the following slice length checks are trivially satisfied. 2179 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2180 panic(shortX) 2181 } 2182 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2183 panic(shortY) 2184 } 2185 if len(ap) < n*(n+1)/2 { 2186 panic(shortAP) 2187 } 2188 2189 // Quick return if possible. 2190 if alpha == 0 { 2191 return 2192 } 2193 2194 var ky, kx int 2195 if incY < 0 { 2196 ky = -(n - 1) * incY 2197 } 2198 if incX < 0 { 2199 kx = -(n - 1) * incX 2200 } 2201 var offset int // Offset is the index of (i,i). 2202 if ul == blas.Upper { 2203 if incX == 1 && incY == 1 { 2204 for i := 0; i < n; i++ { 2205 atmp := ap[offset:] 2206 xi := x[i] 2207 yi := y[i] 2208 xtmp := x[i:n] 2209 ytmp := y[i:n] 2210 for j, v := range xtmp { 2211 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2212 } 2213 offset += n - i 2214 } 2215 return 2216 } 2217 ix := kx 2218 iy := ky 2219 for i := 0; i < n; i++ { 2220 jx := kx + i*incX 2221 jy := ky + i*incY 2222 atmp := ap[offset:] 2223 xi := x[ix] 2224 yi := y[iy] 2225 for j := 0; j < n-i; j++ { 2226 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2227 jx += incX 2228 jy += incY 2229 } 2230 ix += incX 2231 iy += incY 2232 offset += n - i 2233 } 2234 return 2235 } 2236 if incX == 1 && incY == 1 { 2237 for i := 0; i < n; i++ { 2238 atmp := ap[offset-i:] 2239 xi := x[i] 2240 yi := y[i] 2241 xtmp := x[:i+1] 2242 for j, v := range xtmp { 2243 atmp[j] += alpha * (xi*y[j] + v*yi) 2244 } 2245 offset += i + 2 2246 } 2247 return 2248 } 2249 ix := kx 2250 iy := ky 2251 for i := 0; i < n; i++ { 2252 jx := kx 2253 jy := ky 2254 atmp := ap[offset-i:] 2255 for j := 0; j <= i; j++ { 2256 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2257 jx += incX 2258 jy += incY 2259 } 2260 ix += incX 2261 iy += incY 2262 offset += i + 2 2263 } 2264 }