github.com/gopherd/gonum@v0.0.4/blas/gonum/level2float64.go (about) 1 // Copyright ©2014 The Gonum Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 package gonum 6 7 import ( 8 "github.com/gopherd/gonum/blas" 9 "github.com/gopherd/gonum/internal/asm/f64" 10 ) 11 12 var _ blas.Float64Level2 = Implementation{} 13 14 // Dger performs the rank-one operation 15 // A += alpha * x * yᵀ 16 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 17 func (Implementation) Dger(m, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 18 if m < 0 { 19 panic(mLT0) 20 } 21 if n < 0 { 22 panic(nLT0) 23 } 24 if lda < max(1, n) { 25 panic(badLdA) 26 } 27 if incX == 0 { 28 panic(zeroIncX) 29 } 30 if incY == 0 { 31 panic(zeroIncY) 32 } 33 34 // Quick return if possible. 35 if m == 0 || n == 0 { 36 return 37 } 38 39 // For zero matrix size the following slice length checks are trivially satisfied. 40 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 41 panic(shortX) 42 } 43 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 44 panic(shortY) 45 } 46 if len(a) < lda*(m-1)+n { 47 panic(shortA) 48 } 49 50 // Quick return if possible. 51 if alpha == 0 { 52 return 53 } 54 f64.Ger(uintptr(m), uintptr(n), 55 alpha, 56 x, uintptr(incX), 57 y, uintptr(incY), 58 a, uintptr(lda)) 59 } 60 61 // Dgbmv performs one of the matrix-vector operations 62 // y = alpha * A * x + beta * y if tA == blas.NoTrans 63 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 64 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 65 // x and y are vectors, and alpha and beta are scalars. 66 func (Implementation) Dgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 67 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 68 panic(badTranspose) 69 } 70 if m < 0 { 71 panic(mLT0) 72 } 73 if n < 0 { 74 panic(nLT0) 75 } 76 if kL < 0 { 77 panic(kLLT0) 78 } 79 if kU < 0 { 80 panic(kULT0) 81 } 82 if lda < kL+kU+1 { 83 panic(badLdA) 84 } 85 if incX == 0 { 86 panic(zeroIncX) 87 } 88 if incY == 0 { 89 panic(zeroIncY) 90 } 91 92 // Quick return if possible. 93 if m == 0 || n == 0 { 94 return 95 } 96 97 // For zero matrix size the following slice length checks are trivially satisfied. 98 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 99 panic(shortA) 100 } 101 lenX := m 102 lenY := n 103 if tA == blas.NoTrans { 104 lenX = n 105 lenY = m 106 } 107 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 108 panic(shortX) 109 } 110 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 111 panic(shortY) 112 } 113 114 // Quick return if possible. 115 if alpha == 0 && beta == 1 { 116 return 117 } 118 119 var kx, ky int 120 if incX < 0 { 121 kx = -(lenX - 1) * incX 122 } 123 if incY < 0 { 124 ky = -(lenY - 1) * incY 125 } 126 127 // Form y = beta * y. 128 if beta != 1 { 129 if incY == 1 { 130 if beta == 0 { 131 for i := range y[:lenY] { 132 y[i] = 0 133 } 134 } else { 135 f64.ScalUnitary(beta, y[:lenY]) 136 } 137 } else { 138 iy := ky 139 if beta == 0 { 140 for i := 0; i < lenY; i++ { 141 y[iy] = 0 142 iy += incY 143 } 144 } else { 145 if incY > 0 { 146 f64.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 147 } else { 148 f64.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 149 } 150 } 151 } 152 } 153 154 if alpha == 0 { 155 return 156 } 157 158 // i and j are indices of the compacted banded matrix. 159 // off is the offset into the dense matrix (off + j = densej) 160 nCol := kU + 1 + kL 161 if tA == blas.NoTrans { 162 iy := ky 163 if incX == 1 { 164 for i := 0; i < min(m, n+kL); i++ { 165 l := max(0, kL-i) 166 u := min(nCol, n+kL-i) 167 off := max(0, i-kL) 168 atmp := a[i*lda+l : i*lda+u] 169 xtmp := x[off : off+u-l] 170 var sum float64 171 for j, v := range atmp { 172 sum += xtmp[j] * v 173 } 174 y[iy] += sum * alpha 175 iy += incY 176 } 177 return 178 } 179 for i := 0; i < min(m, n+kL); i++ { 180 l := max(0, kL-i) 181 u := min(nCol, n+kL-i) 182 off := max(0, i-kL) 183 atmp := a[i*lda+l : i*lda+u] 184 jx := kx 185 var sum float64 186 for _, v := range atmp { 187 sum += x[off*incX+jx] * v 188 jx += incX 189 } 190 y[iy] += sum * alpha 191 iy += incY 192 } 193 return 194 } 195 if incX == 1 { 196 for i := 0; i < min(m, n+kL); i++ { 197 l := max(0, kL-i) 198 u := min(nCol, n+kL-i) 199 off := max(0, i-kL) 200 atmp := a[i*lda+l : i*lda+u] 201 tmp := alpha * x[i] 202 jy := ky 203 for _, v := range atmp { 204 y[jy+off*incY] += tmp * v 205 jy += incY 206 } 207 } 208 return 209 } 210 ix := kx 211 for i := 0; i < min(m, n+kL); i++ { 212 l := max(0, kL-i) 213 u := min(nCol, n+kL-i) 214 off := max(0, i-kL) 215 atmp := a[i*lda+l : i*lda+u] 216 tmp := alpha * x[ix] 217 jy := ky 218 for _, v := range atmp { 219 y[jy+off*incY] += tmp * v 220 jy += incY 221 } 222 ix += incX 223 } 224 } 225 226 // Dgemv computes 227 // y = alpha * A * x + beta * y if tA = blas.NoTrans 228 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 229 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 230 func (Implementation) Dgemv(tA blas.Transpose, m, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 231 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 232 panic(badTranspose) 233 } 234 if m < 0 { 235 panic(mLT0) 236 } 237 if n < 0 { 238 panic(nLT0) 239 } 240 if lda < max(1, n) { 241 panic(badLdA) 242 } 243 if incX == 0 { 244 panic(zeroIncX) 245 } 246 if incY == 0 { 247 panic(zeroIncY) 248 } 249 // Set up indexes 250 lenX := m 251 lenY := n 252 if tA == blas.NoTrans { 253 lenX = n 254 lenY = m 255 } 256 257 // Quick return if possible 258 if m == 0 || n == 0 { 259 return 260 } 261 262 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 263 panic(shortX) 264 } 265 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 266 panic(shortY) 267 } 268 if len(a) < lda*(m-1)+n { 269 panic(shortA) 270 } 271 272 // Quick return if possible 273 if alpha == 0 && beta == 1 { 274 return 275 } 276 277 if alpha == 0 { 278 // First form y = beta * y 279 if incY > 0 { 280 Implementation{}.Dscal(lenY, beta, y, incY) 281 } else { 282 Implementation{}.Dscal(lenY, beta, y, -incY) 283 } 284 return 285 } 286 287 // Form y = alpha * A * x + y 288 if tA == blas.NoTrans { 289 f64.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 290 return 291 } 292 // Cases where a is transposed. 293 f64.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 294 } 295 296 // Dtrmv performs one of the matrix-vector operations 297 // x = A * x if tA == blas.NoTrans 298 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 299 // where A is an n×n triangular matrix, and x is a vector. 300 func (Implementation) Dtrmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 301 if ul != blas.Lower && ul != blas.Upper { 302 panic(badUplo) 303 } 304 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 305 panic(badTranspose) 306 } 307 if d != blas.NonUnit && d != blas.Unit { 308 panic(badDiag) 309 } 310 if n < 0 { 311 panic(nLT0) 312 } 313 if lda < max(1, n) { 314 panic(badLdA) 315 } 316 if incX == 0 { 317 panic(zeroIncX) 318 } 319 320 // Quick return if possible. 321 if n == 0 { 322 return 323 } 324 325 // For zero matrix size the following slice length checks are trivially satisfied. 326 if len(a) < lda*(n-1)+n { 327 panic(shortA) 328 } 329 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 330 panic(shortX) 331 } 332 333 nonUnit := d != blas.Unit 334 if n == 1 { 335 if nonUnit { 336 x[0] *= a[0] 337 } 338 return 339 } 340 var kx int 341 if incX <= 0 { 342 kx = -(n - 1) * incX 343 } 344 if tA == blas.NoTrans { 345 if ul == blas.Upper { 346 if incX == 1 { 347 for i := 0; i < n; i++ { 348 ilda := i * lda 349 var tmp float64 350 if nonUnit { 351 tmp = a[ilda+i] * x[i] 352 } else { 353 tmp = x[i] 354 } 355 x[i] = tmp + f64.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 356 } 357 return 358 } 359 ix := kx 360 for i := 0; i < n; i++ { 361 ilda := i * lda 362 var tmp float64 363 if nonUnit { 364 tmp = a[ilda+i] * x[ix] 365 } else { 366 tmp = x[ix] 367 } 368 x[ix] = tmp + f64.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 369 ix += incX 370 } 371 return 372 } 373 if incX == 1 { 374 for i := n - 1; i >= 0; i-- { 375 ilda := i * lda 376 var tmp float64 377 if nonUnit { 378 tmp += a[ilda+i] * x[i] 379 } else { 380 tmp = x[i] 381 } 382 x[i] = tmp + f64.DotUnitary(a[ilda:ilda+i], x[:i]) 383 } 384 return 385 } 386 ix := kx + (n-1)*incX 387 for i := n - 1; i >= 0; i-- { 388 ilda := i * lda 389 var tmp float64 390 if nonUnit { 391 tmp = a[ilda+i] * x[ix] 392 } else { 393 tmp = x[ix] 394 } 395 x[ix] = tmp + f64.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 396 ix -= incX 397 } 398 return 399 } 400 // Cases where a is transposed. 401 if ul == blas.Upper { 402 if incX == 1 { 403 for i := n - 1; i >= 0; i-- { 404 ilda := i * lda 405 xi := x[i] 406 f64.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 407 if nonUnit { 408 x[i] *= a[ilda+i] 409 } 410 } 411 return 412 } 413 ix := kx + (n-1)*incX 414 for i := n - 1; i >= 0; i-- { 415 ilda := i * lda 416 xi := x[ix] 417 f64.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 418 if nonUnit { 419 x[ix] *= a[ilda+i] 420 } 421 ix -= incX 422 } 423 return 424 } 425 if incX == 1 { 426 for i := 0; i < n; i++ { 427 ilda := i * lda 428 xi := x[i] 429 f64.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 430 if nonUnit { 431 x[i] *= a[i*lda+i] 432 } 433 } 434 return 435 } 436 ix := kx 437 for i := 0; i < n; i++ { 438 ilda := i * lda 439 xi := x[ix] 440 f64.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 441 if nonUnit { 442 x[ix] *= a[ilda+i] 443 } 444 ix += incX 445 } 446 } 447 448 // Dtrsv solves one of the systems of equations 449 // A * x = b if tA == blas.NoTrans 450 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 451 // where A is an n×n triangular matrix, and x and b are vectors. 452 // 453 // At entry to the function, x contains the values of b, and the result is 454 // stored in-place into x. 455 // 456 // No test for singularity or near-singularity is included in this 457 // routine. Such tests must be performed before calling this routine. 458 func (Implementation) Dtrsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float64, lda int, x []float64, incX int) { 459 if ul != blas.Lower && ul != blas.Upper { 460 panic(badUplo) 461 } 462 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 463 panic(badTranspose) 464 } 465 if d != blas.NonUnit && d != blas.Unit { 466 panic(badDiag) 467 } 468 if n < 0 { 469 panic(nLT0) 470 } 471 if lda < max(1, n) { 472 panic(badLdA) 473 } 474 if incX == 0 { 475 panic(zeroIncX) 476 } 477 478 // Quick return if possible. 479 if n == 0 { 480 return 481 } 482 483 // For zero matrix size the following slice length checks are trivially satisfied. 484 if len(a) < lda*(n-1)+n { 485 panic(shortA) 486 } 487 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 488 panic(shortX) 489 } 490 491 if n == 1 { 492 if d == blas.NonUnit { 493 x[0] /= a[0] 494 } 495 return 496 } 497 498 var kx int 499 if incX < 0 { 500 kx = -(n - 1) * incX 501 } 502 nonUnit := d == blas.NonUnit 503 if tA == blas.NoTrans { 504 if ul == blas.Upper { 505 if incX == 1 { 506 for i := n - 1; i >= 0; i-- { 507 var sum float64 508 atmp := a[i*lda+i+1 : i*lda+n] 509 for j, v := range atmp { 510 jv := i + j + 1 511 sum += x[jv] * v 512 } 513 x[i] -= sum 514 if nonUnit { 515 x[i] /= a[i*lda+i] 516 } 517 } 518 return 519 } 520 ix := kx + (n-1)*incX 521 for i := n - 1; i >= 0; i-- { 522 var sum float64 523 jx := ix + incX 524 atmp := a[i*lda+i+1 : i*lda+n] 525 for _, v := range atmp { 526 sum += x[jx] * v 527 jx += incX 528 } 529 x[ix] -= sum 530 if nonUnit { 531 x[ix] /= a[i*lda+i] 532 } 533 ix -= incX 534 } 535 return 536 } 537 if incX == 1 { 538 for i := 0; i < n; i++ { 539 var sum float64 540 atmp := a[i*lda : i*lda+i] 541 for j, v := range atmp { 542 sum += x[j] * v 543 } 544 x[i] -= sum 545 if nonUnit { 546 x[i] /= a[i*lda+i] 547 } 548 } 549 return 550 } 551 ix := kx 552 for i := 0; i < n; i++ { 553 jx := kx 554 var sum float64 555 atmp := a[i*lda : i*lda+i] 556 for _, v := range atmp { 557 sum += x[jx] * v 558 jx += incX 559 } 560 x[ix] -= sum 561 if nonUnit { 562 x[ix] /= a[i*lda+i] 563 } 564 ix += incX 565 } 566 return 567 } 568 // Cases where a is transposed. 569 if ul == blas.Upper { 570 if incX == 1 { 571 for i := 0; i < n; i++ { 572 if nonUnit { 573 x[i] /= a[i*lda+i] 574 } 575 xi := x[i] 576 atmp := a[i*lda+i+1 : i*lda+n] 577 for j, v := range atmp { 578 jv := j + i + 1 579 x[jv] -= v * xi 580 } 581 } 582 return 583 } 584 ix := kx 585 for i := 0; i < n; i++ { 586 if nonUnit { 587 x[ix] /= a[i*lda+i] 588 } 589 xi := x[ix] 590 jx := kx + (i+1)*incX 591 atmp := a[i*lda+i+1 : i*lda+n] 592 for _, v := range atmp { 593 x[jx] -= v * xi 594 jx += incX 595 } 596 ix += incX 597 } 598 return 599 } 600 if incX == 1 { 601 for i := n - 1; i >= 0; i-- { 602 if nonUnit { 603 x[i] /= a[i*lda+i] 604 } 605 xi := x[i] 606 atmp := a[i*lda : i*lda+i] 607 for j, v := range atmp { 608 x[j] -= v * xi 609 } 610 } 611 return 612 } 613 ix := kx + (n-1)*incX 614 for i := n - 1; i >= 0; i-- { 615 if nonUnit { 616 x[ix] /= a[i*lda+i] 617 } 618 xi := x[ix] 619 jx := kx 620 atmp := a[i*lda : i*lda+i] 621 for _, v := range atmp { 622 x[jx] -= v * xi 623 jx += incX 624 } 625 ix -= incX 626 } 627 } 628 629 // Dsymv performs the matrix-vector operation 630 // y = alpha * A * x + beta * y 631 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 632 // beta are scalars. 633 func (Implementation) Dsymv(ul blas.Uplo, n int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 634 if ul != blas.Lower && ul != blas.Upper { 635 panic(badUplo) 636 } 637 if n < 0 { 638 panic(nLT0) 639 } 640 if lda < max(1, n) { 641 panic(badLdA) 642 } 643 if incX == 0 { 644 panic(zeroIncX) 645 } 646 if incY == 0 { 647 panic(zeroIncY) 648 } 649 650 // Quick return if possible. 651 if n == 0 { 652 return 653 } 654 655 // For zero matrix size the following slice length checks are trivially satisfied. 656 if len(a) < lda*(n-1)+n { 657 panic(shortA) 658 } 659 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 660 panic(shortX) 661 } 662 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 663 panic(shortY) 664 } 665 666 // Quick return if possible. 667 if alpha == 0 && beta == 1 { 668 return 669 } 670 671 // Set up start points 672 var kx, ky int 673 if incX < 0 { 674 kx = -(n - 1) * incX 675 } 676 if incY < 0 { 677 ky = -(n - 1) * incY 678 } 679 680 // Form y = beta * y 681 if beta != 1 { 682 if incY == 1 { 683 if beta == 0 { 684 for i := range y[:n] { 685 y[i] = 0 686 } 687 } else { 688 f64.ScalUnitary(beta, y[:n]) 689 } 690 } else { 691 iy := ky 692 if beta == 0 { 693 for i := 0; i < n; i++ { 694 y[iy] = 0 695 iy += incY 696 } 697 } else { 698 if incY > 0 { 699 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 700 } else { 701 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 702 } 703 } 704 } 705 } 706 707 if alpha == 0 { 708 return 709 } 710 711 if n == 1 { 712 y[0] += alpha * a[0] * x[0] 713 return 714 } 715 716 if ul == blas.Upper { 717 if incX == 1 { 718 iy := ky 719 for i := 0; i < n; i++ { 720 xv := x[i] * alpha 721 sum := x[i] * a[i*lda+i] 722 jy := ky + (i+1)*incY 723 atmp := a[i*lda+i+1 : i*lda+n] 724 for j, v := range atmp { 725 jp := j + i + 1 726 sum += x[jp] * v 727 y[jy] += xv * v 728 jy += incY 729 } 730 y[iy] += alpha * sum 731 iy += incY 732 } 733 return 734 } 735 ix := kx 736 iy := ky 737 for i := 0; i < n; i++ { 738 xv := x[ix] * alpha 739 sum := x[ix] * a[i*lda+i] 740 jx := kx + (i+1)*incX 741 jy := ky + (i+1)*incY 742 atmp := a[i*lda+i+1 : i*lda+n] 743 for _, v := range atmp { 744 sum += x[jx] * v 745 y[jy] += xv * v 746 jx += incX 747 jy += incY 748 } 749 y[iy] += alpha * sum 750 ix += incX 751 iy += incY 752 } 753 return 754 } 755 // Cases where a is lower triangular. 756 if incX == 1 { 757 iy := ky 758 for i := 0; i < n; i++ { 759 jy := ky 760 xv := alpha * x[i] 761 atmp := a[i*lda : i*lda+i] 762 var sum float64 763 for j, v := range atmp { 764 sum += x[j] * v 765 y[jy] += xv * v 766 jy += incY 767 } 768 sum += x[i] * a[i*lda+i] 769 sum *= alpha 770 y[iy] += sum 771 iy += incY 772 } 773 return 774 } 775 ix := kx 776 iy := ky 777 for i := 0; i < n; i++ { 778 jx := kx 779 jy := ky 780 xv := alpha * x[ix] 781 atmp := a[i*lda : i*lda+i] 782 var sum float64 783 for _, v := range atmp { 784 sum += x[jx] * v 785 y[jy] += xv * v 786 jx += incX 787 jy += incY 788 } 789 sum += x[ix] * a[i*lda+i] 790 sum *= alpha 791 y[iy] += sum 792 ix += incX 793 iy += incY 794 } 795 } 796 797 // Dtbmv performs one of the matrix-vector operations 798 // x = A * x if tA == blas.NoTrans 799 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 800 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 801 func (Implementation) Dtbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 802 if ul != blas.Lower && ul != blas.Upper { 803 panic(badUplo) 804 } 805 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 806 panic(badTranspose) 807 } 808 if d != blas.NonUnit && d != blas.Unit { 809 panic(badDiag) 810 } 811 if n < 0 { 812 panic(nLT0) 813 } 814 if k < 0 { 815 panic(kLT0) 816 } 817 if lda < k+1 { 818 panic(badLdA) 819 } 820 if incX == 0 { 821 panic(zeroIncX) 822 } 823 824 // Quick return if possible. 825 if n == 0 { 826 return 827 } 828 829 // For zero matrix size the following slice length checks are trivially satisfied. 830 if len(a) < lda*(n-1)+k+1 { 831 panic(shortA) 832 } 833 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 834 panic(shortX) 835 } 836 837 var kx int 838 if incX < 0 { 839 kx = -(n - 1) * incX 840 } 841 842 nonunit := d != blas.Unit 843 844 if tA == blas.NoTrans { 845 if ul == blas.Upper { 846 if incX == 1 { 847 for i := 0; i < n; i++ { 848 u := min(1+k, n-i) 849 var sum float64 850 atmp := a[i*lda:] 851 xtmp := x[i:] 852 for j := 1; j < u; j++ { 853 sum += xtmp[j] * atmp[j] 854 } 855 if nonunit { 856 sum += xtmp[0] * atmp[0] 857 } else { 858 sum += xtmp[0] 859 } 860 x[i] = sum 861 } 862 return 863 } 864 ix := kx 865 for i := 0; i < n; i++ { 866 u := min(1+k, n-i) 867 var sum float64 868 atmp := a[i*lda:] 869 jx := incX 870 for j := 1; j < u; j++ { 871 sum += x[ix+jx] * atmp[j] 872 jx += incX 873 } 874 if nonunit { 875 sum += x[ix] * atmp[0] 876 } else { 877 sum += x[ix] 878 } 879 x[ix] = sum 880 ix += incX 881 } 882 return 883 } 884 if incX == 1 { 885 for i := n - 1; i >= 0; i-- { 886 l := max(0, k-i) 887 atmp := a[i*lda:] 888 var sum float64 889 for j := l; j < k; j++ { 890 sum += x[i-k+j] * atmp[j] 891 } 892 if nonunit { 893 sum += x[i] * atmp[k] 894 } else { 895 sum += x[i] 896 } 897 x[i] = sum 898 } 899 return 900 } 901 ix := kx + (n-1)*incX 902 for i := n - 1; i >= 0; i-- { 903 l := max(0, k-i) 904 atmp := a[i*lda:] 905 var sum float64 906 jx := l * incX 907 for j := l; j < k; j++ { 908 sum += x[ix-k*incX+jx] * atmp[j] 909 jx += incX 910 } 911 if nonunit { 912 sum += x[ix] * atmp[k] 913 } else { 914 sum += x[ix] 915 } 916 x[ix] = sum 917 ix -= incX 918 } 919 return 920 } 921 if ul == blas.Upper { 922 if incX == 1 { 923 for i := n - 1; i >= 0; i-- { 924 u := k + 1 925 if i < u { 926 u = i + 1 927 } 928 var sum float64 929 for j := 1; j < u; j++ { 930 sum += x[i-j] * a[(i-j)*lda+j] 931 } 932 if nonunit { 933 sum += x[i] * a[i*lda] 934 } else { 935 sum += x[i] 936 } 937 x[i] = sum 938 } 939 return 940 } 941 ix := kx + (n-1)*incX 942 for i := n - 1; i >= 0; i-- { 943 u := k + 1 944 if i < u { 945 u = i + 1 946 } 947 var sum float64 948 jx := incX 949 for j := 1; j < u; j++ { 950 sum += x[ix-jx] * a[(i-j)*lda+j] 951 jx += incX 952 } 953 if nonunit { 954 sum += x[ix] * a[i*lda] 955 } else { 956 sum += x[ix] 957 } 958 x[ix] = sum 959 ix -= incX 960 } 961 return 962 } 963 if incX == 1 { 964 for i := 0; i < n; i++ { 965 u := k 966 if i+k >= n { 967 u = n - i - 1 968 } 969 var sum float64 970 for j := 0; j < u; j++ { 971 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 972 } 973 if nonunit { 974 sum += x[i] * a[i*lda+k] 975 } else { 976 sum += x[i] 977 } 978 x[i] = sum 979 } 980 return 981 } 982 ix := kx 983 for i := 0; i < n; i++ { 984 u := k 985 if i+k >= n { 986 u = n - i - 1 987 } 988 var ( 989 sum float64 990 jx int 991 ) 992 for j := 0; j < u; j++ { 993 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 994 jx += incX 995 } 996 if nonunit { 997 sum += x[ix] * a[i*lda+k] 998 } else { 999 sum += x[ix] 1000 } 1001 x[ix] = sum 1002 ix += incX 1003 } 1004 } 1005 1006 // Dtpmv performs one of the matrix-vector operations 1007 // x = A * x if tA == blas.NoTrans 1008 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 1009 // where A is an n×n triangular matrix in packed format, and x is a vector. 1010 func (Implementation) Dtpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 1011 if ul != blas.Lower && ul != blas.Upper { 1012 panic(badUplo) 1013 } 1014 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1015 panic(badTranspose) 1016 } 1017 if d != blas.NonUnit && d != blas.Unit { 1018 panic(badDiag) 1019 } 1020 if n < 0 { 1021 panic(nLT0) 1022 } 1023 if incX == 0 { 1024 panic(zeroIncX) 1025 } 1026 1027 // Quick return if possible. 1028 if n == 0 { 1029 return 1030 } 1031 1032 // For zero matrix size the following slice length checks are trivially satisfied. 1033 if len(ap) < n*(n+1)/2 { 1034 panic(shortAP) 1035 } 1036 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1037 panic(shortX) 1038 } 1039 1040 var kx int 1041 if incX < 0 { 1042 kx = -(n - 1) * incX 1043 } 1044 1045 nonUnit := d == blas.NonUnit 1046 var offset int // Offset is the index of (i,i) 1047 if tA == blas.NoTrans { 1048 if ul == blas.Upper { 1049 if incX == 1 { 1050 for i := 0; i < n; i++ { 1051 xi := x[i] 1052 if nonUnit { 1053 xi *= ap[offset] 1054 } 1055 atmp := ap[offset+1 : offset+n-i] 1056 xtmp := x[i+1:] 1057 for j, v := range atmp { 1058 xi += v * xtmp[j] 1059 } 1060 x[i] = xi 1061 offset += n - i 1062 } 1063 return 1064 } 1065 ix := kx 1066 for i := 0; i < n; i++ { 1067 xix := x[ix] 1068 if nonUnit { 1069 xix *= ap[offset] 1070 } 1071 atmp := ap[offset+1 : offset+n-i] 1072 jx := kx + (i+1)*incX 1073 for _, v := range atmp { 1074 xix += v * x[jx] 1075 jx += incX 1076 } 1077 x[ix] = xix 1078 offset += n - i 1079 ix += incX 1080 } 1081 return 1082 } 1083 if incX == 1 { 1084 offset = n*(n+1)/2 - 1 1085 for i := n - 1; i >= 0; i-- { 1086 xi := x[i] 1087 if nonUnit { 1088 xi *= ap[offset] 1089 } 1090 atmp := ap[offset-i : offset] 1091 for j, v := range atmp { 1092 xi += v * x[j] 1093 } 1094 x[i] = xi 1095 offset -= i + 1 1096 } 1097 return 1098 } 1099 ix := kx + (n-1)*incX 1100 offset = n*(n+1)/2 - 1 1101 for i := n - 1; i >= 0; i-- { 1102 xix := x[ix] 1103 if nonUnit { 1104 xix *= ap[offset] 1105 } 1106 atmp := ap[offset-i : offset] 1107 jx := kx 1108 for _, v := range atmp { 1109 xix += v * x[jx] 1110 jx += incX 1111 } 1112 x[ix] = xix 1113 offset -= i + 1 1114 ix -= incX 1115 } 1116 return 1117 } 1118 // Cases where ap is transposed. 1119 if ul == blas.Upper { 1120 if incX == 1 { 1121 offset = n*(n+1)/2 - 1 1122 for i := n - 1; i >= 0; i-- { 1123 xi := x[i] 1124 atmp := ap[offset+1 : offset+n-i] 1125 xtmp := x[i+1:] 1126 for j, v := range atmp { 1127 xtmp[j] += v * xi 1128 } 1129 if nonUnit { 1130 x[i] *= ap[offset] 1131 } 1132 offset -= n - i + 1 1133 } 1134 return 1135 } 1136 ix := kx + (n-1)*incX 1137 offset = n*(n+1)/2 - 1 1138 for i := n - 1; i >= 0; i-- { 1139 xix := x[ix] 1140 jx := kx + (i+1)*incX 1141 atmp := ap[offset+1 : offset+n-i] 1142 for _, v := range atmp { 1143 x[jx] += v * xix 1144 jx += incX 1145 } 1146 if nonUnit { 1147 x[ix] *= ap[offset] 1148 } 1149 offset -= n - i + 1 1150 ix -= incX 1151 } 1152 return 1153 } 1154 if incX == 1 { 1155 for i := 0; i < n; i++ { 1156 xi := x[i] 1157 atmp := ap[offset-i : offset] 1158 for j, v := range atmp { 1159 x[j] += v * xi 1160 } 1161 if nonUnit { 1162 x[i] *= ap[offset] 1163 } 1164 offset += i + 2 1165 } 1166 return 1167 } 1168 ix := kx 1169 for i := 0; i < n; i++ { 1170 xix := x[ix] 1171 jx := kx 1172 atmp := ap[offset-i : offset] 1173 for _, v := range atmp { 1174 x[jx] += v * xix 1175 jx += incX 1176 } 1177 if nonUnit { 1178 x[ix] *= ap[offset] 1179 } 1180 ix += incX 1181 offset += i + 2 1182 } 1183 } 1184 1185 // Dtbsv solves one of the systems of equations 1186 // A * x = b if tA == blas.NoTrans 1187 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1188 // where A is an n×n triangular band matrix with k+1 diagonals, 1189 // and x and b are vectors. 1190 // 1191 // At entry to the function, x contains the values of b, and the result is 1192 // stored in-place into x. 1193 // 1194 // No test for singularity or near-singularity is included in this 1195 // routine. Such tests must be performed before calling this routine. 1196 func (Implementation) Dtbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float64, lda int, x []float64, incX int) { 1197 if ul != blas.Lower && ul != blas.Upper { 1198 panic(badUplo) 1199 } 1200 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1201 panic(badTranspose) 1202 } 1203 if d != blas.NonUnit && d != blas.Unit { 1204 panic(badDiag) 1205 } 1206 if n < 0 { 1207 panic(nLT0) 1208 } 1209 if k < 0 { 1210 panic(kLT0) 1211 } 1212 if lda < k+1 { 1213 panic(badLdA) 1214 } 1215 if incX == 0 { 1216 panic(zeroIncX) 1217 } 1218 1219 // Quick return if possible. 1220 if n == 0 { 1221 return 1222 } 1223 1224 // For zero matrix size the following slice length checks are trivially satisfied. 1225 if len(a) < lda*(n-1)+k+1 { 1226 panic(shortA) 1227 } 1228 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1229 panic(shortX) 1230 } 1231 1232 var kx int 1233 if incX < 0 { 1234 kx = -(n - 1) * incX 1235 } 1236 nonUnit := d == blas.NonUnit 1237 // Form x = A^-1 x. 1238 // Several cases below use subslices for speed improvement. 1239 // The incX != 1 cases usually do not because incX may be negative. 1240 if tA == blas.NoTrans { 1241 if ul == blas.Upper { 1242 if incX == 1 { 1243 for i := n - 1; i >= 0; i-- { 1244 bands := k 1245 if i+bands >= n { 1246 bands = n - i - 1 1247 } 1248 atmp := a[i*lda+1:] 1249 xtmp := x[i+1 : i+bands+1] 1250 var sum float64 1251 for j, v := range xtmp { 1252 sum += v * atmp[j] 1253 } 1254 x[i] -= sum 1255 if nonUnit { 1256 x[i] /= a[i*lda] 1257 } 1258 } 1259 return 1260 } 1261 ix := kx + (n-1)*incX 1262 for i := n - 1; i >= 0; i-- { 1263 max := k + 1 1264 if i+max > n { 1265 max = n - i 1266 } 1267 atmp := a[i*lda:] 1268 var ( 1269 jx int 1270 sum float64 1271 ) 1272 for j := 1; j < max; j++ { 1273 jx += incX 1274 sum += x[ix+jx] * atmp[j] 1275 } 1276 x[ix] -= sum 1277 if nonUnit { 1278 x[ix] /= atmp[0] 1279 } 1280 ix -= incX 1281 } 1282 return 1283 } 1284 if incX == 1 { 1285 for i := 0; i < n; i++ { 1286 bands := k 1287 if i-k < 0 { 1288 bands = i 1289 } 1290 atmp := a[i*lda+k-bands:] 1291 xtmp := x[i-bands : i] 1292 var sum float64 1293 for j, v := range xtmp { 1294 sum += v * atmp[j] 1295 } 1296 x[i] -= sum 1297 if nonUnit { 1298 x[i] /= atmp[bands] 1299 } 1300 } 1301 return 1302 } 1303 ix := kx 1304 for i := 0; i < n; i++ { 1305 bands := k 1306 if i-k < 0 { 1307 bands = i 1308 } 1309 atmp := a[i*lda+k-bands:] 1310 var ( 1311 sum float64 1312 jx int 1313 ) 1314 for j := 0; j < bands; j++ { 1315 sum += x[ix-bands*incX+jx] * atmp[j] 1316 jx += incX 1317 } 1318 x[ix] -= sum 1319 if nonUnit { 1320 x[ix] /= atmp[bands] 1321 } 1322 ix += incX 1323 } 1324 return 1325 } 1326 // Cases where a is transposed. 1327 if ul == blas.Upper { 1328 if incX == 1 { 1329 for i := 0; i < n; i++ { 1330 bands := k 1331 if i-k < 0 { 1332 bands = i 1333 } 1334 var sum float64 1335 for j := 0; j < bands; j++ { 1336 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1337 } 1338 x[i] -= sum 1339 if nonUnit { 1340 x[i] /= a[i*lda] 1341 } 1342 } 1343 return 1344 } 1345 ix := kx 1346 for i := 0; i < n; i++ { 1347 bands := k 1348 if i-k < 0 { 1349 bands = i 1350 } 1351 var ( 1352 sum float64 1353 jx int 1354 ) 1355 for j := 0; j < bands; j++ { 1356 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1357 jx += incX 1358 } 1359 x[ix] -= sum 1360 if nonUnit { 1361 x[ix] /= a[i*lda] 1362 } 1363 ix += incX 1364 } 1365 return 1366 } 1367 if incX == 1 { 1368 for i := n - 1; i >= 0; i-- { 1369 bands := k 1370 if i+bands >= n { 1371 bands = n - i - 1 1372 } 1373 var sum float64 1374 xtmp := x[i+1 : i+1+bands] 1375 for j, v := range xtmp { 1376 sum += v * a[(i+j+1)*lda+k-j-1] 1377 } 1378 x[i] -= sum 1379 if nonUnit { 1380 x[i] /= a[i*lda+k] 1381 } 1382 } 1383 return 1384 } 1385 ix := kx + (n-1)*incX 1386 for i := n - 1; i >= 0; i-- { 1387 bands := k 1388 if i+bands >= n { 1389 bands = n - i - 1 1390 } 1391 var ( 1392 sum float64 1393 jx int 1394 ) 1395 for j := 0; j < bands; j++ { 1396 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1397 jx += incX 1398 } 1399 x[ix] -= sum 1400 if nonUnit { 1401 x[ix] /= a[i*lda+k] 1402 } 1403 ix -= incX 1404 } 1405 } 1406 1407 // Dsbmv performs the matrix-vector operation 1408 // y = alpha * A * x + beta * y 1409 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1410 // vectors, and alpha and beta are scalars. 1411 func (Implementation) Dsbmv(ul blas.Uplo, n, k int, alpha float64, a []float64, lda int, x []float64, incX int, beta float64, y []float64, incY int) { 1412 if ul != blas.Lower && ul != blas.Upper { 1413 panic(badUplo) 1414 } 1415 if n < 0 { 1416 panic(nLT0) 1417 } 1418 if k < 0 { 1419 panic(kLT0) 1420 } 1421 if lda < k+1 { 1422 panic(badLdA) 1423 } 1424 if incX == 0 { 1425 panic(zeroIncX) 1426 } 1427 if incY == 0 { 1428 panic(zeroIncY) 1429 } 1430 1431 // Quick return if possible. 1432 if n == 0 { 1433 return 1434 } 1435 1436 // For zero matrix size the following slice length checks are trivially satisfied. 1437 if len(a) < lda*(n-1)+k+1 { 1438 panic(shortA) 1439 } 1440 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1441 panic(shortX) 1442 } 1443 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1444 panic(shortY) 1445 } 1446 1447 // Quick return if possible. 1448 if alpha == 0 && beta == 1 { 1449 return 1450 } 1451 1452 // Set up indexes 1453 lenX := n 1454 lenY := n 1455 var kx, ky int 1456 if incX < 0 { 1457 kx = -(lenX - 1) * incX 1458 } 1459 if incY < 0 { 1460 ky = -(lenY - 1) * incY 1461 } 1462 1463 // Form y = beta * y. 1464 if beta != 1 { 1465 if incY == 1 { 1466 if beta == 0 { 1467 for i := range y[:n] { 1468 y[i] = 0 1469 } 1470 } else { 1471 f64.ScalUnitary(beta, y[:n]) 1472 } 1473 } else { 1474 iy := ky 1475 if beta == 0 { 1476 for i := 0; i < n; i++ { 1477 y[iy] = 0 1478 iy += incY 1479 } 1480 } else { 1481 if incY > 0 { 1482 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1483 } else { 1484 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1485 } 1486 } 1487 } 1488 } 1489 1490 if alpha == 0 { 1491 return 1492 } 1493 1494 if ul == blas.Upper { 1495 if incX == 1 { 1496 iy := ky 1497 for i := 0; i < n; i++ { 1498 atmp := a[i*lda:] 1499 tmp := alpha * x[i] 1500 sum := tmp * atmp[0] 1501 u := min(k, n-i-1) 1502 jy := incY 1503 for j := 1; j <= u; j++ { 1504 v := atmp[j] 1505 sum += alpha * x[i+j] * v 1506 y[iy+jy] += tmp * v 1507 jy += incY 1508 } 1509 y[iy] += sum 1510 iy += incY 1511 } 1512 return 1513 } 1514 ix := kx 1515 iy := ky 1516 for i := 0; i < n; i++ { 1517 atmp := a[i*lda:] 1518 tmp := alpha * x[ix] 1519 sum := tmp * atmp[0] 1520 u := min(k, n-i-1) 1521 jx := incX 1522 jy := incY 1523 for j := 1; j <= u; j++ { 1524 v := atmp[j] 1525 sum += alpha * x[ix+jx] * v 1526 y[iy+jy] += tmp * v 1527 jx += incX 1528 jy += incY 1529 } 1530 y[iy] += sum 1531 ix += incX 1532 iy += incY 1533 } 1534 return 1535 } 1536 1537 // Casses where a has bands below the diagonal. 1538 if incX == 1 { 1539 iy := ky 1540 for i := 0; i < n; i++ { 1541 l := max(0, k-i) 1542 tmp := alpha * x[i] 1543 jy := l * incY 1544 atmp := a[i*lda:] 1545 for j := l; j < k; j++ { 1546 v := atmp[j] 1547 y[iy] += alpha * v * x[i-k+j] 1548 y[iy-k*incY+jy] += tmp * v 1549 jy += incY 1550 } 1551 y[iy] += tmp * atmp[k] 1552 iy += incY 1553 } 1554 return 1555 } 1556 ix := kx 1557 iy := ky 1558 for i := 0; i < n; i++ { 1559 l := max(0, k-i) 1560 tmp := alpha * x[ix] 1561 jx := l * incX 1562 jy := l * incY 1563 atmp := a[i*lda:] 1564 for j := l; j < k; j++ { 1565 v := atmp[j] 1566 y[iy] += alpha * v * x[ix-k*incX+jx] 1567 y[iy-k*incY+jy] += tmp * v 1568 jx += incX 1569 jy += incY 1570 } 1571 y[iy] += tmp * atmp[k] 1572 ix += incX 1573 iy += incY 1574 } 1575 } 1576 1577 // Dsyr performs the symmetric rank-one update 1578 // A += alpha * x * xᵀ 1579 // where A is an n×n symmetric matrix, and x is a vector. 1580 func (Implementation) Dsyr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, a []float64, lda int) { 1581 if ul != blas.Lower && ul != blas.Upper { 1582 panic(badUplo) 1583 } 1584 if n < 0 { 1585 panic(nLT0) 1586 } 1587 if lda < max(1, n) { 1588 panic(badLdA) 1589 } 1590 if incX == 0 { 1591 panic(zeroIncX) 1592 } 1593 1594 // Quick return if possible. 1595 if n == 0 { 1596 return 1597 } 1598 1599 // For zero matrix size the following slice length checks are trivially satisfied. 1600 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1601 panic(shortX) 1602 } 1603 if len(a) < lda*(n-1)+n { 1604 panic(shortA) 1605 } 1606 1607 // Quick return if possible. 1608 if alpha == 0 { 1609 return 1610 } 1611 1612 lenX := n 1613 var kx int 1614 if incX < 0 { 1615 kx = -(lenX - 1) * incX 1616 } 1617 if ul == blas.Upper { 1618 if incX == 1 { 1619 for i := 0; i < n; i++ { 1620 tmp := x[i] * alpha 1621 if tmp != 0 { 1622 atmp := a[i*lda+i : i*lda+n] 1623 xtmp := x[i:n] 1624 for j, v := range xtmp { 1625 atmp[j] += v * tmp 1626 } 1627 } 1628 } 1629 return 1630 } 1631 ix := kx 1632 for i := 0; i < n; i++ { 1633 tmp := x[ix] * alpha 1634 if tmp != 0 { 1635 jx := ix 1636 atmp := a[i*lda:] 1637 for j := i; j < n; j++ { 1638 atmp[j] += x[jx] * tmp 1639 jx += incX 1640 } 1641 } 1642 ix += incX 1643 } 1644 return 1645 } 1646 // Cases where a is lower triangular. 1647 if incX == 1 { 1648 for i := 0; i < n; i++ { 1649 tmp := x[i] * alpha 1650 if tmp != 0 { 1651 atmp := a[i*lda:] 1652 xtmp := x[:i+1] 1653 for j, v := range xtmp { 1654 atmp[j] += tmp * v 1655 } 1656 } 1657 } 1658 return 1659 } 1660 ix := kx 1661 for i := 0; i < n; i++ { 1662 tmp := x[ix] * alpha 1663 if tmp != 0 { 1664 atmp := a[i*lda:] 1665 jx := kx 1666 for j := 0; j < i+1; j++ { 1667 atmp[j] += tmp * x[jx] 1668 jx += incX 1669 } 1670 } 1671 ix += incX 1672 } 1673 } 1674 1675 // Dsyr2 performs the symmetric rank-two update 1676 // A += alpha * x * yᵀ + alpha * y * xᵀ 1677 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1678 func (Implementation) Dsyr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, a []float64, lda int) { 1679 if ul != blas.Lower && ul != blas.Upper { 1680 panic(badUplo) 1681 } 1682 if n < 0 { 1683 panic(nLT0) 1684 } 1685 if lda < max(1, n) { 1686 panic(badLdA) 1687 } 1688 if incX == 0 { 1689 panic(zeroIncX) 1690 } 1691 if incY == 0 { 1692 panic(zeroIncY) 1693 } 1694 1695 // Quick return if possible. 1696 if n == 0 { 1697 return 1698 } 1699 1700 // For zero matrix size the following slice length checks are trivially satisfied. 1701 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1702 panic(shortX) 1703 } 1704 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1705 panic(shortY) 1706 } 1707 if len(a) < lda*(n-1)+n { 1708 panic(shortA) 1709 } 1710 1711 // Quick return if possible. 1712 if alpha == 0 { 1713 return 1714 } 1715 1716 var ky, kx int 1717 if incY < 0 { 1718 ky = -(n - 1) * incY 1719 } 1720 if incX < 0 { 1721 kx = -(n - 1) * incX 1722 } 1723 if ul == blas.Upper { 1724 if incX == 1 && incY == 1 { 1725 for i := 0; i < n; i++ { 1726 xi := x[i] 1727 yi := y[i] 1728 atmp := a[i*lda:] 1729 for j := i; j < n; j++ { 1730 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1731 } 1732 } 1733 return 1734 } 1735 ix := kx 1736 iy := ky 1737 for i := 0; i < n; i++ { 1738 jx := kx + i*incX 1739 jy := ky + i*incY 1740 xi := x[ix] 1741 yi := y[iy] 1742 atmp := a[i*lda:] 1743 for j := i; j < n; j++ { 1744 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1745 jx += incX 1746 jy += incY 1747 } 1748 ix += incX 1749 iy += incY 1750 } 1751 return 1752 } 1753 if incX == 1 && incY == 1 { 1754 for i := 0; i < n; i++ { 1755 xi := x[i] 1756 yi := y[i] 1757 atmp := a[i*lda:] 1758 for j := 0; j <= i; j++ { 1759 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1760 } 1761 } 1762 return 1763 } 1764 ix := kx 1765 iy := ky 1766 for i := 0; i < n; i++ { 1767 jx := kx 1768 jy := ky 1769 xi := x[ix] 1770 yi := y[iy] 1771 atmp := a[i*lda:] 1772 for j := 0; j <= i; j++ { 1773 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1774 jx += incX 1775 jy += incY 1776 } 1777 ix += incX 1778 iy += incY 1779 } 1780 } 1781 1782 // Dtpsv solves one of the systems of equations 1783 // A * x = b if tA == blas.NoTrans 1784 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1785 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1786 // 1787 // At entry to the function, x contains the values of b, and the result is 1788 // stored in-place into x. 1789 // 1790 // No test for singularity or near-singularity is included in this 1791 // routine. Such tests must be performed before calling this routine. 1792 func (Implementation) Dtpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float64, x []float64, incX int) { 1793 if ul != blas.Lower && ul != blas.Upper { 1794 panic(badUplo) 1795 } 1796 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1797 panic(badTranspose) 1798 } 1799 if d != blas.NonUnit && d != blas.Unit { 1800 panic(badDiag) 1801 } 1802 if n < 0 { 1803 panic(nLT0) 1804 } 1805 if incX == 0 { 1806 panic(zeroIncX) 1807 } 1808 1809 // Quick return if possible. 1810 if n == 0 { 1811 return 1812 } 1813 1814 // For zero matrix size the following slice length checks are trivially satisfied. 1815 if len(ap) < n*(n+1)/2 { 1816 panic(shortAP) 1817 } 1818 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1819 panic(shortX) 1820 } 1821 1822 var kx int 1823 if incX < 0 { 1824 kx = -(n - 1) * incX 1825 } 1826 1827 nonUnit := d == blas.NonUnit 1828 var offset int // Offset is the index of (i,i) 1829 if tA == blas.NoTrans { 1830 if ul == blas.Upper { 1831 offset = n*(n+1)/2 - 1 1832 if incX == 1 { 1833 for i := n - 1; i >= 0; i-- { 1834 atmp := ap[offset+1 : offset+n-i] 1835 xtmp := x[i+1:] 1836 var sum float64 1837 for j, v := range atmp { 1838 sum += v * xtmp[j] 1839 } 1840 x[i] -= sum 1841 if nonUnit { 1842 x[i] /= ap[offset] 1843 } 1844 offset -= n - i + 1 1845 } 1846 return 1847 } 1848 ix := kx + (n-1)*incX 1849 for i := n - 1; i >= 0; i-- { 1850 atmp := ap[offset+1 : offset+n-i] 1851 jx := kx + (i+1)*incX 1852 var sum float64 1853 for _, v := range atmp { 1854 sum += v * x[jx] 1855 jx += incX 1856 } 1857 x[ix] -= sum 1858 if nonUnit { 1859 x[ix] /= ap[offset] 1860 } 1861 ix -= incX 1862 offset -= n - i + 1 1863 } 1864 return 1865 } 1866 if incX == 1 { 1867 for i := 0; i < n; i++ { 1868 atmp := ap[offset-i : offset] 1869 var sum float64 1870 for j, v := range atmp { 1871 sum += v * x[j] 1872 } 1873 x[i] -= sum 1874 if nonUnit { 1875 x[i] /= ap[offset] 1876 } 1877 offset += i + 2 1878 } 1879 return 1880 } 1881 ix := kx 1882 for i := 0; i < n; i++ { 1883 jx := kx 1884 atmp := ap[offset-i : offset] 1885 var sum float64 1886 for _, v := range atmp { 1887 sum += v * x[jx] 1888 jx += incX 1889 } 1890 x[ix] -= sum 1891 if nonUnit { 1892 x[ix] /= ap[offset] 1893 } 1894 ix += incX 1895 offset += i + 2 1896 } 1897 return 1898 } 1899 // Cases where ap is transposed. 1900 if ul == blas.Upper { 1901 if incX == 1 { 1902 for i := 0; i < n; i++ { 1903 if nonUnit { 1904 x[i] /= ap[offset] 1905 } 1906 xi := x[i] 1907 atmp := ap[offset+1 : offset+n-i] 1908 xtmp := x[i+1:] 1909 for j, v := range atmp { 1910 xtmp[j] -= v * xi 1911 } 1912 offset += n - i 1913 } 1914 return 1915 } 1916 ix := kx 1917 for i := 0; i < n; i++ { 1918 if nonUnit { 1919 x[ix] /= ap[offset] 1920 } 1921 xix := x[ix] 1922 atmp := ap[offset+1 : offset+n-i] 1923 jx := kx + (i+1)*incX 1924 for _, v := range atmp { 1925 x[jx] -= v * xix 1926 jx += incX 1927 } 1928 ix += incX 1929 offset += n - i 1930 } 1931 return 1932 } 1933 if incX == 1 { 1934 offset = n*(n+1)/2 - 1 1935 for i := n - 1; i >= 0; i-- { 1936 if nonUnit { 1937 x[i] /= ap[offset] 1938 } 1939 xi := x[i] 1940 atmp := ap[offset-i : offset] 1941 for j, v := range atmp { 1942 x[j] -= v * xi 1943 } 1944 offset -= i + 1 1945 } 1946 return 1947 } 1948 ix := kx + (n-1)*incX 1949 offset = n*(n+1)/2 - 1 1950 for i := n - 1; i >= 0; i-- { 1951 if nonUnit { 1952 x[ix] /= ap[offset] 1953 } 1954 xix := x[ix] 1955 atmp := ap[offset-i : offset] 1956 jx := kx 1957 for _, v := range atmp { 1958 x[jx] -= v * xix 1959 jx += incX 1960 } 1961 ix -= incX 1962 offset -= i + 1 1963 } 1964 } 1965 1966 // Dspmv performs the matrix-vector operation 1967 // y = alpha * A * x + beta * y 1968 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 1969 // and alpha and beta are scalars. 1970 func (Implementation) Dspmv(ul blas.Uplo, n int, alpha float64, ap []float64, x []float64, incX int, beta float64, y []float64, incY int) { 1971 if ul != blas.Lower && ul != blas.Upper { 1972 panic(badUplo) 1973 } 1974 if n < 0 { 1975 panic(nLT0) 1976 } 1977 if incX == 0 { 1978 panic(zeroIncX) 1979 } 1980 if incY == 0 { 1981 panic(zeroIncY) 1982 } 1983 1984 // Quick return if possible. 1985 if n == 0 { 1986 return 1987 } 1988 1989 // For zero matrix size the following slice length checks are trivially satisfied. 1990 if len(ap) < n*(n+1)/2 { 1991 panic(shortAP) 1992 } 1993 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1994 panic(shortX) 1995 } 1996 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1997 panic(shortY) 1998 } 1999 2000 // Quick return if possible. 2001 if alpha == 0 && beta == 1 { 2002 return 2003 } 2004 2005 // Set up start points 2006 var kx, ky int 2007 if incX < 0 { 2008 kx = -(n - 1) * incX 2009 } 2010 if incY < 0 { 2011 ky = -(n - 1) * incY 2012 } 2013 2014 // Form y = beta * y. 2015 if beta != 1 { 2016 if incY == 1 { 2017 if beta == 0 { 2018 for i := range y[:n] { 2019 y[i] = 0 2020 } 2021 } else { 2022 f64.ScalUnitary(beta, y[:n]) 2023 } 2024 } else { 2025 iy := ky 2026 if beta == 0 { 2027 for i := 0; i < n; i++ { 2028 y[iy] = 0 2029 iy += incY 2030 } 2031 } else { 2032 if incY > 0 { 2033 f64.ScalInc(beta, y, uintptr(n), uintptr(incY)) 2034 } else { 2035 f64.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 2036 } 2037 } 2038 } 2039 } 2040 2041 if alpha == 0 { 2042 return 2043 } 2044 2045 if n == 1 { 2046 y[0] += alpha * ap[0] * x[0] 2047 return 2048 } 2049 var offset int // Offset is the index of (i,i). 2050 if ul == blas.Upper { 2051 if incX == 1 { 2052 iy := ky 2053 for i := 0; i < n; i++ { 2054 xv := x[i] * alpha 2055 sum := ap[offset] * x[i] 2056 atmp := ap[offset+1 : offset+n-i] 2057 xtmp := x[i+1:] 2058 jy := ky + (i+1)*incY 2059 for j, v := range atmp { 2060 sum += v * xtmp[j] 2061 y[jy] += v * xv 2062 jy += incY 2063 } 2064 y[iy] += alpha * sum 2065 iy += incY 2066 offset += n - i 2067 } 2068 return 2069 } 2070 ix := kx 2071 iy := ky 2072 for i := 0; i < n; i++ { 2073 xv := x[ix] * alpha 2074 sum := ap[offset] * x[ix] 2075 atmp := ap[offset+1 : offset+n-i] 2076 jx := kx + (i+1)*incX 2077 jy := ky + (i+1)*incY 2078 for _, v := range atmp { 2079 sum += v * x[jx] 2080 y[jy] += v * xv 2081 jx += incX 2082 jy += incY 2083 } 2084 y[iy] += alpha * sum 2085 ix += incX 2086 iy += incY 2087 offset += n - i 2088 } 2089 return 2090 } 2091 if incX == 1 { 2092 iy := ky 2093 for i := 0; i < n; i++ { 2094 xv := x[i] * alpha 2095 atmp := ap[offset-i : offset] 2096 jy := ky 2097 var sum float64 2098 for j, v := range atmp { 2099 sum += v * x[j] 2100 y[jy] += v * xv 2101 jy += incY 2102 } 2103 sum += ap[offset] * x[i] 2104 y[iy] += alpha * sum 2105 iy += incY 2106 offset += i + 2 2107 } 2108 return 2109 } 2110 ix := kx 2111 iy := ky 2112 for i := 0; i < n; i++ { 2113 xv := x[ix] * alpha 2114 atmp := ap[offset-i : offset] 2115 jx := kx 2116 jy := ky 2117 var sum float64 2118 for _, v := range atmp { 2119 sum += v * x[jx] 2120 y[jy] += v * xv 2121 jx += incX 2122 jy += incY 2123 } 2124 2125 sum += ap[offset] * x[ix] 2126 y[iy] += alpha * sum 2127 ix += incX 2128 iy += incY 2129 offset += i + 2 2130 } 2131 } 2132 2133 // Dspr performs the symmetric rank-one operation 2134 // A += alpha * x * xᵀ 2135 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2136 // alpha is a scalar. 2137 func (Implementation) Dspr(ul blas.Uplo, n int, alpha float64, x []float64, incX int, ap []float64) { 2138 if ul != blas.Lower && ul != blas.Upper { 2139 panic(badUplo) 2140 } 2141 if n < 0 { 2142 panic(nLT0) 2143 } 2144 if incX == 0 { 2145 panic(zeroIncX) 2146 } 2147 2148 // Quick return if possible. 2149 if n == 0 { 2150 return 2151 } 2152 2153 // For zero matrix size the following slice length checks are trivially satisfied. 2154 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2155 panic(shortX) 2156 } 2157 if len(ap) < n*(n+1)/2 { 2158 panic(shortAP) 2159 } 2160 2161 // Quick return if possible. 2162 if alpha == 0 { 2163 return 2164 } 2165 2166 lenX := n 2167 var kx int 2168 if incX < 0 { 2169 kx = -(lenX - 1) * incX 2170 } 2171 var offset int // Offset is the index of (i,i). 2172 if ul == blas.Upper { 2173 if incX == 1 { 2174 for i := 0; i < n; i++ { 2175 atmp := ap[offset:] 2176 xv := alpha * x[i] 2177 xtmp := x[i:n] 2178 for j, v := range xtmp { 2179 atmp[j] += xv * v 2180 } 2181 offset += n - i 2182 } 2183 return 2184 } 2185 ix := kx 2186 for i := 0; i < n; i++ { 2187 jx := kx + i*incX 2188 atmp := ap[offset:] 2189 xv := alpha * x[ix] 2190 for j := 0; j < n-i; j++ { 2191 atmp[j] += xv * x[jx] 2192 jx += incX 2193 } 2194 ix += incX 2195 offset += n - i 2196 } 2197 return 2198 } 2199 if incX == 1 { 2200 for i := 0; i < n; i++ { 2201 atmp := ap[offset-i:] 2202 xv := alpha * x[i] 2203 xtmp := x[:i+1] 2204 for j, v := range xtmp { 2205 atmp[j] += xv * v 2206 } 2207 offset += i + 2 2208 } 2209 return 2210 } 2211 ix := kx 2212 for i := 0; i < n; i++ { 2213 jx := kx 2214 atmp := ap[offset-i:] 2215 xv := alpha * x[ix] 2216 for j := 0; j <= i; j++ { 2217 atmp[j] += xv * x[jx] 2218 jx += incX 2219 } 2220 ix += incX 2221 offset += i + 2 2222 } 2223 } 2224 2225 // Dspr2 performs the symmetric rank-2 update 2226 // A += alpha * x * yᵀ + alpha * y * xᵀ 2227 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2228 // and alpha is a scalar. 2229 func (Implementation) Dspr2(ul blas.Uplo, n int, alpha float64, x []float64, incX int, y []float64, incY int, ap []float64) { 2230 if ul != blas.Lower && ul != blas.Upper { 2231 panic(badUplo) 2232 } 2233 if n < 0 { 2234 panic(nLT0) 2235 } 2236 if incX == 0 { 2237 panic(zeroIncX) 2238 } 2239 if incY == 0 { 2240 panic(zeroIncY) 2241 } 2242 2243 // Quick return if possible. 2244 if n == 0 { 2245 return 2246 } 2247 2248 // For zero matrix size the following slice length checks are trivially satisfied. 2249 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2250 panic(shortX) 2251 } 2252 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2253 panic(shortY) 2254 } 2255 if len(ap) < n*(n+1)/2 { 2256 panic(shortAP) 2257 } 2258 2259 // Quick return if possible. 2260 if alpha == 0 { 2261 return 2262 } 2263 2264 var ky, kx int 2265 if incY < 0 { 2266 ky = -(n - 1) * incY 2267 } 2268 if incX < 0 { 2269 kx = -(n - 1) * incX 2270 } 2271 var offset int // Offset is the index of (i,i). 2272 if ul == blas.Upper { 2273 if incX == 1 && incY == 1 { 2274 for i := 0; i < n; i++ { 2275 atmp := ap[offset:] 2276 xi := x[i] 2277 yi := y[i] 2278 xtmp := x[i:n] 2279 ytmp := y[i:n] 2280 for j, v := range xtmp { 2281 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2282 } 2283 offset += n - i 2284 } 2285 return 2286 } 2287 ix := kx 2288 iy := ky 2289 for i := 0; i < n; i++ { 2290 jx := kx + i*incX 2291 jy := ky + i*incY 2292 atmp := ap[offset:] 2293 xi := x[ix] 2294 yi := y[iy] 2295 for j := 0; j < n-i; j++ { 2296 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2297 jx += incX 2298 jy += incY 2299 } 2300 ix += incX 2301 iy += incY 2302 offset += n - i 2303 } 2304 return 2305 } 2306 if incX == 1 && incY == 1 { 2307 for i := 0; i < n; i++ { 2308 atmp := ap[offset-i:] 2309 xi := x[i] 2310 yi := y[i] 2311 xtmp := x[:i+1] 2312 for j, v := range xtmp { 2313 atmp[j] += alpha * (xi*y[j] + v*yi) 2314 } 2315 offset += i + 2 2316 } 2317 return 2318 } 2319 ix := kx 2320 iy := ky 2321 for i := 0; i < n; i++ { 2322 jx := kx 2323 jy := ky 2324 atmp := ap[offset-i:] 2325 for j := 0; j <= i; j++ { 2326 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2327 jx += incX 2328 jy += incY 2329 } 2330 ix += incX 2331 iy += incY 2332 offset += i + 2 2333 } 2334 }