gonum.org/v1/gonum@v0.14.0/blas/gonum/level2float32.go (about) 1 // Code generated by "go generate gonum.org/v1/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2014 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 "gonum.org/v1/gonum/blas" 11 "gonum.org/v1/gonum/internal/asm/f32" 12 ) 13 14 var _ blas.Float32Level2 = Implementation{} 15 16 // Sger performs the rank-one operation 17 // 18 // A += alpha * x * yᵀ 19 // 20 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 21 // 22 // Float32 implementations are autogenerated and not directly tested. 23 func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 24 if m < 0 { 25 panic(mLT0) 26 } 27 if n < 0 { 28 panic(nLT0) 29 } 30 if lda < max(1, n) { 31 panic(badLdA) 32 } 33 if incX == 0 { 34 panic(zeroIncX) 35 } 36 if incY == 0 { 37 panic(zeroIncY) 38 } 39 40 // Quick return if possible. 41 if m == 0 || n == 0 { 42 return 43 } 44 45 // For zero matrix size the following slice length checks are trivially satisfied. 46 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 47 panic(shortX) 48 } 49 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 50 panic(shortY) 51 } 52 if len(a) < lda*(m-1)+n { 53 panic(shortA) 54 } 55 56 // Quick return if possible. 57 if alpha == 0 { 58 return 59 } 60 f32.Ger(uintptr(m), uintptr(n), 61 alpha, 62 x, uintptr(incX), 63 y, uintptr(incY), 64 a, uintptr(lda)) 65 } 66 67 // Sgbmv performs one of the matrix-vector operations 68 // 69 // y = alpha * A * x + beta * y if tA == blas.NoTrans 70 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 71 // 72 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 73 // x and y are vectors, and alpha and beta are scalars. 74 // 75 // Float32 implementations are autogenerated and not directly tested. 76 func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 77 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 78 panic(badTranspose) 79 } 80 if m < 0 { 81 panic(mLT0) 82 } 83 if n < 0 { 84 panic(nLT0) 85 } 86 if kL < 0 { 87 panic(kLLT0) 88 } 89 if kU < 0 { 90 panic(kULT0) 91 } 92 if lda < kL+kU+1 { 93 panic(badLdA) 94 } 95 if incX == 0 { 96 panic(zeroIncX) 97 } 98 if incY == 0 { 99 panic(zeroIncY) 100 } 101 102 // Quick return if possible. 103 if m == 0 || n == 0 { 104 return 105 } 106 107 // For zero matrix size the following slice length checks are trivially satisfied. 108 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 109 panic(shortA) 110 } 111 lenX := m 112 lenY := n 113 if tA == blas.NoTrans { 114 lenX = n 115 lenY = m 116 } 117 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 118 panic(shortX) 119 } 120 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 121 panic(shortY) 122 } 123 124 // Quick return if possible. 125 if alpha == 0 && beta == 1 { 126 return 127 } 128 129 var kx, ky int 130 if incX < 0 { 131 kx = -(lenX - 1) * incX 132 } 133 if incY < 0 { 134 ky = -(lenY - 1) * incY 135 } 136 137 // Form y = beta * y. 138 if beta != 1 { 139 if incY == 1 { 140 if beta == 0 { 141 for i := range y[:lenY] { 142 y[i] = 0 143 } 144 } else { 145 f32.ScalUnitary(beta, y[:lenY]) 146 } 147 } else { 148 iy := ky 149 if beta == 0 { 150 for i := 0; i < lenY; i++ { 151 y[iy] = 0 152 iy += incY 153 } 154 } else { 155 if incY > 0 { 156 f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 157 } else { 158 f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 159 } 160 } 161 } 162 } 163 164 if alpha == 0 { 165 return 166 } 167 168 // i and j are indices of the compacted banded matrix. 169 // off is the offset into the dense matrix (off + j = densej) 170 nCol := kU + 1 + kL 171 if tA == blas.NoTrans { 172 iy := ky 173 if incX == 1 { 174 for i := 0; i < min(m, n+kL); i++ { 175 l := max(0, kL-i) 176 u := min(nCol, n+kL-i) 177 off := max(0, i-kL) 178 atmp := a[i*lda+l : i*lda+u] 179 xtmp := x[off : off+u-l] 180 var sum float32 181 for j, v := range atmp { 182 sum += xtmp[j] * v 183 } 184 y[iy] += sum * alpha 185 iy += incY 186 } 187 return 188 } 189 for i := 0; i < min(m, n+kL); i++ { 190 l := max(0, kL-i) 191 u := min(nCol, n+kL-i) 192 off := max(0, i-kL) 193 atmp := a[i*lda+l : i*lda+u] 194 jx := kx 195 var sum float32 196 for _, v := range atmp { 197 sum += x[off*incX+jx] * v 198 jx += incX 199 } 200 y[iy] += sum * alpha 201 iy += incY 202 } 203 return 204 } 205 if incX == 1 { 206 for i := 0; i < min(m, n+kL); i++ { 207 l := max(0, kL-i) 208 u := min(nCol, n+kL-i) 209 off := max(0, i-kL) 210 atmp := a[i*lda+l : i*lda+u] 211 tmp := alpha * x[i] 212 jy := ky 213 for _, v := range atmp { 214 y[jy+off*incY] += tmp * v 215 jy += incY 216 } 217 } 218 return 219 } 220 ix := kx 221 for i := 0; i < min(m, n+kL); i++ { 222 l := max(0, kL-i) 223 u := min(nCol, n+kL-i) 224 off := max(0, i-kL) 225 atmp := a[i*lda+l : i*lda+u] 226 tmp := alpha * x[ix] 227 jy := ky 228 for _, v := range atmp { 229 y[jy+off*incY] += tmp * v 230 jy += incY 231 } 232 ix += incX 233 } 234 } 235 236 // Sgemv computes 237 // 238 // y = alpha * A * x + beta * y if tA = blas.NoTrans 239 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 240 // 241 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 242 // 243 // Float32 implementations are autogenerated and not directly tested. 244 func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 245 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 246 panic(badTranspose) 247 } 248 if m < 0 { 249 panic(mLT0) 250 } 251 if n < 0 { 252 panic(nLT0) 253 } 254 if lda < max(1, n) { 255 panic(badLdA) 256 } 257 if incX == 0 { 258 panic(zeroIncX) 259 } 260 if incY == 0 { 261 panic(zeroIncY) 262 } 263 // Set up indexes 264 lenX := m 265 lenY := n 266 if tA == blas.NoTrans { 267 lenX = n 268 lenY = m 269 } 270 271 // Quick return if possible 272 if m == 0 || n == 0 { 273 return 274 } 275 276 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 277 panic(shortX) 278 } 279 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 280 panic(shortY) 281 } 282 if len(a) < lda*(m-1)+n { 283 panic(shortA) 284 } 285 286 // Quick return if possible 287 if alpha == 0 && beta == 1 { 288 return 289 } 290 291 if alpha == 0 { 292 // First form y = beta * y 293 if incY > 0 { 294 Implementation{}.Sscal(lenY, beta, y, incY) 295 } else { 296 Implementation{}.Sscal(lenY, beta, y, -incY) 297 } 298 return 299 } 300 301 // Form y = alpha * A * x + y 302 if tA == blas.NoTrans { 303 f32.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 304 return 305 } 306 // Cases where a is transposed. 307 f32.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 308 } 309 310 // Strmv performs one of the matrix-vector operations 311 // 312 // x = A * x if tA == blas.NoTrans 313 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 314 // 315 // where A is an n×n triangular matrix, and x is a vector. 316 // 317 // Float32 implementations are autogenerated and not directly tested. 318 func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 319 if ul != blas.Lower && ul != blas.Upper { 320 panic(badUplo) 321 } 322 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 323 panic(badTranspose) 324 } 325 if d != blas.NonUnit && d != blas.Unit { 326 panic(badDiag) 327 } 328 if n < 0 { 329 panic(nLT0) 330 } 331 if lda < max(1, n) { 332 panic(badLdA) 333 } 334 if incX == 0 { 335 panic(zeroIncX) 336 } 337 338 // Quick return if possible. 339 if n == 0 { 340 return 341 } 342 343 // For zero matrix size the following slice length checks are trivially satisfied. 344 if len(a) < lda*(n-1)+n { 345 panic(shortA) 346 } 347 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 348 panic(shortX) 349 } 350 351 nonUnit := d != blas.Unit 352 if n == 1 { 353 if nonUnit { 354 x[0] *= a[0] 355 } 356 return 357 } 358 var kx int 359 if incX <= 0 { 360 kx = -(n - 1) * incX 361 } 362 if tA == blas.NoTrans { 363 if ul == blas.Upper { 364 if incX == 1 { 365 for i := 0; i < n; i++ { 366 ilda := i * lda 367 var tmp float32 368 if nonUnit { 369 tmp = a[ilda+i] * x[i] 370 } else { 371 tmp = x[i] 372 } 373 x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 374 } 375 return 376 } 377 ix := kx 378 for i := 0; i < n; i++ { 379 ilda := i * lda 380 var tmp float32 381 if nonUnit { 382 tmp = a[ilda+i] * x[ix] 383 } else { 384 tmp = x[ix] 385 } 386 x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 387 ix += incX 388 } 389 return 390 } 391 if incX == 1 { 392 for i := n - 1; i >= 0; i-- { 393 ilda := i * lda 394 var tmp float32 395 if nonUnit { 396 tmp += a[ilda+i] * x[i] 397 } else { 398 tmp = x[i] 399 } 400 x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i]) 401 } 402 return 403 } 404 ix := kx + (n-1)*incX 405 for i := n - 1; i >= 0; i-- { 406 ilda := i * lda 407 var tmp float32 408 if nonUnit { 409 tmp = a[ilda+i] * x[ix] 410 } else { 411 tmp = x[ix] 412 } 413 x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 414 ix -= incX 415 } 416 return 417 } 418 // Cases where a is transposed. 419 if ul == blas.Upper { 420 if incX == 1 { 421 for i := n - 1; i >= 0; i-- { 422 ilda := i * lda 423 xi := x[i] 424 f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 425 if nonUnit { 426 x[i] *= a[ilda+i] 427 } 428 } 429 return 430 } 431 ix := kx + (n-1)*incX 432 for i := n - 1; i >= 0; i-- { 433 ilda := i * lda 434 xi := x[ix] 435 f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 436 if nonUnit { 437 x[ix] *= a[ilda+i] 438 } 439 ix -= incX 440 } 441 return 442 } 443 if incX == 1 { 444 for i := 0; i < n; i++ { 445 ilda := i * lda 446 xi := x[i] 447 f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 448 if nonUnit { 449 x[i] *= a[i*lda+i] 450 } 451 } 452 return 453 } 454 ix := kx 455 for i := 0; i < n; i++ { 456 ilda := i * lda 457 xi := x[ix] 458 f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 459 if nonUnit { 460 x[ix] *= a[ilda+i] 461 } 462 ix += incX 463 } 464 } 465 466 // Strsv solves one of the systems of equations 467 // 468 // A * x = b if tA == blas.NoTrans 469 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 470 // 471 // where A is an n×n triangular matrix, and x and b are vectors. 472 // 473 // At entry to the function, x contains the values of b, and the result is 474 // stored in-place into x. 475 // 476 // No test for singularity or near-singularity is included in this 477 // routine. Such tests must be performed before calling this routine. 478 // 479 // Float32 implementations are autogenerated and not directly tested. 480 func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 481 if ul != blas.Lower && ul != blas.Upper { 482 panic(badUplo) 483 } 484 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 485 panic(badTranspose) 486 } 487 if d != blas.NonUnit && d != blas.Unit { 488 panic(badDiag) 489 } 490 if n < 0 { 491 panic(nLT0) 492 } 493 if lda < max(1, n) { 494 panic(badLdA) 495 } 496 if incX == 0 { 497 panic(zeroIncX) 498 } 499 500 // Quick return if possible. 501 if n == 0 { 502 return 503 } 504 505 // For zero matrix size the following slice length checks are trivially satisfied. 506 if len(a) < lda*(n-1)+n { 507 panic(shortA) 508 } 509 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 510 panic(shortX) 511 } 512 513 if n == 1 { 514 if d == blas.NonUnit { 515 x[0] /= a[0] 516 } 517 return 518 } 519 520 var kx int 521 if incX < 0 { 522 kx = -(n - 1) * incX 523 } 524 nonUnit := d == blas.NonUnit 525 if tA == blas.NoTrans { 526 if ul == blas.Upper { 527 if incX == 1 { 528 for i := n - 1; i >= 0; i-- { 529 var sum float32 530 atmp := a[i*lda+i+1 : i*lda+n] 531 for j, v := range atmp { 532 jv := i + j + 1 533 sum += x[jv] * v 534 } 535 x[i] -= sum 536 if nonUnit { 537 x[i] /= a[i*lda+i] 538 } 539 } 540 return 541 } 542 ix := kx + (n-1)*incX 543 for i := n - 1; i >= 0; i-- { 544 var sum float32 545 jx := ix + incX 546 atmp := a[i*lda+i+1 : i*lda+n] 547 for _, v := range atmp { 548 sum += x[jx] * v 549 jx += incX 550 } 551 x[ix] -= sum 552 if nonUnit { 553 x[ix] /= a[i*lda+i] 554 } 555 ix -= incX 556 } 557 return 558 } 559 if incX == 1 { 560 for i := 0; i < n; i++ { 561 var sum float32 562 atmp := a[i*lda : i*lda+i] 563 for j, v := range atmp { 564 sum += x[j] * v 565 } 566 x[i] -= sum 567 if nonUnit { 568 x[i] /= a[i*lda+i] 569 } 570 } 571 return 572 } 573 ix := kx 574 for i := 0; i < n; i++ { 575 jx := kx 576 var sum float32 577 atmp := a[i*lda : i*lda+i] 578 for _, v := range atmp { 579 sum += x[jx] * v 580 jx += incX 581 } 582 x[ix] -= sum 583 if nonUnit { 584 x[ix] /= a[i*lda+i] 585 } 586 ix += incX 587 } 588 return 589 } 590 // Cases where a is transposed. 591 if ul == blas.Upper { 592 if incX == 1 { 593 for i := 0; i < n; i++ { 594 if nonUnit { 595 x[i] /= a[i*lda+i] 596 } 597 xi := x[i] 598 atmp := a[i*lda+i+1 : i*lda+n] 599 for j, v := range atmp { 600 jv := j + i + 1 601 x[jv] -= v * xi 602 } 603 } 604 return 605 } 606 ix := kx 607 for i := 0; i < n; i++ { 608 if nonUnit { 609 x[ix] /= a[i*lda+i] 610 } 611 xi := x[ix] 612 jx := kx + (i+1)*incX 613 atmp := a[i*lda+i+1 : i*lda+n] 614 for _, v := range atmp { 615 x[jx] -= v * xi 616 jx += incX 617 } 618 ix += incX 619 } 620 return 621 } 622 if incX == 1 { 623 for i := n - 1; i >= 0; i-- { 624 if nonUnit { 625 x[i] /= a[i*lda+i] 626 } 627 xi := x[i] 628 atmp := a[i*lda : i*lda+i] 629 for j, v := range atmp { 630 x[j] -= v * xi 631 } 632 } 633 return 634 } 635 ix := kx + (n-1)*incX 636 for i := n - 1; i >= 0; i-- { 637 if nonUnit { 638 x[ix] /= a[i*lda+i] 639 } 640 xi := x[ix] 641 jx := kx 642 atmp := a[i*lda : i*lda+i] 643 for _, v := range atmp { 644 x[jx] -= v * xi 645 jx += incX 646 } 647 ix -= incX 648 } 649 } 650 651 // Ssymv performs the matrix-vector operation 652 // 653 // y = alpha * A * x + beta * y 654 // 655 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 656 // beta are scalars. 657 // 658 // Float32 implementations are autogenerated and not directly tested. 659 func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 660 if ul != blas.Lower && ul != blas.Upper { 661 panic(badUplo) 662 } 663 if n < 0 { 664 panic(nLT0) 665 } 666 if lda < max(1, n) { 667 panic(badLdA) 668 } 669 if incX == 0 { 670 panic(zeroIncX) 671 } 672 if incY == 0 { 673 panic(zeroIncY) 674 } 675 676 // Quick return if possible. 677 if n == 0 { 678 return 679 } 680 681 // For zero matrix size the following slice length checks are trivially satisfied. 682 if len(a) < lda*(n-1)+n { 683 panic(shortA) 684 } 685 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 686 panic(shortX) 687 } 688 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 689 panic(shortY) 690 } 691 692 // Quick return if possible. 693 if alpha == 0 && beta == 1 { 694 return 695 } 696 697 // Set up start points 698 var kx, ky int 699 if incX < 0 { 700 kx = -(n - 1) * incX 701 } 702 if incY < 0 { 703 ky = -(n - 1) * incY 704 } 705 706 // Form y = beta * y 707 if beta != 1 { 708 if incY == 1 { 709 if beta == 0 { 710 for i := range y[:n] { 711 y[i] = 0 712 } 713 } else { 714 f32.ScalUnitary(beta, y[:n]) 715 } 716 } else { 717 iy := ky 718 if beta == 0 { 719 for i := 0; i < n; i++ { 720 y[iy] = 0 721 iy += incY 722 } 723 } else { 724 if incY > 0 { 725 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 726 } else { 727 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 728 } 729 } 730 } 731 } 732 733 if alpha == 0 { 734 return 735 } 736 737 if n == 1 { 738 y[0] += alpha * a[0] * x[0] 739 return 740 } 741 742 if ul == blas.Upper { 743 if incX == 1 { 744 iy := ky 745 for i := 0; i < n; i++ { 746 xv := x[i] * alpha 747 sum := x[i] * a[i*lda+i] 748 jy := ky + (i+1)*incY 749 atmp := a[i*lda+i+1 : i*lda+n] 750 for j, v := range atmp { 751 jp := j + i + 1 752 sum += x[jp] * v 753 y[jy] += xv * v 754 jy += incY 755 } 756 y[iy] += alpha * sum 757 iy += incY 758 } 759 return 760 } 761 ix := kx 762 iy := ky 763 for i := 0; i < n; i++ { 764 xv := x[ix] * alpha 765 sum := x[ix] * a[i*lda+i] 766 jx := kx + (i+1)*incX 767 jy := ky + (i+1)*incY 768 atmp := a[i*lda+i+1 : i*lda+n] 769 for _, v := range atmp { 770 sum += x[jx] * v 771 y[jy] += xv * v 772 jx += incX 773 jy += incY 774 } 775 y[iy] += alpha * sum 776 ix += incX 777 iy += incY 778 } 779 return 780 } 781 // Cases where a is lower triangular. 782 if incX == 1 { 783 iy := ky 784 for i := 0; i < n; i++ { 785 jy := ky 786 xv := alpha * x[i] 787 atmp := a[i*lda : i*lda+i] 788 var sum float32 789 for j, v := range atmp { 790 sum += x[j] * v 791 y[jy] += xv * v 792 jy += incY 793 } 794 sum += x[i] * a[i*lda+i] 795 sum *= alpha 796 y[iy] += sum 797 iy += incY 798 } 799 return 800 } 801 ix := kx 802 iy := ky 803 for i := 0; i < n; i++ { 804 jx := kx 805 jy := ky 806 xv := alpha * x[ix] 807 atmp := a[i*lda : i*lda+i] 808 var sum float32 809 for _, v := range atmp { 810 sum += x[jx] * v 811 y[jy] += xv * v 812 jx += incX 813 jy += incY 814 } 815 sum += x[ix] * a[i*lda+i] 816 sum *= alpha 817 y[iy] += sum 818 ix += incX 819 iy += incY 820 } 821 } 822 823 // Stbmv performs one of the matrix-vector operations 824 // 825 // x = A * x if tA == blas.NoTrans 826 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 827 // 828 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 829 // 830 // Float32 implementations are autogenerated and not directly tested. 831 func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 832 if ul != blas.Lower && ul != blas.Upper { 833 panic(badUplo) 834 } 835 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 836 panic(badTranspose) 837 } 838 if d != blas.NonUnit && d != blas.Unit { 839 panic(badDiag) 840 } 841 if n < 0 { 842 panic(nLT0) 843 } 844 if k < 0 { 845 panic(kLT0) 846 } 847 if lda < k+1 { 848 panic(badLdA) 849 } 850 if incX == 0 { 851 panic(zeroIncX) 852 } 853 854 // Quick return if possible. 855 if n == 0 { 856 return 857 } 858 859 // For zero matrix size the following slice length checks are trivially satisfied. 860 if len(a) < lda*(n-1)+k+1 { 861 panic(shortA) 862 } 863 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 864 panic(shortX) 865 } 866 867 var kx int 868 if incX < 0 { 869 kx = -(n - 1) * incX 870 } 871 872 nonunit := d != blas.Unit 873 874 if tA == blas.NoTrans { 875 if ul == blas.Upper { 876 if incX == 1 { 877 for i := 0; i < n; i++ { 878 u := min(1+k, n-i) 879 var sum float32 880 atmp := a[i*lda:] 881 xtmp := x[i:] 882 for j := 1; j < u; j++ { 883 sum += xtmp[j] * atmp[j] 884 } 885 if nonunit { 886 sum += xtmp[0] * atmp[0] 887 } else { 888 sum += xtmp[0] 889 } 890 x[i] = sum 891 } 892 return 893 } 894 ix := kx 895 for i := 0; i < n; i++ { 896 u := min(1+k, n-i) 897 var sum float32 898 atmp := a[i*lda:] 899 jx := incX 900 for j := 1; j < u; j++ { 901 sum += x[ix+jx] * atmp[j] 902 jx += incX 903 } 904 if nonunit { 905 sum += x[ix] * atmp[0] 906 } else { 907 sum += x[ix] 908 } 909 x[ix] = sum 910 ix += incX 911 } 912 return 913 } 914 if incX == 1 { 915 for i := n - 1; i >= 0; i-- { 916 l := max(0, k-i) 917 atmp := a[i*lda:] 918 var sum float32 919 for j := l; j < k; j++ { 920 sum += x[i-k+j] * atmp[j] 921 } 922 if nonunit { 923 sum += x[i] * atmp[k] 924 } else { 925 sum += x[i] 926 } 927 x[i] = sum 928 } 929 return 930 } 931 ix := kx + (n-1)*incX 932 for i := n - 1; i >= 0; i-- { 933 l := max(0, k-i) 934 atmp := a[i*lda:] 935 var sum float32 936 jx := l * incX 937 for j := l; j < k; j++ { 938 sum += x[ix-k*incX+jx] * atmp[j] 939 jx += incX 940 } 941 if nonunit { 942 sum += x[ix] * atmp[k] 943 } else { 944 sum += x[ix] 945 } 946 x[ix] = sum 947 ix -= incX 948 } 949 return 950 } 951 if ul == blas.Upper { 952 if incX == 1 { 953 for i := n - 1; i >= 0; i-- { 954 u := k + 1 955 if i < u { 956 u = i + 1 957 } 958 var sum float32 959 for j := 1; j < u; j++ { 960 sum += x[i-j] * a[(i-j)*lda+j] 961 } 962 if nonunit { 963 sum += x[i] * a[i*lda] 964 } else { 965 sum += x[i] 966 } 967 x[i] = sum 968 } 969 return 970 } 971 ix := kx + (n-1)*incX 972 for i := n - 1; i >= 0; i-- { 973 u := k + 1 974 if i < u { 975 u = i + 1 976 } 977 var sum float32 978 jx := incX 979 for j := 1; j < u; j++ { 980 sum += x[ix-jx] * a[(i-j)*lda+j] 981 jx += incX 982 } 983 if nonunit { 984 sum += x[ix] * a[i*lda] 985 } else { 986 sum += x[ix] 987 } 988 x[ix] = sum 989 ix -= incX 990 } 991 return 992 } 993 if incX == 1 { 994 for i := 0; i < n; i++ { 995 u := k 996 if i+k >= n { 997 u = n - i - 1 998 } 999 var sum float32 1000 for j := 0; j < u; j++ { 1001 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 1002 } 1003 if nonunit { 1004 sum += x[i] * a[i*lda+k] 1005 } else { 1006 sum += x[i] 1007 } 1008 x[i] = sum 1009 } 1010 return 1011 } 1012 ix := kx 1013 for i := 0; i < n; i++ { 1014 u := k 1015 if i+k >= n { 1016 u = n - i - 1 1017 } 1018 var ( 1019 sum float32 1020 jx int 1021 ) 1022 for j := 0; j < u; j++ { 1023 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1024 jx += incX 1025 } 1026 if nonunit { 1027 sum += x[ix] * a[i*lda+k] 1028 } else { 1029 sum += x[ix] 1030 } 1031 x[ix] = sum 1032 ix += incX 1033 } 1034 } 1035 1036 // Stpmv performs one of the matrix-vector operations 1037 // 1038 // x = A * x if tA == blas.NoTrans 1039 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 1040 // 1041 // where A is an n×n triangular matrix in packed format, and x is a vector. 1042 // 1043 // Float32 implementations are autogenerated and not directly tested. 1044 func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 1045 if ul != blas.Lower && ul != blas.Upper { 1046 panic(badUplo) 1047 } 1048 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1049 panic(badTranspose) 1050 } 1051 if d != blas.NonUnit && d != blas.Unit { 1052 panic(badDiag) 1053 } 1054 if n < 0 { 1055 panic(nLT0) 1056 } 1057 if incX == 0 { 1058 panic(zeroIncX) 1059 } 1060 1061 // Quick return if possible. 1062 if n == 0 { 1063 return 1064 } 1065 1066 // For zero matrix size the following slice length checks are trivially satisfied. 1067 if len(ap) < n*(n+1)/2 { 1068 panic(shortAP) 1069 } 1070 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1071 panic(shortX) 1072 } 1073 1074 var kx int 1075 if incX < 0 { 1076 kx = -(n - 1) * incX 1077 } 1078 1079 nonUnit := d == blas.NonUnit 1080 var offset int // Offset is the index of (i,i) 1081 if tA == blas.NoTrans { 1082 if ul == blas.Upper { 1083 if incX == 1 { 1084 for i := 0; i < n; i++ { 1085 xi := x[i] 1086 if nonUnit { 1087 xi *= ap[offset] 1088 } 1089 atmp := ap[offset+1 : offset+n-i] 1090 xtmp := x[i+1:] 1091 for j, v := range atmp { 1092 xi += v * xtmp[j] 1093 } 1094 x[i] = xi 1095 offset += n - i 1096 } 1097 return 1098 } 1099 ix := kx 1100 for i := 0; i < n; i++ { 1101 xix := x[ix] 1102 if nonUnit { 1103 xix *= ap[offset] 1104 } 1105 atmp := ap[offset+1 : offset+n-i] 1106 jx := kx + (i+1)*incX 1107 for _, v := range atmp { 1108 xix += v * x[jx] 1109 jx += incX 1110 } 1111 x[ix] = xix 1112 offset += n - i 1113 ix += incX 1114 } 1115 return 1116 } 1117 if incX == 1 { 1118 offset = n*(n+1)/2 - 1 1119 for i := n - 1; i >= 0; i-- { 1120 xi := x[i] 1121 if nonUnit { 1122 xi *= ap[offset] 1123 } 1124 atmp := ap[offset-i : offset] 1125 for j, v := range atmp { 1126 xi += v * x[j] 1127 } 1128 x[i] = xi 1129 offset -= i + 1 1130 } 1131 return 1132 } 1133 ix := kx + (n-1)*incX 1134 offset = n*(n+1)/2 - 1 1135 for i := n - 1; i >= 0; i-- { 1136 xix := x[ix] 1137 if nonUnit { 1138 xix *= ap[offset] 1139 } 1140 atmp := ap[offset-i : offset] 1141 jx := kx 1142 for _, v := range atmp { 1143 xix += v * x[jx] 1144 jx += incX 1145 } 1146 x[ix] = xix 1147 offset -= i + 1 1148 ix -= incX 1149 } 1150 return 1151 } 1152 // Cases where ap is transposed. 1153 if ul == blas.Upper { 1154 if incX == 1 { 1155 offset = n*(n+1)/2 - 1 1156 for i := n - 1; i >= 0; i-- { 1157 xi := x[i] 1158 atmp := ap[offset+1 : offset+n-i] 1159 xtmp := x[i+1:] 1160 for j, v := range atmp { 1161 xtmp[j] += v * xi 1162 } 1163 if nonUnit { 1164 x[i] *= ap[offset] 1165 } 1166 offset -= n - i + 1 1167 } 1168 return 1169 } 1170 ix := kx + (n-1)*incX 1171 offset = n*(n+1)/2 - 1 1172 for i := n - 1; i >= 0; i-- { 1173 xix := x[ix] 1174 jx := kx + (i+1)*incX 1175 atmp := ap[offset+1 : offset+n-i] 1176 for _, v := range atmp { 1177 x[jx] += v * xix 1178 jx += incX 1179 } 1180 if nonUnit { 1181 x[ix] *= ap[offset] 1182 } 1183 offset -= n - i + 1 1184 ix -= incX 1185 } 1186 return 1187 } 1188 if incX == 1 { 1189 for i := 0; i < n; i++ { 1190 xi := x[i] 1191 atmp := ap[offset-i : offset] 1192 for j, v := range atmp { 1193 x[j] += v * xi 1194 } 1195 if nonUnit { 1196 x[i] *= ap[offset] 1197 } 1198 offset += i + 2 1199 } 1200 return 1201 } 1202 ix := kx 1203 for i := 0; i < n; i++ { 1204 xix := x[ix] 1205 jx := kx 1206 atmp := ap[offset-i : offset] 1207 for _, v := range atmp { 1208 x[jx] += v * xix 1209 jx += incX 1210 } 1211 if nonUnit { 1212 x[ix] *= ap[offset] 1213 } 1214 ix += incX 1215 offset += i + 2 1216 } 1217 } 1218 1219 // Stbsv solves one of the systems of equations 1220 // 1221 // A * x = b if tA == blas.NoTrans 1222 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1223 // 1224 // where A is an n×n triangular band matrix with k+1 diagonals, 1225 // and x and b are vectors. 1226 // 1227 // At entry to the function, x contains the values of b, and the result is 1228 // stored in-place into x. 1229 // 1230 // No test for singularity or near-singularity is included in this 1231 // routine. Such tests must be performed before calling this routine. 1232 // 1233 // Float32 implementations are autogenerated and not directly tested. 1234 func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 1235 if ul != blas.Lower && ul != blas.Upper { 1236 panic(badUplo) 1237 } 1238 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1239 panic(badTranspose) 1240 } 1241 if d != blas.NonUnit && d != blas.Unit { 1242 panic(badDiag) 1243 } 1244 if n < 0 { 1245 panic(nLT0) 1246 } 1247 if k < 0 { 1248 panic(kLT0) 1249 } 1250 if lda < k+1 { 1251 panic(badLdA) 1252 } 1253 if incX == 0 { 1254 panic(zeroIncX) 1255 } 1256 1257 // Quick return if possible. 1258 if n == 0 { 1259 return 1260 } 1261 1262 // For zero matrix size the following slice length checks are trivially satisfied. 1263 if len(a) < lda*(n-1)+k+1 { 1264 panic(shortA) 1265 } 1266 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1267 panic(shortX) 1268 } 1269 1270 var kx int 1271 if incX < 0 { 1272 kx = -(n - 1) * incX 1273 } 1274 nonUnit := d == blas.NonUnit 1275 // Form x = A^-1 x. 1276 // Several cases below use subslices for speed improvement. 1277 // The incX != 1 cases usually do not because incX may be negative. 1278 if tA == blas.NoTrans { 1279 if ul == blas.Upper { 1280 if incX == 1 { 1281 for i := n - 1; i >= 0; i-- { 1282 bands := k 1283 if i+bands >= n { 1284 bands = n - i - 1 1285 } 1286 atmp := a[i*lda+1:] 1287 xtmp := x[i+1 : i+bands+1] 1288 var sum float32 1289 for j, v := range xtmp { 1290 sum += v * atmp[j] 1291 } 1292 x[i] -= sum 1293 if nonUnit { 1294 x[i] /= a[i*lda] 1295 } 1296 } 1297 return 1298 } 1299 ix := kx + (n-1)*incX 1300 for i := n - 1; i >= 0; i-- { 1301 max := k + 1 1302 if i+max > n { 1303 max = n - i 1304 } 1305 atmp := a[i*lda:] 1306 var ( 1307 jx int 1308 sum float32 1309 ) 1310 for j := 1; j < max; j++ { 1311 jx += incX 1312 sum += x[ix+jx] * atmp[j] 1313 } 1314 x[ix] -= sum 1315 if nonUnit { 1316 x[ix] /= atmp[0] 1317 } 1318 ix -= incX 1319 } 1320 return 1321 } 1322 if incX == 1 { 1323 for i := 0; i < n; i++ { 1324 bands := k 1325 if i-k < 0 { 1326 bands = i 1327 } 1328 atmp := a[i*lda+k-bands:] 1329 xtmp := x[i-bands : i] 1330 var sum float32 1331 for j, v := range xtmp { 1332 sum += v * atmp[j] 1333 } 1334 x[i] -= sum 1335 if nonUnit { 1336 x[i] /= atmp[bands] 1337 } 1338 } 1339 return 1340 } 1341 ix := kx 1342 for i := 0; i < n; i++ { 1343 bands := k 1344 if i-k < 0 { 1345 bands = i 1346 } 1347 atmp := a[i*lda+k-bands:] 1348 var ( 1349 sum float32 1350 jx int 1351 ) 1352 for j := 0; j < bands; j++ { 1353 sum += x[ix-bands*incX+jx] * atmp[j] 1354 jx += incX 1355 } 1356 x[ix] -= sum 1357 if nonUnit { 1358 x[ix] /= atmp[bands] 1359 } 1360 ix += incX 1361 } 1362 return 1363 } 1364 // Cases where a is transposed. 1365 if ul == blas.Upper { 1366 if incX == 1 { 1367 for i := 0; i < n; i++ { 1368 bands := k 1369 if i-k < 0 { 1370 bands = i 1371 } 1372 var sum float32 1373 for j := 0; j < bands; j++ { 1374 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1375 } 1376 x[i] -= sum 1377 if nonUnit { 1378 x[i] /= a[i*lda] 1379 } 1380 } 1381 return 1382 } 1383 ix := kx 1384 for i := 0; i < n; i++ { 1385 bands := k 1386 if i-k < 0 { 1387 bands = i 1388 } 1389 var ( 1390 sum float32 1391 jx int 1392 ) 1393 for j := 0; j < bands; j++ { 1394 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1395 jx += incX 1396 } 1397 x[ix] -= sum 1398 if nonUnit { 1399 x[ix] /= a[i*lda] 1400 } 1401 ix += incX 1402 } 1403 return 1404 } 1405 if incX == 1 { 1406 for i := n - 1; i >= 0; i-- { 1407 bands := k 1408 if i+bands >= n { 1409 bands = n - i - 1 1410 } 1411 var sum float32 1412 xtmp := x[i+1 : i+1+bands] 1413 for j, v := range xtmp { 1414 sum += v * a[(i+j+1)*lda+k-j-1] 1415 } 1416 x[i] -= sum 1417 if nonUnit { 1418 x[i] /= a[i*lda+k] 1419 } 1420 } 1421 return 1422 } 1423 ix := kx + (n-1)*incX 1424 for i := n - 1; i >= 0; i-- { 1425 bands := k 1426 if i+bands >= n { 1427 bands = n - i - 1 1428 } 1429 var ( 1430 sum float32 1431 jx int 1432 ) 1433 for j := 0; j < bands; j++ { 1434 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1435 jx += incX 1436 } 1437 x[ix] -= sum 1438 if nonUnit { 1439 x[ix] /= a[i*lda+k] 1440 } 1441 ix -= incX 1442 } 1443 } 1444 1445 // Ssbmv performs the matrix-vector operation 1446 // 1447 // y = alpha * A * x + beta * y 1448 // 1449 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1450 // vectors, and alpha and beta are scalars. 1451 // 1452 // Float32 implementations are autogenerated and not directly tested. 1453 func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 1454 if ul != blas.Lower && ul != blas.Upper { 1455 panic(badUplo) 1456 } 1457 if n < 0 { 1458 panic(nLT0) 1459 } 1460 if k < 0 { 1461 panic(kLT0) 1462 } 1463 if lda < k+1 { 1464 panic(badLdA) 1465 } 1466 if incX == 0 { 1467 panic(zeroIncX) 1468 } 1469 if incY == 0 { 1470 panic(zeroIncY) 1471 } 1472 1473 // Quick return if possible. 1474 if n == 0 { 1475 return 1476 } 1477 1478 // For zero matrix size the following slice length checks are trivially satisfied. 1479 if len(a) < lda*(n-1)+k+1 { 1480 panic(shortA) 1481 } 1482 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1483 panic(shortX) 1484 } 1485 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1486 panic(shortY) 1487 } 1488 1489 // Quick return if possible. 1490 if alpha == 0 && beta == 1 { 1491 return 1492 } 1493 1494 // Set up indexes 1495 lenX := n 1496 lenY := n 1497 var kx, ky int 1498 if incX < 0 { 1499 kx = -(lenX - 1) * incX 1500 } 1501 if incY < 0 { 1502 ky = -(lenY - 1) * incY 1503 } 1504 1505 // Form y = beta * y. 1506 if beta != 1 { 1507 if incY == 1 { 1508 if beta == 0 { 1509 for i := range y[:n] { 1510 y[i] = 0 1511 } 1512 } else { 1513 f32.ScalUnitary(beta, y[:n]) 1514 } 1515 } else { 1516 iy := ky 1517 if beta == 0 { 1518 for i := 0; i < n; i++ { 1519 y[iy] = 0 1520 iy += incY 1521 } 1522 } else { 1523 if incY > 0 { 1524 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1525 } else { 1526 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1527 } 1528 } 1529 } 1530 } 1531 1532 if alpha == 0 { 1533 return 1534 } 1535 1536 if ul == blas.Upper { 1537 if incX == 1 { 1538 iy := ky 1539 for i := 0; i < n; i++ { 1540 atmp := a[i*lda:] 1541 tmp := alpha * x[i] 1542 sum := tmp * atmp[0] 1543 u := min(k, n-i-1) 1544 jy := incY 1545 for j := 1; j <= u; j++ { 1546 v := atmp[j] 1547 sum += alpha * x[i+j] * v 1548 y[iy+jy] += tmp * v 1549 jy += incY 1550 } 1551 y[iy] += sum 1552 iy += incY 1553 } 1554 return 1555 } 1556 ix := kx 1557 iy := ky 1558 for i := 0; i < n; i++ { 1559 atmp := a[i*lda:] 1560 tmp := alpha * x[ix] 1561 sum := tmp * atmp[0] 1562 u := min(k, n-i-1) 1563 jx := incX 1564 jy := incY 1565 for j := 1; j <= u; j++ { 1566 v := atmp[j] 1567 sum += alpha * x[ix+jx] * v 1568 y[iy+jy] += tmp * v 1569 jx += incX 1570 jy += incY 1571 } 1572 y[iy] += sum 1573 ix += incX 1574 iy += incY 1575 } 1576 return 1577 } 1578 1579 // Cases where a has bands below the diagonal. 1580 if incX == 1 { 1581 iy := ky 1582 for i := 0; i < n; i++ { 1583 l := max(0, k-i) 1584 tmp := alpha * x[i] 1585 jy := l * incY 1586 atmp := a[i*lda:] 1587 for j := l; j < k; j++ { 1588 v := atmp[j] 1589 y[iy] += alpha * v * x[i-k+j] 1590 y[iy-k*incY+jy] += tmp * v 1591 jy += incY 1592 } 1593 y[iy] += tmp * atmp[k] 1594 iy += incY 1595 } 1596 return 1597 } 1598 ix := kx 1599 iy := ky 1600 for i := 0; i < n; i++ { 1601 l := max(0, k-i) 1602 tmp := alpha * x[ix] 1603 jx := l * incX 1604 jy := l * incY 1605 atmp := a[i*lda:] 1606 for j := l; j < k; j++ { 1607 v := atmp[j] 1608 y[iy] += alpha * v * x[ix-k*incX+jx] 1609 y[iy-k*incY+jy] += tmp * v 1610 jx += incX 1611 jy += incY 1612 } 1613 y[iy] += tmp * atmp[k] 1614 ix += incX 1615 iy += incY 1616 } 1617 } 1618 1619 // Ssyr performs the symmetric rank-one update 1620 // 1621 // A += alpha * x * xᵀ 1622 // 1623 // where A is an n×n symmetric matrix, and x is a vector. 1624 // 1625 // Float32 implementations are autogenerated and not directly tested. 1626 func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { 1627 if ul != blas.Lower && ul != blas.Upper { 1628 panic(badUplo) 1629 } 1630 if n < 0 { 1631 panic(nLT0) 1632 } 1633 if lda < max(1, n) { 1634 panic(badLdA) 1635 } 1636 if incX == 0 { 1637 panic(zeroIncX) 1638 } 1639 1640 // Quick return if possible. 1641 if n == 0 { 1642 return 1643 } 1644 1645 // For zero matrix size the following slice length checks are trivially satisfied. 1646 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1647 panic(shortX) 1648 } 1649 if len(a) < lda*(n-1)+n { 1650 panic(shortA) 1651 } 1652 1653 // Quick return if possible. 1654 if alpha == 0 { 1655 return 1656 } 1657 1658 lenX := n 1659 var kx int 1660 if incX < 0 { 1661 kx = -(lenX - 1) * incX 1662 } 1663 if ul == blas.Upper { 1664 if incX == 1 { 1665 for i := 0; i < n; i++ { 1666 tmp := x[i] * alpha 1667 if tmp != 0 { 1668 atmp := a[i*lda+i : i*lda+n] 1669 xtmp := x[i:n] 1670 for j, v := range xtmp { 1671 atmp[j] += v * tmp 1672 } 1673 } 1674 } 1675 return 1676 } 1677 ix := kx 1678 for i := 0; i < n; i++ { 1679 tmp := x[ix] * alpha 1680 if tmp != 0 { 1681 jx := ix 1682 atmp := a[i*lda:] 1683 for j := i; j < n; j++ { 1684 atmp[j] += x[jx] * tmp 1685 jx += incX 1686 } 1687 } 1688 ix += incX 1689 } 1690 return 1691 } 1692 // Cases where a is lower triangular. 1693 if incX == 1 { 1694 for i := 0; i < n; i++ { 1695 tmp := x[i] * alpha 1696 if tmp != 0 { 1697 atmp := a[i*lda:] 1698 xtmp := x[:i+1] 1699 for j, v := range xtmp { 1700 atmp[j] += tmp * v 1701 } 1702 } 1703 } 1704 return 1705 } 1706 ix := kx 1707 for i := 0; i < n; i++ { 1708 tmp := x[ix] * alpha 1709 if tmp != 0 { 1710 atmp := a[i*lda:] 1711 jx := kx 1712 for j := 0; j < i+1; j++ { 1713 atmp[j] += tmp * x[jx] 1714 jx += incX 1715 } 1716 } 1717 ix += incX 1718 } 1719 } 1720 1721 // Ssyr2 performs the symmetric rank-two update 1722 // 1723 // A += alpha * x * yᵀ + alpha * y * xᵀ 1724 // 1725 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1726 // 1727 // Float32 implementations are autogenerated and not directly tested. 1728 func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 1729 if ul != blas.Lower && ul != blas.Upper { 1730 panic(badUplo) 1731 } 1732 if n < 0 { 1733 panic(nLT0) 1734 } 1735 if lda < max(1, n) { 1736 panic(badLdA) 1737 } 1738 if incX == 0 { 1739 panic(zeroIncX) 1740 } 1741 if incY == 0 { 1742 panic(zeroIncY) 1743 } 1744 1745 // Quick return if possible. 1746 if n == 0 { 1747 return 1748 } 1749 1750 // For zero matrix size the following slice length checks are trivially satisfied. 1751 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1752 panic(shortX) 1753 } 1754 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1755 panic(shortY) 1756 } 1757 if len(a) < lda*(n-1)+n { 1758 panic(shortA) 1759 } 1760 1761 // Quick return if possible. 1762 if alpha == 0 { 1763 return 1764 } 1765 1766 var ky, kx int 1767 if incY < 0 { 1768 ky = -(n - 1) * incY 1769 } 1770 if incX < 0 { 1771 kx = -(n - 1) * incX 1772 } 1773 if ul == blas.Upper { 1774 if incX == 1 && incY == 1 { 1775 for i := 0; i < n; i++ { 1776 xi := x[i] 1777 yi := y[i] 1778 atmp := a[i*lda:] 1779 for j := i; j < n; j++ { 1780 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1781 } 1782 } 1783 return 1784 } 1785 ix := kx 1786 iy := ky 1787 for i := 0; i < n; i++ { 1788 jx := kx + i*incX 1789 jy := ky + i*incY 1790 xi := x[ix] 1791 yi := y[iy] 1792 atmp := a[i*lda:] 1793 for j := i; j < n; j++ { 1794 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1795 jx += incX 1796 jy += incY 1797 } 1798 ix += incX 1799 iy += incY 1800 } 1801 return 1802 } 1803 if incX == 1 && incY == 1 { 1804 for i := 0; i < n; i++ { 1805 xi := x[i] 1806 yi := y[i] 1807 atmp := a[i*lda:] 1808 for j := 0; j <= i; j++ { 1809 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1810 } 1811 } 1812 return 1813 } 1814 ix := kx 1815 iy := ky 1816 for i := 0; i < n; i++ { 1817 jx := kx 1818 jy := ky 1819 xi := x[ix] 1820 yi := y[iy] 1821 atmp := a[i*lda:] 1822 for j := 0; j <= i; j++ { 1823 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1824 jx += incX 1825 jy += incY 1826 } 1827 ix += incX 1828 iy += incY 1829 } 1830 } 1831 1832 // Stpsv solves one of the systems of equations 1833 // 1834 // A * x = b if tA == blas.NoTrans 1835 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1836 // 1837 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1838 // 1839 // At entry to the function, x contains the values of b, and the result is 1840 // stored in-place into x. 1841 // 1842 // No test for singularity or near-singularity is included in this 1843 // routine. Such tests must be performed before calling this routine. 1844 // 1845 // Float32 implementations are autogenerated and not directly tested. 1846 func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 1847 if ul != blas.Lower && ul != blas.Upper { 1848 panic(badUplo) 1849 } 1850 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1851 panic(badTranspose) 1852 } 1853 if d != blas.NonUnit && d != blas.Unit { 1854 panic(badDiag) 1855 } 1856 if n < 0 { 1857 panic(nLT0) 1858 } 1859 if incX == 0 { 1860 panic(zeroIncX) 1861 } 1862 1863 // Quick return if possible. 1864 if n == 0 { 1865 return 1866 } 1867 1868 // For zero matrix size the following slice length checks are trivially satisfied. 1869 if len(ap) < n*(n+1)/2 { 1870 panic(shortAP) 1871 } 1872 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1873 panic(shortX) 1874 } 1875 1876 var kx int 1877 if incX < 0 { 1878 kx = -(n - 1) * incX 1879 } 1880 1881 nonUnit := d == blas.NonUnit 1882 var offset int // Offset is the index of (i,i) 1883 if tA == blas.NoTrans { 1884 if ul == blas.Upper { 1885 offset = n*(n+1)/2 - 1 1886 if incX == 1 { 1887 for i := n - 1; i >= 0; i-- { 1888 atmp := ap[offset+1 : offset+n-i] 1889 xtmp := x[i+1:] 1890 var sum float32 1891 for j, v := range atmp { 1892 sum += v * xtmp[j] 1893 } 1894 x[i] -= sum 1895 if nonUnit { 1896 x[i] /= ap[offset] 1897 } 1898 offset -= n - i + 1 1899 } 1900 return 1901 } 1902 ix := kx + (n-1)*incX 1903 for i := n - 1; i >= 0; i-- { 1904 atmp := ap[offset+1 : offset+n-i] 1905 jx := kx + (i+1)*incX 1906 var sum float32 1907 for _, v := range atmp { 1908 sum += v * x[jx] 1909 jx += incX 1910 } 1911 x[ix] -= sum 1912 if nonUnit { 1913 x[ix] /= ap[offset] 1914 } 1915 ix -= incX 1916 offset -= n - i + 1 1917 } 1918 return 1919 } 1920 if incX == 1 { 1921 for i := 0; i < n; i++ { 1922 atmp := ap[offset-i : offset] 1923 var sum float32 1924 for j, v := range atmp { 1925 sum += v * x[j] 1926 } 1927 x[i] -= sum 1928 if nonUnit { 1929 x[i] /= ap[offset] 1930 } 1931 offset += i + 2 1932 } 1933 return 1934 } 1935 ix := kx 1936 for i := 0; i < n; i++ { 1937 jx := kx 1938 atmp := ap[offset-i : offset] 1939 var sum float32 1940 for _, v := range atmp { 1941 sum += v * x[jx] 1942 jx += incX 1943 } 1944 x[ix] -= sum 1945 if nonUnit { 1946 x[ix] /= ap[offset] 1947 } 1948 ix += incX 1949 offset += i + 2 1950 } 1951 return 1952 } 1953 // Cases where ap is transposed. 1954 if ul == blas.Upper { 1955 if incX == 1 { 1956 for i := 0; i < n; i++ { 1957 if nonUnit { 1958 x[i] /= ap[offset] 1959 } 1960 xi := x[i] 1961 atmp := ap[offset+1 : offset+n-i] 1962 xtmp := x[i+1:] 1963 for j, v := range atmp { 1964 xtmp[j] -= v * xi 1965 } 1966 offset += n - i 1967 } 1968 return 1969 } 1970 ix := kx 1971 for i := 0; i < n; i++ { 1972 if nonUnit { 1973 x[ix] /= ap[offset] 1974 } 1975 xix := x[ix] 1976 atmp := ap[offset+1 : offset+n-i] 1977 jx := kx + (i+1)*incX 1978 for _, v := range atmp { 1979 x[jx] -= v * xix 1980 jx += incX 1981 } 1982 ix += incX 1983 offset += n - i 1984 } 1985 return 1986 } 1987 if incX == 1 { 1988 offset = n*(n+1)/2 - 1 1989 for i := n - 1; i >= 0; i-- { 1990 if nonUnit { 1991 x[i] /= ap[offset] 1992 } 1993 xi := x[i] 1994 atmp := ap[offset-i : offset] 1995 for j, v := range atmp { 1996 x[j] -= v * xi 1997 } 1998 offset -= i + 1 1999 } 2000 return 2001 } 2002 ix := kx + (n-1)*incX 2003 offset = n*(n+1)/2 - 1 2004 for i := n - 1; i >= 0; i-- { 2005 if nonUnit { 2006 x[ix] /= ap[offset] 2007 } 2008 xix := x[ix] 2009 atmp := ap[offset-i : offset] 2010 jx := kx 2011 for _, v := range atmp { 2012 x[jx] -= v * xix 2013 jx += incX 2014 } 2015 ix -= incX 2016 offset -= i + 1 2017 } 2018 } 2019 2020 // Sspmv performs the matrix-vector operation 2021 // 2022 // y = alpha * A * x + beta * y 2023 // 2024 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2025 // and alpha and beta are scalars. 2026 // 2027 // Float32 implementations are autogenerated and not directly tested. 2028 func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) { 2029 if ul != blas.Lower && ul != blas.Upper { 2030 panic(badUplo) 2031 } 2032 if n < 0 { 2033 panic(nLT0) 2034 } 2035 if incX == 0 { 2036 panic(zeroIncX) 2037 } 2038 if incY == 0 { 2039 panic(zeroIncY) 2040 } 2041 2042 // Quick return if possible. 2043 if n == 0 { 2044 return 2045 } 2046 2047 // For zero matrix size the following slice length checks are trivially satisfied. 2048 if len(ap) < n*(n+1)/2 { 2049 panic(shortAP) 2050 } 2051 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2052 panic(shortX) 2053 } 2054 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2055 panic(shortY) 2056 } 2057 2058 // Quick return if possible. 2059 if alpha == 0 && beta == 1 { 2060 return 2061 } 2062 2063 // Set up start points 2064 var kx, ky int 2065 if incX < 0 { 2066 kx = -(n - 1) * incX 2067 } 2068 if incY < 0 { 2069 ky = -(n - 1) * incY 2070 } 2071 2072 // Form y = beta * y. 2073 if beta != 1 { 2074 if incY == 1 { 2075 if beta == 0 { 2076 for i := range y[:n] { 2077 y[i] = 0 2078 } 2079 } else { 2080 f32.ScalUnitary(beta, y[:n]) 2081 } 2082 } else { 2083 iy := ky 2084 if beta == 0 { 2085 for i := 0; i < n; i++ { 2086 y[iy] = 0 2087 iy += incY 2088 } 2089 } else { 2090 if incY > 0 { 2091 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 2092 } else { 2093 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 2094 } 2095 } 2096 } 2097 } 2098 2099 if alpha == 0 { 2100 return 2101 } 2102 2103 if n == 1 { 2104 y[0] += alpha * ap[0] * x[0] 2105 return 2106 } 2107 var offset int // Offset is the index of (i,i). 2108 if ul == blas.Upper { 2109 if incX == 1 { 2110 iy := ky 2111 for i := 0; i < n; i++ { 2112 xv := x[i] * alpha 2113 sum := ap[offset] * x[i] 2114 atmp := ap[offset+1 : offset+n-i] 2115 xtmp := x[i+1:] 2116 jy := ky + (i+1)*incY 2117 for j, v := range atmp { 2118 sum += v * xtmp[j] 2119 y[jy] += v * xv 2120 jy += incY 2121 } 2122 y[iy] += alpha * sum 2123 iy += incY 2124 offset += n - i 2125 } 2126 return 2127 } 2128 ix := kx 2129 iy := ky 2130 for i := 0; i < n; i++ { 2131 xv := x[ix] * alpha 2132 sum := ap[offset] * x[ix] 2133 atmp := ap[offset+1 : offset+n-i] 2134 jx := kx + (i+1)*incX 2135 jy := ky + (i+1)*incY 2136 for _, v := range atmp { 2137 sum += v * x[jx] 2138 y[jy] += v * xv 2139 jx += incX 2140 jy += incY 2141 } 2142 y[iy] += alpha * sum 2143 ix += incX 2144 iy += incY 2145 offset += n - i 2146 } 2147 return 2148 } 2149 if incX == 1 { 2150 iy := ky 2151 for i := 0; i < n; i++ { 2152 xv := x[i] * alpha 2153 atmp := ap[offset-i : offset] 2154 jy := ky 2155 var sum float32 2156 for j, v := range atmp { 2157 sum += v * x[j] 2158 y[jy] += v * xv 2159 jy += incY 2160 } 2161 sum += ap[offset] * x[i] 2162 y[iy] += alpha * sum 2163 iy += incY 2164 offset += i + 2 2165 } 2166 return 2167 } 2168 ix := kx 2169 iy := ky 2170 for i := 0; i < n; i++ { 2171 xv := x[ix] * alpha 2172 atmp := ap[offset-i : offset] 2173 jx := kx 2174 jy := ky 2175 var sum float32 2176 for _, v := range atmp { 2177 sum += v * x[jx] 2178 y[jy] += v * xv 2179 jx += incX 2180 jy += incY 2181 } 2182 2183 sum += ap[offset] * x[ix] 2184 y[iy] += alpha * sum 2185 ix += incX 2186 iy += incY 2187 offset += i + 2 2188 } 2189 } 2190 2191 // Sspr performs the symmetric rank-one operation 2192 // 2193 // A += alpha * x * xᵀ 2194 // 2195 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2196 // alpha is a scalar. 2197 // 2198 // Float32 implementations are autogenerated and not directly tested. 2199 func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) { 2200 if ul != blas.Lower && ul != blas.Upper { 2201 panic(badUplo) 2202 } 2203 if n < 0 { 2204 panic(nLT0) 2205 } 2206 if incX == 0 { 2207 panic(zeroIncX) 2208 } 2209 2210 // Quick return if possible. 2211 if n == 0 { 2212 return 2213 } 2214 2215 // For zero matrix size the following slice length checks are trivially satisfied. 2216 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2217 panic(shortX) 2218 } 2219 if len(ap) < n*(n+1)/2 { 2220 panic(shortAP) 2221 } 2222 2223 // Quick return if possible. 2224 if alpha == 0 { 2225 return 2226 } 2227 2228 lenX := n 2229 var kx int 2230 if incX < 0 { 2231 kx = -(lenX - 1) * incX 2232 } 2233 var offset int // Offset is the index of (i,i). 2234 if ul == blas.Upper { 2235 if incX == 1 { 2236 for i := 0; i < n; i++ { 2237 atmp := ap[offset:] 2238 xv := alpha * x[i] 2239 xtmp := x[i:n] 2240 for j, v := range xtmp { 2241 atmp[j] += xv * v 2242 } 2243 offset += n - i 2244 } 2245 return 2246 } 2247 ix := kx 2248 for i := 0; i < n; i++ { 2249 jx := kx + i*incX 2250 atmp := ap[offset:] 2251 xv := alpha * x[ix] 2252 for j := 0; j < n-i; j++ { 2253 atmp[j] += xv * x[jx] 2254 jx += incX 2255 } 2256 ix += incX 2257 offset += n - i 2258 } 2259 return 2260 } 2261 if incX == 1 { 2262 for i := 0; i < n; i++ { 2263 atmp := ap[offset-i:] 2264 xv := alpha * x[i] 2265 xtmp := x[:i+1] 2266 for j, v := range xtmp { 2267 atmp[j] += xv * v 2268 } 2269 offset += i + 2 2270 } 2271 return 2272 } 2273 ix := kx 2274 for i := 0; i < n; i++ { 2275 jx := kx 2276 atmp := ap[offset-i:] 2277 xv := alpha * x[ix] 2278 for j := 0; j <= i; j++ { 2279 atmp[j] += xv * x[jx] 2280 jx += incX 2281 } 2282 ix += incX 2283 offset += i + 2 2284 } 2285 } 2286 2287 // Sspr2 performs the symmetric rank-2 update 2288 // 2289 // A += alpha * x * yᵀ + alpha * y * xᵀ 2290 // 2291 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2292 // and alpha is a scalar. 2293 // 2294 // Float32 implementations are autogenerated and not directly tested. 2295 func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { 2296 if ul != blas.Lower && ul != blas.Upper { 2297 panic(badUplo) 2298 } 2299 if n < 0 { 2300 panic(nLT0) 2301 } 2302 if incX == 0 { 2303 panic(zeroIncX) 2304 } 2305 if incY == 0 { 2306 panic(zeroIncY) 2307 } 2308 2309 // Quick return if possible. 2310 if n == 0 { 2311 return 2312 } 2313 2314 // For zero matrix size the following slice length checks are trivially satisfied. 2315 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2316 panic(shortX) 2317 } 2318 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2319 panic(shortY) 2320 } 2321 if len(ap) < n*(n+1)/2 { 2322 panic(shortAP) 2323 } 2324 2325 // Quick return if possible. 2326 if alpha == 0 { 2327 return 2328 } 2329 2330 var ky, kx int 2331 if incY < 0 { 2332 ky = -(n - 1) * incY 2333 } 2334 if incX < 0 { 2335 kx = -(n - 1) * incX 2336 } 2337 var offset int // Offset is the index of (i,i). 2338 if ul == blas.Upper { 2339 if incX == 1 && incY == 1 { 2340 for i := 0; i < n; i++ { 2341 atmp := ap[offset:] 2342 xi := x[i] 2343 yi := y[i] 2344 xtmp := x[i:n] 2345 ytmp := y[i:n] 2346 for j, v := range xtmp { 2347 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2348 } 2349 offset += n - i 2350 } 2351 return 2352 } 2353 ix := kx 2354 iy := ky 2355 for i := 0; i < n; i++ { 2356 jx := kx + i*incX 2357 jy := ky + i*incY 2358 atmp := ap[offset:] 2359 xi := x[ix] 2360 yi := y[iy] 2361 for j := 0; j < n-i; j++ { 2362 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2363 jx += incX 2364 jy += incY 2365 } 2366 ix += incX 2367 iy += incY 2368 offset += n - i 2369 } 2370 return 2371 } 2372 if incX == 1 && incY == 1 { 2373 for i := 0; i < n; i++ { 2374 atmp := ap[offset-i:] 2375 xi := x[i] 2376 yi := y[i] 2377 xtmp := x[:i+1] 2378 for j, v := range xtmp { 2379 atmp[j] += alpha * (xi*y[j] + v*yi) 2380 } 2381 offset += i + 2 2382 } 2383 return 2384 } 2385 ix := kx 2386 iy := ky 2387 for i := 0; i < n; i++ { 2388 jx := kx 2389 jy := ky 2390 atmp := ap[offset-i:] 2391 for j := 0; j <= i; j++ { 2392 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2393 jx += incX 2394 jy += incY 2395 } 2396 ix += incX 2397 iy += incY 2398 offset += i + 2 2399 } 2400 }