github.com/jingcheng-WU/gonum@v0.9.1-0.20210323123734-f1a2a11a8f7b/blas/gonum/level2float32.go (about) 1 // Code generated by "go generate github.com/jingcheng-WU/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2014 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 "github.com/jingcheng-WU/gonum/blas" 11 "github.com/jingcheng-WU/gonum/internal/asm/f32" 12 ) 13 14 var _ blas.Float32Level2 = Implementation{} 15 16 // Sger performs the rank-one operation 17 // A += alpha * x * yᵀ 18 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 19 // 20 // Float32 implementations are autogenerated and not directly tested. 21 func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 22 if m < 0 { 23 panic(mLT0) 24 } 25 if n < 0 { 26 panic(nLT0) 27 } 28 if lda < max(1, n) { 29 panic(badLdA) 30 } 31 if incX == 0 { 32 panic(zeroIncX) 33 } 34 if incY == 0 { 35 panic(zeroIncY) 36 } 37 38 // Quick return if possible. 39 if m == 0 || n == 0 { 40 return 41 } 42 43 // For zero matrix size the following slice length checks are trivially satisfied. 44 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 45 panic(shortX) 46 } 47 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 48 panic(shortY) 49 } 50 if len(a) < lda*(m-1)+n { 51 panic(shortA) 52 } 53 54 // Quick return if possible. 55 if alpha == 0 { 56 return 57 } 58 f32.Ger(uintptr(m), uintptr(n), 59 alpha, 60 x, uintptr(incX), 61 y, uintptr(incY), 62 a, uintptr(lda)) 63 } 64 65 // Sgbmv performs one of the matrix-vector operations 66 // y = alpha * A * x + beta * y if tA == blas.NoTrans 67 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 68 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 69 // x and y are vectors, and alpha and beta are scalars. 70 // 71 // Float32 implementations are autogenerated and not directly tested. 72 func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 73 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 74 panic(badTranspose) 75 } 76 if m < 0 { 77 panic(mLT0) 78 } 79 if n < 0 { 80 panic(nLT0) 81 } 82 if kL < 0 { 83 panic(kLLT0) 84 } 85 if kU < 0 { 86 panic(kULT0) 87 } 88 if lda < kL+kU+1 { 89 panic(badLdA) 90 } 91 if incX == 0 { 92 panic(zeroIncX) 93 } 94 if incY == 0 { 95 panic(zeroIncY) 96 } 97 98 // Quick return if possible. 99 if m == 0 || n == 0 { 100 return 101 } 102 103 // For zero matrix size the following slice length checks are trivially satisfied. 104 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 105 panic(shortA) 106 } 107 lenX := m 108 lenY := n 109 if tA == blas.NoTrans { 110 lenX = n 111 lenY = m 112 } 113 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 114 panic(shortX) 115 } 116 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 117 panic(shortY) 118 } 119 120 // Quick return if possible. 121 if alpha == 0 && beta == 1 { 122 return 123 } 124 125 var kx, ky int 126 if incX < 0 { 127 kx = -(lenX - 1) * incX 128 } 129 if incY < 0 { 130 ky = -(lenY - 1) * incY 131 } 132 133 // Form y = beta * y. 134 if beta != 1 { 135 if incY == 1 { 136 if beta == 0 { 137 for i := range y[:lenY] { 138 y[i] = 0 139 } 140 } else { 141 f32.ScalUnitary(beta, y[:lenY]) 142 } 143 } else { 144 iy := ky 145 if beta == 0 { 146 for i := 0; i < lenY; i++ { 147 y[iy] = 0 148 iy += incY 149 } 150 } else { 151 if incY > 0 { 152 f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 153 } else { 154 f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 155 } 156 } 157 } 158 } 159 160 if alpha == 0 { 161 return 162 } 163 164 // i and j are indices of the compacted banded matrix. 165 // off is the offset into the dense matrix (off + j = densej) 166 nCol := kU + 1 + kL 167 if tA == blas.NoTrans { 168 iy := ky 169 if incX == 1 { 170 for i := 0; i < min(m, n+kL); i++ { 171 l := max(0, kL-i) 172 u := min(nCol, n+kL-i) 173 off := max(0, i-kL) 174 atmp := a[i*lda+l : i*lda+u] 175 xtmp := x[off : off+u-l] 176 var sum float32 177 for j, v := range atmp { 178 sum += xtmp[j] * v 179 } 180 y[iy] += sum * alpha 181 iy += incY 182 } 183 return 184 } 185 for i := 0; i < min(m, n+kL); i++ { 186 l := max(0, kL-i) 187 u := min(nCol, n+kL-i) 188 off := max(0, i-kL) 189 atmp := a[i*lda+l : i*lda+u] 190 jx := kx 191 var sum float32 192 for _, v := range atmp { 193 sum += x[off*incX+jx] * v 194 jx += incX 195 } 196 y[iy] += sum * alpha 197 iy += incY 198 } 199 return 200 } 201 if incX == 1 { 202 for i := 0; i < min(m, n+kL); i++ { 203 l := max(0, kL-i) 204 u := min(nCol, n+kL-i) 205 off := max(0, i-kL) 206 atmp := a[i*lda+l : i*lda+u] 207 tmp := alpha * x[i] 208 jy := ky 209 for _, v := range atmp { 210 y[jy+off*incY] += tmp * v 211 jy += incY 212 } 213 } 214 return 215 } 216 ix := kx 217 for i := 0; i < min(m, n+kL); i++ { 218 l := max(0, kL-i) 219 u := min(nCol, n+kL-i) 220 off := max(0, i-kL) 221 atmp := a[i*lda+l : i*lda+u] 222 tmp := alpha * x[ix] 223 jy := ky 224 for _, v := range atmp { 225 y[jy+off*incY] += tmp * v 226 jy += incY 227 } 228 ix += incX 229 } 230 } 231 232 // Strmv performs one of the matrix-vector operations 233 // x = A * x if tA == blas.NoTrans 234 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 235 // where A is an n×n triangular matrix, and x is a vector. 236 // 237 // Float32 implementations are autogenerated and not directly tested. 238 func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 239 if ul != blas.Lower && ul != blas.Upper { 240 panic(badUplo) 241 } 242 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 243 panic(badTranspose) 244 } 245 if d != blas.NonUnit && d != blas.Unit { 246 panic(badDiag) 247 } 248 if n < 0 { 249 panic(nLT0) 250 } 251 if lda < max(1, n) { 252 panic(badLdA) 253 } 254 if incX == 0 { 255 panic(zeroIncX) 256 } 257 258 // Quick return if possible. 259 if n == 0 { 260 return 261 } 262 263 // For zero matrix size the following slice length checks are trivially satisfied. 264 if len(a) < lda*(n-1)+n { 265 panic(shortA) 266 } 267 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 268 panic(shortX) 269 } 270 271 nonUnit := d != blas.Unit 272 if n == 1 { 273 if nonUnit { 274 x[0] *= a[0] 275 } 276 return 277 } 278 var kx int 279 if incX <= 0 { 280 kx = -(n - 1) * incX 281 } 282 if tA == blas.NoTrans { 283 if ul == blas.Upper { 284 if incX == 1 { 285 for i := 0; i < n; i++ { 286 ilda := i * lda 287 var tmp float32 288 if nonUnit { 289 tmp = a[ilda+i] * x[i] 290 } else { 291 tmp = x[i] 292 } 293 x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 294 } 295 return 296 } 297 ix := kx 298 for i := 0; i < n; i++ { 299 ilda := i * lda 300 var tmp float32 301 if nonUnit { 302 tmp = a[ilda+i] * x[ix] 303 } else { 304 tmp = x[ix] 305 } 306 x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 307 ix += incX 308 } 309 return 310 } 311 if incX == 1 { 312 for i := n - 1; i >= 0; i-- { 313 ilda := i * lda 314 var tmp float32 315 if nonUnit { 316 tmp += a[ilda+i] * x[i] 317 } else { 318 tmp = x[i] 319 } 320 x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i]) 321 } 322 return 323 } 324 ix := kx + (n-1)*incX 325 for i := n - 1; i >= 0; i-- { 326 ilda := i * lda 327 var tmp float32 328 if nonUnit { 329 tmp = a[ilda+i] * x[ix] 330 } else { 331 tmp = x[ix] 332 } 333 x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 334 ix -= incX 335 } 336 return 337 } 338 // Cases where a is transposed. 339 if ul == blas.Upper { 340 if incX == 1 { 341 for i := n - 1; i >= 0; i-- { 342 ilda := i * lda 343 xi := x[i] 344 f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 345 if nonUnit { 346 x[i] *= a[ilda+i] 347 } 348 } 349 return 350 } 351 ix := kx + (n-1)*incX 352 for i := n - 1; i >= 0; i-- { 353 ilda := i * lda 354 xi := x[ix] 355 f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 356 if nonUnit { 357 x[ix] *= a[ilda+i] 358 } 359 ix -= incX 360 } 361 return 362 } 363 if incX == 1 { 364 for i := 0; i < n; i++ { 365 ilda := i * lda 366 xi := x[i] 367 f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 368 if nonUnit { 369 x[i] *= a[i*lda+i] 370 } 371 } 372 return 373 } 374 ix := kx 375 for i := 0; i < n; i++ { 376 ilda := i * lda 377 xi := x[ix] 378 f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 379 if nonUnit { 380 x[ix] *= a[ilda+i] 381 } 382 ix += incX 383 } 384 } 385 386 // Strsv solves one of the systems of equations 387 // A * x = b if tA == blas.NoTrans 388 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 389 // where A is an n×n triangular matrix, and x and b are vectors. 390 // 391 // At entry to the function, x contains the values of b, and the result is 392 // stored in-place into x. 393 // 394 // No test for singularity or near-singularity is included in this 395 // routine. Such tests must be performed before calling this routine. 396 // 397 // Float32 implementations are autogenerated and not directly tested. 398 func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 399 if ul != blas.Lower && ul != blas.Upper { 400 panic(badUplo) 401 } 402 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 403 panic(badTranspose) 404 } 405 if d != blas.NonUnit && d != blas.Unit { 406 panic(badDiag) 407 } 408 if n < 0 { 409 panic(nLT0) 410 } 411 if lda < max(1, n) { 412 panic(badLdA) 413 } 414 if incX == 0 { 415 panic(zeroIncX) 416 } 417 418 // Quick return if possible. 419 if n == 0 { 420 return 421 } 422 423 // For zero matrix size the following slice length checks are trivially satisfied. 424 if len(a) < lda*(n-1)+n { 425 panic(shortA) 426 } 427 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 428 panic(shortX) 429 } 430 431 if n == 1 { 432 if d == blas.NonUnit { 433 x[0] /= a[0] 434 } 435 return 436 } 437 438 var kx int 439 if incX < 0 { 440 kx = -(n - 1) * incX 441 } 442 nonUnit := d == blas.NonUnit 443 if tA == blas.NoTrans { 444 if ul == blas.Upper { 445 if incX == 1 { 446 for i := n - 1; i >= 0; i-- { 447 var sum float32 448 atmp := a[i*lda+i+1 : i*lda+n] 449 for j, v := range atmp { 450 jv := i + j + 1 451 sum += x[jv] * v 452 } 453 x[i] -= sum 454 if nonUnit { 455 x[i] /= a[i*lda+i] 456 } 457 } 458 return 459 } 460 ix := kx + (n-1)*incX 461 for i := n - 1; i >= 0; i-- { 462 var sum float32 463 jx := ix + incX 464 atmp := a[i*lda+i+1 : i*lda+n] 465 for _, v := range atmp { 466 sum += x[jx] * v 467 jx += incX 468 } 469 x[ix] -= sum 470 if nonUnit { 471 x[ix] /= a[i*lda+i] 472 } 473 ix -= incX 474 } 475 return 476 } 477 if incX == 1 { 478 for i := 0; i < n; i++ { 479 var sum float32 480 atmp := a[i*lda : i*lda+i] 481 for j, v := range atmp { 482 sum += x[j] * v 483 } 484 x[i] -= sum 485 if nonUnit { 486 x[i] /= a[i*lda+i] 487 } 488 } 489 return 490 } 491 ix := kx 492 for i := 0; i < n; i++ { 493 jx := kx 494 var sum float32 495 atmp := a[i*lda : i*lda+i] 496 for _, v := range atmp { 497 sum += x[jx] * v 498 jx += incX 499 } 500 x[ix] -= sum 501 if nonUnit { 502 x[ix] /= a[i*lda+i] 503 } 504 ix += incX 505 } 506 return 507 } 508 // Cases where a is transposed. 509 if ul == blas.Upper { 510 if incX == 1 { 511 for i := 0; i < n; i++ { 512 if nonUnit { 513 x[i] /= a[i*lda+i] 514 } 515 xi := x[i] 516 atmp := a[i*lda+i+1 : i*lda+n] 517 for j, v := range atmp { 518 jv := j + i + 1 519 x[jv] -= v * xi 520 } 521 } 522 return 523 } 524 ix := kx 525 for i := 0; i < n; i++ { 526 if nonUnit { 527 x[ix] /= a[i*lda+i] 528 } 529 xi := x[ix] 530 jx := kx + (i+1)*incX 531 atmp := a[i*lda+i+1 : i*lda+n] 532 for _, v := range atmp { 533 x[jx] -= v * xi 534 jx += incX 535 } 536 ix += incX 537 } 538 return 539 } 540 if incX == 1 { 541 for i := n - 1; i >= 0; i-- { 542 if nonUnit { 543 x[i] /= a[i*lda+i] 544 } 545 xi := x[i] 546 atmp := a[i*lda : i*lda+i] 547 for j, v := range atmp { 548 x[j] -= v * xi 549 } 550 } 551 return 552 } 553 ix := kx + (n-1)*incX 554 for i := n - 1; i >= 0; i-- { 555 if nonUnit { 556 x[ix] /= a[i*lda+i] 557 } 558 xi := x[ix] 559 jx := kx 560 atmp := a[i*lda : i*lda+i] 561 for _, v := range atmp { 562 x[jx] -= v * xi 563 jx += incX 564 } 565 ix -= incX 566 } 567 } 568 569 // Ssymv performs the matrix-vector operation 570 // y = alpha * A * x + beta * y 571 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 572 // beta are scalars. 573 // 574 // Float32 implementations are autogenerated and not directly tested. 575 func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 576 if ul != blas.Lower && ul != blas.Upper { 577 panic(badUplo) 578 } 579 if n < 0 { 580 panic(nLT0) 581 } 582 if lda < max(1, n) { 583 panic(badLdA) 584 } 585 if incX == 0 { 586 panic(zeroIncX) 587 } 588 if incY == 0 { 589 panic(zeroIncY) 590 } 591 592 // Quick return if possible. 593 if n == 0 { 594 return 595 } 596 597 // For zero matrix size the following slice length checks are trivially satisfied. 598 if len(a) < lda*(n-1)+n { 599 panic(shortA) 600 } 601 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 602 panic(shortX) 603 } 604 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 605 panic(shortY) 606 } 607 608 // Quick return if possible. 609 if alpha == 0 && beta == 1 { 610 return 611 } 612 613 // Set up start points 614 var kx, ky int 615 if incX < 0 { 616 kx = -(n - 1) * incX 617 } 618 if incY < 0 { 619 ky = -(n - 1) * incY 620 } 621 622 // Form y = beta * y 623 if beta != 1 { 624 if incY == 1 { 625 if beta == 0 { 626 for i := range y[:n] { 627 y[i] = 0 628 } 629 } else { 630 f32.ScalUnitary(beta, y[:n]) 631 } 632 } else { 633 iy := ky 634 if beta == 0 { 635 for i := 0; i < n; i++ { 636 y[iy] = 0 637 iy += incY 638 } 639 } else { 640 if incY > 0 { 641 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 642 } else { 643 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 644 } 645 } 646 } 647 } 648 649 if alpha == 0 { 650 return 651 } 652 653 if n == 1 { 654 y[0] += alpha * a[0] * x[0] 655 return 656 } 657 658 if ul == blas.Upper { 659 if incX == 1 { 660 iy := ky 661 for i := 0; i < n; i++ { 662 xv := x[i] * alpha 663 sum := x[i] * a[i*lda+i] 664 jy := ky + (i+1)*incY 665 atmp := a[i*lda+i+1 : i*lda+n] 666 for j, v := range atmp { 667 jp := j + i + 1 668 sum += x[jp] * v 669 y[jy] += xv * v 670 jy += incY 671 } 672 y[iy] += alpha * sum 673 iy += incY 674 } 675 return 676 } 677 ix := kx 678 iy := ky 679 for i := 0; i < n; i++ { 680 xv := x[ix] * alpha 681 sum := x[ix] * a[i*lda+i] 682 jx := kx + (i+1)*incX 683 jy := ky + (i+1)*incY 684 atmp := a[i*lda+i+1 : i*lda+n] 685 for _, v := range atmp { 686 sum += x[jx] * v 687 y[jy] += xv * v 688 jx += incX 689 jy += incY 690 } 691 y[iy] += alpha * sum 692 ix += incX 693 iy += incY 694 } 695 return 696 } 697 // Cases where a is lower triangular. 698 if incX == 1 { 699 iy := ky 700 for i := 0; i < n; i++ { 701 jy := ky 702 xv := alpha * x[i] 703 atmp := a[i*lda : i*lda+i] 704 var sum float32 705 for j, v := range atmp { 706 sum += x[j] * v 707 y[jy] += xv * v 708 jy += incY 709 } 710 sum += x[i] * a[i*lda+i] 711 sum *= alpha 712 y[iy] += sum 713 iy += incY 714 } 715 return 716 } 717 ix := kx 718 iy := ky 719 for i := 0; i < n; i++ { 720 jx := kx 721 jy := ky 722 xv := alpha * x[ix] 723 atmp := a[i*lda : i*lda+i] 724 var sum float32 725 for _, v := range atmp { 726 sum += x[jx] * v 727 y[jy] += xv * v 728 jx += incX 729 jy += incY 730 } 731 sum += x[ix] * a[i*lda+i] 732 sum *= alpha 733 y[iy] += sum 734 ix += incX 735 iy += incY 736 } 737 } 738 739 // Stbmv performs one of the matrix-vector operations 740 // x = A * x if tA == blas.NoTrans 741 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 742 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 743 // 744 // Float32 implementations are autogenerated and not directly tested. 745 func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 746 if ul != blas.Lower && ul != blas.Upper { 747 panic(badUplo) 748 } 749 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 750 panic(badTranspose) 751 } 752 if d != blas.NonUnit && d != blas.Unit { 753 panic(badDiag) 754 } 755 if n < 0 { 756 panic(nLT0) 757 } 758 if k < 0 { 759 panic(kLT0) 760 } 761 if lda < k+1 { 762 panic(badLdA) 763 } 764 if incX == 0 { 765 panic(zeroIncX) 766 } 767 768 // Quick return if possible. 769 if n == 0 { 770 return 771 } 772 773 // For zero matrix size the following slice length checks are trivially satisfied. 774 if len(a) < lda*(n-1)+k+1 { 775 panic(shortA) 776 } 777 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 778 panic(shortX) 779 } 780 781 var kx int 782 if incX < 0 { 783 kx = -(n - 1) * incX 784 } 785 786 nonunit := d != blas.Unit 787 788 if tA == blas.NoTrans { 789 if ul == blas.Upper { 790 if incX == 1 { 791 for i := 0; i < n; i++ { 792 u := min(1+k, n-i) 793 var sum float32 794 atmp := a[i*lda:] 795 xtmp := x[i:] 796 for j := 1; j < u; j++ { 797 sum += xtmp[j] * atmp[j] 798 } 799 if nonunit { 800 sum += xtmp[0] * atmp[0] 801 } else { 802 sum += xtmp[0] 803 } 804 x[i] = sum 805 } 806 return 807 } 808 ix := kx 809 for i := 0; i < n; i++ { 810 u := min(1+k, n-i) 811 var sum float32 812 atmp := a[i*lda:] 813 jx := incX 814 for j := 1; j < u; j++ { 815 sum += x[ix+jx] * atmp[j] 816 jx += incX 817 } 818 if nonunit { 819 sum += x[ix] * atmp[0] 820 } else { 821 sum += x[ix] 822 } 823 x[ix] = sum 824 ix += incX 825 } 826 return 827 } 828 if incX == 1 { 829 for i := n - 1; i >= 0; i-- { 830 l := max(0, k-i) 831 atmp := a[i*lda:] 832 var sum float32 833 for j := l; j < k; j++ { 834 sum += x[i-k+j] * atmp[j] 835 } 836 if nonunit { 837 sum += x[i] * atmp[k] 838 } else { 839 sum += x[i] 840 } 841 x[i] = sum 842 } 843 return 844 } 845 ix := kx + (n-1)*incX 846 for i := n - 1; i >= 0; i-- { 847 l := max(0, k-i) 848 atmp := a[i*lda:] 849 var sum float32 850 jx := l * incX 851 for j := l; j < k; j++ { 852 sum += x[ix-k*incX+jx] * atmp[j] 853 jx += incX 854 } 855 if nonunit { 856 sum += x[ix] * atmp[k] 857 } else { 858 sum += x[ix] 859 } 860 x[ix] = sum 861 ix -= incX 862 } 863 return 864 } 865 if ul == blas.Upper { 866 if incX == 1 { 867 for i := n - 1; i >= 0; i-- { 868 u := k + 1 869 if i < u { 870 u = i + 1 871 } 872 var sum float32 873 for j := 1; j < u; j++ { 874 sum += x[i-j] * a[(i-j)*lda+j] 875 } 876 if nonunit { 877 sum += x[i] * a[i*lda] 878 } else { 879 sum += x[i] 880 } 881 x[i] = sum 882 } 883 return 884 } 885 ix := kx + (n-1)*incX 886 for i := n - 1; i >= 0; i-- { 887 u := k + 1 888 if i < u { 889 u = i + 1 890 } 891 var sum float32 892 jx := incX 893 for j := 1; j < u; j++ { 894 sum += x[ix-jx] * a[(i-j)*lda+j] 895 jx += incX 896 } 897 if nonunit { 898 sum += x[ix] * a[i*lda] 899 } else { 900 sum += x[ix] 901 } 902 x[ix] = sum 903 ix -= incX 904 } 905 return 906 } 907 if incX == 1 { 908 for i := 0; i < n; i++ { 909 u := k 910 if i+k >= n { 911 u = n - i - 1 912 } 913 var sum float32 914 for j := 0; j < u; j++ { 915 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 916 } 917 if nonunit { 918 sum += x[i] * a[i*lda+k] 919 } else { 920 sum += x[i] 921 } 922 x[i] = sum 923 } 924 return 925 } 926 ix := kx 927 for i := 0; i < n; i++ { 928 u := k 929 if i+k >= n { 930 u = n - i - 1 931 } 932 var ( 933 sum float32 934 jx int 935 ) 936 for j := 0; j < u; j++ { 937 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 938 jx += incX 939 } 940 if nonunit { 941 sum += x[ix] * a[i*lda+k] 942 } else { 943 sum += x[ix] 944 } 945 x[ix] = sum 946 ix += incX 947 } 948 } 949 950 // Stpmv performs one of the matrix-vector operations 951 // x = A * x if tA == blas.NoTrans 952 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 953 // where A is an n×n triangular matrix in packed format, and x is a vector. 954 // 955 // Float32 implementations are autogenerated and not directly tested. 956 func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 957 if ul != blas.Lower && ul != blas.Upper { 958 panic(badUplo) 959 } 960 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 961 panic(badTranspose) 962 } 963 if d != blas.NonUnit && d != blas.Unit { 964 panic(badDiag) 965 } 966 if n < 0 { 967 panic(nLT0) 968 } 969 if incX == 0 { 970 panic(zeroIncX) 971 } 972 973 // Quick return if possible. 974 if n == 0 { 975 return 976 } 977 978 // For zero matrix size the following slice length checks are trivially satisfied. 979 if len(ap) < n*(n+1)/2 { 980 panic(shortAP) 981 } 982 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 983 panic(shortX) 984 } 985 986 var kx int 987 if incX < 0 { 988 kx = -(n - 1) * incX 989 } 990 991 nonUnit := d == blas.NonUnit 992 var offset int // Offset is the index of (i,i) 993 if tA == blas.NoTrans { 994 if ul == blas.Upper { 995 if incX == 1 { 996 for i := 0; i < n; i++ { 997 xi := x[i] 998 if nonUnit { 999 xi *= ap[offset] 1000 } 1001 atmp := ap[offset+1 : offset+n-i] 1002 xtmp := x[i+1:] 1003 for j, v := range atmp { 1004 xi += v * xtmp[j] 1005 } 1006 x[i] = xi 1007 offset += n - i 1008 } 1009 return 1010 } 1011 ix := kx 1012 for i := 0; i < n; i++ { 1013 xix := x[ix] 1014 if nonUnit { 1015 xix *= ap[offset] 1016 } 1017 atmp := ap[offset+1 : offset+n-i] 1018 jx := kx + (i+1)*incX 1019 for _, v := range atmp { 1020 xix += v * x[jx] 1021 jx += incX 1022 } 1023 x[ix] = xix 1024 offset += n - i 1025 ix += incX 1026 } 1027 return 1028 } 1029 if incX == 1 { 1030 offset = n*(n+1)/2 - 1 1031 for i := n - 1; i >= 0; i-- { 1032 xi := x[i] 1033 if nonUnit { 1034 xi *= ap[offset] 1035 } 1036 atmp := ap[offset-i : offset] 1037 for j, v := range atmp { 1038 xi += v * x[j] 1039 } 1040 x[i] = xi 1041 offset -= i + 1 1042 } 1043 return 1044 } 1045 ix := kx + (n-1)*incX 1046 offset = n*(n+1)/2 - 1 1047 for i := n - 1; i >= 0; i-- { 1048 xix := x[ix] 1049 if nonUnit { 1050 xix *= ap[offset] 1051 } 1052 atmp := ap[offset-i : offset] 1053 jx := kx 1054 for _, v := range atmp { 1055 xix += v * x[jx] 1056 jx += incX 1057 } 1058 x[ix] = xix 1059 offset -= i + 1 1060 ix -= incX 1061 } 1062 return 1063 } 1064 // Cases where ap is transposed. 1065 if ul == blas.Upper { 1066 if incX == 1 { 1067 offset = n*(n+1)/2 - 1 1068 for i := n - 1; i >= 0; i-- { 1069 xi := x[i] 1070 atmp := ap[offset+1 : offset+n-i] 1071 xtmp := x[i+1:] 1072 for j, v := range atmp { 1073 xtmp[j] += v * xi 1074 } 1075 if nonUnit { 1076 x[i] *= ap[offset] 1077 } 1078 offset -= n - i + 1 1079 } 1080 return 1081 } 1082 ix := kx + (n-1)*incX 1083 offset = n*(n+1)/2 - 1 1084 for i := n - 1; i >= 0; i-- { 1085 xix := x[ix] 1086 jx := kx + (i+1)*incX 1087 atmp := ap[offset+1 : offset+n-i] 1088 for _, v := range atmp { 1089 x[jx] += v * xix 1090 jx += incX 1091 } 1092 if nonUnit { 1093 x[ix] *= ap[offset] 1094 } 1095 offset -= n - i + 1 1096 ix -= incX 1097 } 1098 return 1099 } 1100 if incX == 1 { 1101 for i := 0; i < n; i++ { 1102 xi := x[i] 1103 atmp := ap[offset-i : offset] 1104 for j, v := range atmp { 1105 x[j] += v * xi 1106 } 1107 if nonUnit { 1108 x[i] *= ap[offset] 1109 } 1110 offset += i + 2 1111 } 1112 return 1113 } 1114 ix := kx 1115 for i := 0; i < n; i++ { 1116 xix := x[ix] 1117 jx := kx 1118 atmp := ap[offset-i : offset] 1119 for _, v := range atmp { 1120 x[jx] += v * xix 1121 jx += incX 1122 } 1123 if nonUnit { 1124 x[ix] *= ap[offset] 1125 } 1126 ix += incX 1127 offset += i + 2 1128 } 1129 } 1130 1131 // Stbsv solves one of the systems of equations 1132 // A * x = b if tA == blas.NoTrans 1133 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1134 // where A is an n×n triangular band matrix with k+1 diagonals, 1135 // and x and b are vectors. 1136 // 1137 // At entry to the function, x contains the values of b, and the result is 1138 // stored in-place into x. 1139 // 1140 // No test for singularity or near-singularity is included in this 1141 // routine. Such tests must be performed before calling this routine. 1142 // 1143 // Float32 implementations are autogenerated and not directly tested. 1144 func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 1145 if ul != blas.Lower && ul != blas.Upper { 1146 panic(badUplo) 1147 } 1148 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1149 panic(badTranspose) 1150 } 1151 if d != blas.NonUnit && d != blas.Unit { 1152 panic(badDiag) 1153 } 1154 if n < 0 { 1155 panic(nLT0) 1156 } 1157 if k < 0 { 1158 panic(kLT0) 1159 } 1160 if lda < k+1 { 1161 panic(badLdA) 1162 } 1163 if incX == 0 { 1164 panic(zeroIncX) 1165 } 1166 1167 // Quick return if possible. 1168 if n == 0 { 1169 return 1170 } 1171 1172 // For zero matrix size the following slice length checks are trivially satisfied. 1173 if len(a) < lda*(n-1)+k+1 { 1174 panic(shortA) 1175 } 1176 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1177 panic(shortX) 1178 } 1179 1180 var kx int 1181 if incX < 0 { 1182 kx = -(n - 1) * incX 1183 } 1184 nonUnit := d == blas.NonUnit 1185 // Form x = A^-1 x. 1186 // Several cases below use subslices for speed improvement. 1187 // The incX != 1 cases usually do not because incX may be negative. 1188 if tA == blas.NoTrans { 1189 if ul == blas.Upper { 1190 if incX == 1 { 1191 for i := n - 1; i >= 0; i-- { 1192 bands := k 1193 if i+bands >= n { 1194 bands = n - i - 1 1195 } 1196 atmp := a[i*lda+1:] 1197 xtmp := x[i+1 : i+bands+1] 1198 var sum float32 1199 for j, v := range xtmp { 1200 sum += v * atmp[j] 1201 } 1202 x[i] -= sum 1203 if nonUnit { 1204 x[i] /= a[i*lda] 1205 } 1206 } 1207 return 1208 } 1209 ix := kx + (n-1)*incX 1210 for i := n - 1; i >= 0; i-- { 1211 max := k + 1 1212 if i+max > n { 1213 max = n - i 1214 } 1215 atmp := a[i*lda:] 1216 var ( 1217 jx int 1218 sum float32 1219 ) 1220 for j := 1; j < max; j++ { 1221 jx += incX 1222 sum += x[ix+jx] * atmp[j] 1223 } 1224 x[ix] -= sum 1225 if nonUnit { 1226 x[ix] /= atmp[0] 1227 } 1228 ix -= incX 1229 } 1230 return 1231 } 1232 if incX == 1 { 1233 for i := 0; i < n; i++ { 1234 bands := k 1235 if i-k < 0 { 1236 bands = i 1237 } 1238 atmp := a[i*lda+k-bands:] 1239 xtmp := x[i-bands : i] 1240 var sum float32 1241 for j, v := range xtmp { 1242 sum += v * atmp[j] 1243 } 1244 x[i] -= sum 1245 if nonUnit { 1246 x[i] /= atmp[bands] 1247 } 1248 } 1249 return 1250 } 1251 ix := kx 1252 for i := 0; i < n; i++ { 1253 bands := k 1254 if i-k < 0 { 1255 bands = i 1256 } 1257 atmp := a[i*lda+k-bands:] 1258 var ( 1259 sum float32 1260 jx int 1261 ) 1262 for j := 0; j < bands; j++ { 1263 sum += x[ix-bands*incX+jx] * atmp[j] 1264 jx += incX 1265 } 1266 x[ix] -= sum 1267 if nonUnit { 1268 x[ix] /= atmp[bands] 1269 } 1270 ix += incX 1271 } 1272 return 1273 } 1274 // Cases where a is transposed. 1275 if ul == blas.Upper { 1276 if incX == 1 { 1277 for i := 0; i < n; i++ { 1278 bands := k 1279 if i-k < 0 { 1280 bands = i 1281 } 1282 var sum float32 1283 for j := 0; j < bands; j++ { 1284 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1285 } 1286 x[i] -= sum 1287 if nonUnit { 1288 x[i] /= a[i*lda] 1289 } 1290 } 1291 return 1292 } 1293 ix := kx 1294 for i := 0; i < n; i++ { 1295 bands := k 1296 if i-k < 0 { 1297 bands = i 1298 } 1299 var ( 1300 sum float32 1301 jx int 1302 ) 1303 for j := 0; j < bands; j++ { 1304 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1305 jx += incX 1306 } 1307 x[ix] -= sum 1308 if nonUnit { 1309 x[ix] /= a[i*lda] 1310 } 1311 ix += incX 1312 } 1313 return 1314 } 1315 if incX == 1 { 1316 for i := n - 1; i >= 0; i-- { 1317 bands := k 1318 if i+bands >= n { 1319 bands = n - i - 1 1320 } 1321 var sum float32 1322 xtmp := x[i+1 : i+1+bands] 1323 for j, v := range xtmp { 1324 sum += v * a[(i+j+1)*lda+k-j-1] 1325 } 1326 x[i] -= sum 1327 if nonUnit { 1328 x[i] /= a[i*lda+k] 1329 } 1330 } 1331 return 1332 } 1333 ix := kx + (n-1)*incX 1334 for i := n - 1; i >= 0; i-- { 1335 bands := k 1336 if i+bands >= n { 1337 bands = n - i - 1 1338 } 1339 var ( 1340 sum float32 1341 jx int 1342 ) 1343 for j := 0; j < bands; j++ { 1344 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1345 jx += incX 1346 } 1347 x[ix] -= sum 1348 if nonUnit { 1349 x[ix] /= a[i*lda+k] 1350 } 1351 ix -= incX 1352 } 1353 } 1354 1355 // Ssbmv performs the matrix-vector operation 1356 // y = alpha * A * x + beta * y 1357 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1358 // vectors, and alpha and beta are scalars. 1359 // 1360 // Float32 implementations are autogenerated and not directly tested. 1361 func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 1362 if ul != blas.Lower && ul != blas.Upper { 1363 panic(badUplo) 1364 } 1365 if n < 0 { 1366 panic(nLT0) 1367 } 1368 if k < 0 { 1369 panic(kLT0) 1370 } 1371 if lda < k+1 { 1372 panic(badLdA) 1373 } 1374 if incX == 0 { 1375 panic(zeroIncX) 1376 } 1377 if incY == 0 { 1378 panic(zeroIncY) 1379 } 1380 1381 // Quick return if possible. 1382 if n == 0 { 1383 return 1384 } 1385 1386 // For zero matrix size the following slice length checks are trivially satisfied. 1387 if len(a) < lda*(n-1)+k+1 { 1388 panic(shortA) 1389 } 1390 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1391 panic(shortX) 1392 } 1393 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1394 panic(shortY) 1395 } 1396 1397 // Quick return if possible. 1398 if alpha == 0 && beta == 1 { 1399 return 1400 } 1401 1402 // Set up indexes 1403 lenX := n 1404 lenY := n 1405 var kx, ky int 1406 if incX < 0 { 1407 kx = -(lenX - 1) * incX 1408 } 1409 if incY < 0 { 1410 ky = -(lenY - 1) * incY 1411 } 1412 1413 // Form y = beta * y. 1414 if beta != 1 { 1415 if incY == 1 { 1416 if beta == 0 { 1417 for i := range y[:n] { 1418 y[i] = 0 1419 } 1420 } else { 1421 f32.ScalUnitary(beta, y[:n]) 1422 } 1423 } else { 1424 iy := ky 1425 if beta == 0 { 1426 for i := 0; i < n; i++ { 1427 y[iy] = 0 1428 iy += incY 1429 } 1430 } else { 1431 if incY > 0 { 1432 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1433 } else { 1434 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1435 } 1436 } 1437 } 1438 } 1439 1440 if alpha == 0 { 1441 return 1442 } 1443 1444 if ul == blas.Upper { 1445 if incX == 1 { 1446 iy := ky 1447 for i := 0; i < n; i++ { 1448 atmp := a[i*lda:] 1449 tmp := alpha * x[i] 1450 sum := tmp * atmp[0] 1451 u := min(k, n-i-1) 1452 jy := incY 1453 for j := 1; j <= u; j++ { 1454 v := atmp[j] 1455 sum += alpha * x[i+j] * v 1456 y[iy+jy] += tmp * v 1457 jy += incY 1458 } 1459 y[iy] += sum 1460 iy += incY 1461 } 1462 return 1463 } 1464 ix := kx 1465 iy := ky 1466 for i := 0; i < n; i++ { 1467 atmp := a[i*lda:] 1468 tmp := alpha * x[ix] 1469 sum := tmp * atmp[0] 1470 u := min(k, n-i-1) 1471 jx := incX 1472 jy := incY 1473 for j := 1; j <= u; j++ { 1474 v := atmp[j] 1475 sum += alpha * x[ix+jx] * v 1476 y[iy+jy] += tmp * v 1477 jx += incX 1478 jy += incY 1479 } 1480 y[iy] += sum 1481 ix += incX 1482 iy += incY 1483 } 1484 return 1485 } 1486 1487 // Casses where a has bands below the diagonal. 1488 if incX == 1 { 1489 iy := ky 1490 for i := 0; i < n; i++ { 1491 l := max(0, k-i) 1492 tmp := alpha * x[i] 1493 jy := l * incY 1494 atmp := a[i*lda:] 1495 for j := l; j < k; j++ { 1496 v := atmp[j] 1497 y[iy] += alpha * v * x[i-k+j] 1498 y[iy-k*incY+jy] += tmp * v 1499 jy += incY 1500 } 1501 y[iy] += tmp * atmp[k] 1502 iy += incY 1503 } 1504 return 1505 } 1506 ix := kx 1507 iy := ky 1508 for i := 0; i < n; i++ { 1509 l := max(0, k-i) 1510 tmp := alpha * x[ix] 1511 jx := l * incX 1512 jy := l * incY 1513 atmp := a[i*lda:] 1514 for j := l; j < k; j++ { 1515 v := atmp[j] 1516 y[iy] += alpha * v * x[ix-k*incX+jx] 1517 y[iy-k*incY+jy] += tmp * v 1518 jx += incX 1519 jy += incY 1520 } 1521 y[iy] += tmp * atmp[k] 1522 ix += incX 1523 iy += incY 1524 } 1525 } 1526 1527 // Ssyr performs the symmetric rank-one update 1528 // A += alpha * x * xᵀ 1529 // where A is an n×n symmetric matrix, and x is a vector. 1530 // 1531 // Float32 implementations are autogenerated and not directly tested. 1532 func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { 1533 if ul != blas.Lower && ul != blas.Upper { 1534 panic(badUplo) 1535 } 1536 if n < 0 { 1537 panic(nLT0) 1538 } 1539 if lda < max(1, n) { 1540 panic(badLdA) 1541 } 1542 if incX == 0 { 1543 panic(zeroIncX) 1544 } 1545 1546 // Quick return if possible. 1547 if n == 0 { 1548 return 1549 } 1550 1551 // For zero matrix size the following slice length checks are trivially satisfied. 1552 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1553 panic(shortX) 1554 } 1555 if len(a) < lda*(n-1)+n { 1556 panic(shortA) 1557 } 1558 1559 // Quick return if possible. 1560 if alpha == 0 { 1561 return 1562 } 1563 1564 lenX := n 1565 var kx int 1566 if incX < 0 { 1567 kx = -(lenX - 1) * incX 1568 } 1569 if ul == blas.Upper { 1570 if incX == 1 { 1571 for i := 0; i < n; i++ { 1572 tmp := x[i] * alpha 1573 if tmp != 0 { 1574 atmp := a[i*lda+i : i*lda+n] 1575 xtmp := x[i:n] 1576 for j, v := range xtmp { 1577 atmp[j] += v * tmp 1578 } 1579 } 1580 } 1581 return 1582 } 1583 ix := kx 1584 for i := 0; i < n; i++ { 1585 tmp := x[ix] * alpha 1586 if tmp != 0 { 1587 jx := ix 1588 atmp := a[i*lda:] 1589 for j := i; j < n; j++ { 1590 atmp[j] += x[jx] * tmp 1591 jx += incX 1592 } 1593 } 1594 ix += incX 1595 } 1596 return 1597 } 1598 // Cases where a is lower triangular. 1599 if incX == 1 { 1600 for i := 0; i < n; i++ { 1601 tmp := x[i] * alpha 1602 if tmp != 0 { 1603 atmp := a[i*lda:] 1604 xtmp := x[:i+1] 1605 for j, v := range xtmp { 1606 atmp[j] += tmp * v 1607 } 1608 } 1609 } 1610 return 1611 } 1612 ix := kx 1613 for i := 0; i < n; i++ { 1614 tmp := x[ix] * alpha 1615 if tmp != 0 { 1616 atmp := a[i*lda:] 1617 jx := kx 1618 for j := 0; j < i+1; j++ { 1619 atmp[j] += tmp * x[jx] 1620 jx += incX 1621 } 1622 } 1623 ix += incX 1624 } 1625 } 1626 1627 // Ssyr2 performs the symmetric rank-two update 1628 // A += alpha * x * yᵀ + alpha * y * xᵀ 1629 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1630 // 1631 // Float32 implementations are autogenerated and not directly tested. 1632 func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 1633 if ul != blas.Lower && ul != blas.Upper { 1634 panic(badUplo) 1635 } 1636 if n < 0 { 1637 panic(nLT0) 1638 } 1639 if lda < max(1, n) { 1640 panic(badLdA) 1641 } 1642 if incX == 0 { 1643 panic(zeroIncX) 1644 } 1645 if incY == 0 { 1646 panic(zeroIncY) 1647 } 1648 1649 // Quick return if possible. 1650 if n == 0 { 1651 return 1652 } 1653 1654 // For zero matrix size the following slice length checks are trivially satisfied. 1655 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1656 panic(shortX) 1657 } 1658 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1659 panic(shortY) 1660 } 1661 if len(a) < lda*(n-1)+n { 1662 panic(shortA) 1663 } 1664 1665 // Quick return if possible. 1666 if alpha == 0 { 1667 return 1668 } 1669 1670 var ky, kx int 1671 if incY < 0 { 1672 ky = -(n - 1) * incY 1673 } 1674 if incX < 0 { 1675 kx = -(n - 1) * incX 1676 } 1677 if ul == blas.Upper { 1678 if incX == 1 && incY == 1 { 1679 for i := 0; i < n; i++ { 1680 xi := x[i] 1681 yi := y[i] 1682 atmp := a[i*lda:] 1683 for j := i; j < n; j++ { 1684 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1685 } 1686 } 1687 return 1688 } 1689 ix := kx 1690 iy := ky 1691 for i := 0; i < n; i++ { 1692 jx := kx + i*incX 1693 jy := ky + i*incY 1694 xi := x[ix] 1695 yi := y[iy] 1696 atmp := a[i*lda:] 1697 for j := i; j < n; j++ { 1698 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1699 jx += incX 1700 jy += incY 1701 } 1702 ix += incX 1703 iy += incY 1704 } 1705 return 1706 } 1707 if incX == 1 && incY == 1 { 1708 for i := 0; i < n; i++ { 1709 xi := x[i] 1710 yi := y[i] 1711 atmp := a[i*lda:] 1712 for j := 0; j <= i; j++ { 1713 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1714 } 1715 } 1716 return 1717 } 1718 ix := kx 1719 iy := ky 1720 for i := 0; i < n; i++ { 1721 jx := kx 1722 jy := ky 1723 xi := x[ix] 1724 yi := y[iy] 1725 atmp := a[i*lda:] 1726 for j := 0; j <= i; j++ { 1727 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1728 jx += incX 1729 jy += incY 1730 } 1731 ix += incX 1732 iy += incY 1733 } 1734 } 1735 1736 // Stpsv solves one of the systems of equations 1737 // A * x = b if tA == blas.NoTrans 1738 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1739 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1740 // 1741 // At entry to the function, x contains the values of b, and the result is 1742 // stored in-place into x. 1743 // 1744 // No test for singularity or near-singularity is included in this 1745 // routine. Such tests must be performed before calling this routine. 1746 // 1747 // Float32 implementations are autogenerated and not directly tested. 1748 func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 1749 if ul != blas.Lower && ul != blas.Upper { 1750 panic(badUplo) 1751 } 1752 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1753 panic(badTranspose) 1754 } 1755 if d != blas.NonUnit && d != blas.Unit { 1756 panic(badDiag) 1757 } 1758 if n < 0 { 1759 panic(nLT0) 1760 } 1761 if incX == 0 { 1762 panic(zeroIncX) 1763 } 1764 1765 // Quick return if possible. 1766 if n == 0 { 1767 return 1768 } 1769 1770 // For zero matrix size the following slice length checks are trivially satisfied. 1771 if len(ap) < n*(n+1)/2 { 1772 panic(shortAP) 1773 } 1774 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1775 panic(shortX) 1776 } 1777 1778 var kx int 1779 if incX < 0 { 1780 kx = -(n - 1) * incX 1781 } 1782 1783 nonUnit := d == blas.NonUnit 1784 var offset int // Offset is the index of (i,i) 1785 if tA == blas.NoTrans { 1786 if ul == blas.Upper { 1787 offset = n*(n+1)/2 - 1 1788 if incX == 1 { 1789 for i := n - 1; i >= 0; i-- { 1790 atmp := ap[offset+1 : offset+n-i] 1791 xtmp := x[i+1:] 1792 var sum float32 1793 for j, v := range atmp { 1794 sum += v * xtmp[j] 1795 } 1796 x[i] -= sum 1797 if nonUnit { 1798 x[i] /= ap[offset] 1799 } 1800 offset -= n - i + 1 1801 } 1802 return 1803 } 1804 ix := kx + (n-1)*incX 1805 for i := n - 1; i >= 0; i-- { 1806 atmp := ap[offset+1 : offset+n-i] 1807 jx := kx + (i+1)*incX 1808 var sum float32 1809 for _, v := range atmp { 1810 sum += v * x[jx] 1811 jx += incX 1812 } 1813 x[ix] -= sum 1814 if nonUnit { 1815 x[ix] /= ap[offset] 1816 } 1817 ix -= incX 1818 offset -= n - i + 1 1819 } 1820 return 1821 } 1822 if incX == 1 { 1823 for i := 0; i < n; i++ { 1824 atmp := ap[offset-i : offset] 1825 var sum float32 1826 for j, v := range atmp { 1827 sum += v * x[j] 1828 } 1829 x[i] -= sum 1830 if nonUnit { 1831 x[i] /= ap[offset] 1832 } 1833 offset += i + 2 1834 } 1835 return 1836 } 1837 ix := kx 1838 for i := 0; i < n; i++ { 1839 jx := kx 1840 atmp := ap[offset-i : offset] 1841 var sum float32 1842 for _, v := range atmp { 1843 sum += v * x[jx] 1844 jx += incX 1845 } 1846 x[ix] -= sum 1847 if nonUnit { 1848 x[ix] /= ap[offset] 1849 } 1850 ix += incX 1851 offset += i + 2 1852 } 1853 return 1854 } 1855 // Cases where ap is transposed. 1856 if ul == blas.Upper { 1857 if incX == 1 { 1858 for i := 0; i < n; i++ { 1859 if nonUnit { 1860 x[i] /= ap[offset] 1861 } 1862 xi := x[i] 1863 atmp := ap[offset+1 : offset+n-i] 1864 xtmp := x[i+1:] 1865 for j, v := range atmp { 1866 xtmp[j] -= v * xi 1867 } 1868 offset += n - i 1869 } 1870 return 1871 } 1872 ix := kx 1873 for i := 0; i < n; i++ { 1874 if nonUnit { 1875 x[ix] /= ap[offset] 1876 } 1877 xix := x[ix] 1878 atmp := ap[offset+1 : offset+n-i] 1879 jx := kx + (i+1)*incX 1880 for _, v := range atmp { 1881 x[jx] -= v * xix 1882 jx += incX 1883 } 1884 ix += incX 1885 offset += n - i 1886 } 1887 return 1888 } 1889 if incX == 1 { 1890 offset = n*(n+1)/2 - 1 1891 for i := n - 1; i >= 0; i-- { 1892 if nonUnit { 1893 x[i] /= ap[offset] 1894 } 1895 xi := x[i] 1896 atmp := ap[offset-i : offset] 1897 for j, v := range atmp { 1898 x[j] -= v * xi 1899 } 1900 offset -= i + 1 1901 } 1902 return 1903 } 1904 ix := kx + (n-1)*incX 1905 offset = n*(n+1)/2 - 1 1906 for i := n - 1; i >= 0; i-- { 1907 if nonUnit { 1908 x[ix] /= ap[offset] 1909 } 1910 xix := x[ix] 1911 atmp := ap[offset-i : offset] 1912 jx := kx 1913 for _, v := range atmp { 1914 x[jx] -= v * xix 1915 jx += incX 1916 } 1917 ix -= incX 1918 offset -= i + 1 1919 } 1920 } 1921 1922 // Sspmv performs the matrix-vector operation 1923 // y = alpha * A * x + beta * y 1924 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 1925 // and alpha and beta are scalars. 1926 // 1927 // Float32 implementations are autogenerated and not directly tested. 1928 func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) { 1929 if ul != blas.Lower && ul != blas.Upper { 1930 panic(badUplo) 1931 } 1932 if n < 0 { 1933 panic(nLT0) 1934 } 1935 if incX == 0 { 1936 panic(zeroIncX) 1937 } 1938 if incY == 0 { 1939 panic(zeroIncY) 1940 } 1941 1942 // Quick return if possible. 1943 if n == 0 { 1944 return 1945 } 1946 1947 // For zero matrix size the following slice length checks are trivially satisfied. 1948 if len(ap) < n*(n+1)/2 { 1949 panic(shortAP) 1950 } 1951 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1952 panic(shortX) 1953 } 1954 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1955 panic(shortY) 1956 } 1957 1958 // Quick return if possible. 1959 if alpha == 0 && beta == 1 { 1960 return 1961 } 1962 1963 // Set up start points 1964 var kx, ky int 1965 if incX < 0 { 1966 kx = -(n - 1) * incX 1967 } 1968 if incY < 0 { 1969 ky = -(n - 1) * incY 1970 } 1971 1972 // Form y = beta * y. 1973 if beta != 1 { 1974 if incY == 1 { 1975 if beta == 0 { 1976 for i := range y[:n] { 1977 y[i] = 0 1978 } 1979 } else { 1980 f32.ScalUnitary(beta, y[:n]) 1981 } 1982 } else { 1983 iy := ky 1984 if beta == 0 { 1985 for i := 0; i < n; i++ { 1986 y[iy] = 0 1987 iy += incY 1988 } 1989 } else { 1990 if incY > 0 { 1991 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1992 } else { 1993 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1994 } 1995 } 1996 } 1997 } 1998 1999 if alpha == 0 { 2000 return 2001 } 2002 2003 if n == 1 { 2004 y[0] += alpha * ap[0] * x[0] 2005 return 2006 } 2007 var offset int // Offset is the index of (i,i). 2008 if ul == blas.Upper { 2009 if incX == 1 { 2010 iy := ky 2011 for i := 0; i < n; i++ { 2012 xv := x[i] * alpha 2013 sum := ap[offset] * x[i] 2014 atmp := ap[offset+1 : offset+n-i] 2015 xtmp := x[i+1:] 2016 jy := ky + (i+1)*incY 2017 for j, v := range atmp { 2018 sum += v * xtmp[j] 2019 y[jy] += v * xv 2020 jy += incY 2021 } 2022 y[iy] += alpha * sum 2023 iy += incY 2024 offset += n - i 2025 } 2026 return 2027 } 2028 ix := kx 2029 iy := ky 2030 for i := 0; i < n; i++ { 2031 xv := x[ix] * alpha 2032 sum := ap[offset] * x[ix] 2033 atmp := ap[offset+1 : offset+n-i] 2034 jx := kx + (i+1)*incX 2035 jy := ky + (i+1)*incY 2036 for _, v := range atmp { 2037 sum += v * x[jx] 2038 y[jy] += v * xv 2039 jx += incX 2040 jy += incY 2041 } 2042 y[iy] += alpha * sum 2043 ix += incX 2044 iy += incY 2045 offset += n - i 2046 } 2047 return 2048 } 2049 if incX == 1 { 2050 iy := ky 2051 for i := 0; i < n; i++ { 2052 xv := x[i] * alpha 2053 atmp := ap[offset-i : offset] 2054 jy := ky 2055 var sum float32 2056 for j, v := range atmp { 2057 sum += v * x[j] 2058 y[jy] += v * xv 2059 jy += incY 2060 } 2061 sum += ap[offset] * x[i] 2062 y[iy] += alpha * sum 2063 iy += incY 2064 offset += i + 2 2065 } 2066 return 2067 } 2068 ix := kx 2069 iy := ky 2070 for i := 0; i < n; i++ { 2071 xv := x[ix] * alpha 2072 atmp := ap[offset-i : offset] 2073 jx := kx 2074 jy := ky 2075 var sum float32 2076 for _, v := range atmp { 2077 sum += v * x[jx] 2078 y[jy] += v * xv 2079 jx += incX 2080 jy += incY 2081 } 2082 2083 sum += ap[offset] * x[ix] 2084 y[iy] += alpha * sum 2085 ix += incX 2086 iy += incY 2087 offset += i + 2 2088 } 2089 } 2090 2091 // Sspr performs the symmetric rank-one operation 2092 // A += alpha * x * xᵀ 2093 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2094 // alpha is a scalar. 2095 // 2096 // Float32 implementations are autogenerated and not directly tested. 2097 func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) { 2098 if ul != blas.Lower && ul != blas.Upper { 2099 panic(badUplo) 2100 } 2101 if n < 0 { 2102 panic(nLT0) 2103 } 2104 if incX == 0 { 2105 panic(zeroIncX) 2106 } 2107 2108 // Quick return if possible. 2109 if n == 0 { 2110 return 2111 } 2112 2113 // For zero matrix size the following slice length checks are trivially satisfied. 2114 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2115 panic(shortX) 2116 } 2117 if len(ap) < n*(n+1)/2 { 2118 panic(shortAP) 2119 } 2120 2121 // Quick return if possible. 2122 if alpha == 0 { 2123 return 2124 } 2125 2126 lenX := n 2127 var kx int 2128 if incX < 0 { 2129 kx = -(lenX - 1) * incX 2130 } 2131 var offset int // Offset is the index of (i,i). 2132 if ul == blas.Upper { 2133 if incX == 1 { 2134 for i := 0; i < n; i++ { 2135 atmp := ap[offset:] 2136 xv := alpha * x[i] 2137 xtmp := x[i:n] 2138 for j, v := range xtmp { 2139 atmp[j] += xv * v 2140 } 2141 offset += n - i 2142 } 2143 return 2144 } 2145 ix := kx 2146 for i := 0; i < n; i++ { 2147 jx := kx + i*incX 2148 atmp := ap[offset:] 2149 xv := alpha * x[ix] 2150 for j := 0; j < n-i; j++ { 2151 atmp[j] += xv * x[jx] 2152 jx += incX 2153 } 2154 ix += incX 2155 offset += n - i 2156 } 2157 return 2158 } 2159 if incX == 1 { 2160 for i := 0; i < n; i++ { 2161 atmp := ap[offset-i:] 2162 xv := alpha * x[i] 2163 xtmp := x[:i+1] 2164 for j, v := range xtmp { 2165 atmp[j] += xv * v 2166 } 2167 offset += i + 2 2168 } 2169 return 2170 } 2171 ix := kx 2172 for i := 0; i < n; i++ { 2173 jx := kx 2174 atmp := ap[offset-i:] 2175 xv := alpha * x[ix] 2176 for j := 0; j <= i; j++ { 2177 atmp[j] += xv * x[jx] 2178 jx += incX 2179 } 2180 ix += incX 2181 offset += i + 2 2182 } 2183 } 2184 2185 // Sspr2 performs the symmetric rank-2 update 2186 // A += alpha * x * yᵀ + alpha * y * xᵀ 2187 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2188 // and alpha is a scalar. 2189 // 2190 // Float32 implementations are autogenerated and not directly tested. 2191 func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { 2192 if ul != blas.Lower && ul != blas.Upper { 2193 panic(badUplo) 2194 } 2195 if n < 0 { 2196 panic(nLT0) 2197 } 2198 if incX == 0 { 2199 panic(zeroIncX) 2200 } 2201 if incY == 0 { 2202 panic(zeroIncY) 2203 } 2204 2205 // Quick return if possible. 2206 if n == 0 { 2207 return 2208 } 2209 2210 // For zero matrix size the following slice length checks are trivially satisfied. 2211 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2212 panic(shortX) 2213 } 2214 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2215 panic(shortY) 2216 } 2217 if len(ap) < n*(n+1)/2 { 2218 panic(shortAP) 2219 } 2220 2221 // Quick return if possible. 2222 if alpha == 0 { 2223 return 2224 } 2225 2226 var ky, kx int 2227 if incY < 0 { 2228 ky = -(n - 1) * incY 2229 } 2230 if incX < 0 { 2231 kx = -(n - 1) * incX 2232 } 2233 var offset int // Offset is the index of (i,i). 2234 if ul == blas.Upper { 2235 if incX == 1 && incY == 1 { 2236 for i := 0; i < n; i++ { 2237 atmp := ap[offset:] 2238 xi := x[i] 2239 yi := y[i] 2240 xtmp := x[i:n] 2241 ytmp := y[i:n] 2242 for j, v := range xtmp { 2243 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2244 } 2245 offset += n - i 2246 } 2247 return 2248 } 2249 ix := kx 2250 iy := ky 2251 for i := 0; i < n; i++ { 2252 jx := kx + i*incX 2253 jy := ky + i*incY 2254 atmp := ap[offset:] 2255 xi := x[ix] 2256 yi := y[iy] 2257 for j := 0; j < n-i; j++ { 2258 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2259 jx += incX 2260 jy += incY 2261 } 2262 ix += incX 2263 iy += incY 2264 offset += n - i 2265 } 2266 return 2267 } 2268 if incX == 1 && incY == 1 { 2269 for i := 0; i < n; i++ { 2270 atmp := ap[offset-i:] 2271 xi := x[i] 2272 yi := y[i] 2273 xtmp := x[:i+1] 2274 for j, v := range xtmp { 2275 atmp[j] += alpha * (xi*y[j] + v*yi) 2276 } 2277 offset += i + 2 2278 } 2279 return 2280 } 2281 ix := kx 2282 iy := ky 2283 for i := 0; i < n; i++ { 2284 jx := kx 2285 jy := ky 2286 atmp := ap[offset-i:] 2287 for j := 0; j <= i; j++ { 2288 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2289 jx += incX 2290 jy += incY 2291 } 2292 ix += incX 2293 iy += incY 2294 offset += i + 2 2295 } 2296 }