github.com/gopherd/gonum@v0.0.4/blas/gonum/level2float32.go (about) 1 // Code generated by "go generate github.com/gopherd/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2014 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 "github.com/gopherd/gonum/blas" 11 "github.com/gopherd/gonum/internal/asm/f32" 12 ) 13 14 var _ blas.Float32Level2 = Implementation{} 15 16 // Sger performs the rank-one operation 17 // A += alpha * x * yᵀ 18 // where A is an m×n dense matrix, x and y are vectors, and alpha is a scalar. 19 // 20 // Float32 implementations are autogenerated and not directly tested. 21 func (Implementation) Sger(m, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 22 if m < 0 { 23 panic(mLT0) 24 } 25 if n < 0 { 26 panic(nLT0) 27 } 28 if lda < max(1, n) { 29 panic(badLdA) 30 } 31 if incX == 0 { 32 panic(zeroIncX) 33 } 34 if incY == 0 { 35 panic(zeroIncY) 36 } 37 38 // Quick return if possible. 39 if m == 0 || n == 0 { 40 return 41 } 42 43 // For zero matrix size the following slice length checks are trivially satisfied. 44 if (incX > 0 && len(x) <= (m-1)*incX) || (incX < 0 && len(x) <= (1-m)*incX) { 45 panic(shortX) 46 } 47 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 48 panic(shortY) 49 } 50 if len(a) < lda*(m-1)+n { 51 panic(shortA) 52 } 53 54 // Quick return if possible. 55 if alpha == 0 { 56 return 57 } 58 f32.Ger(uintptr(m), uintptr(n), 59 alpha, 60 x, uintptr(incX), 61 y, uintptr(incY), 62 a, uintptr(lda)) 63 } 64 65 // Sgbmv performs one of the matrix-vector operations 66 // y = alpha * A * x + beta * y if tA == blas.NoTrans 67 // y = alpha * Aᵀ * x + beta * y if tA == blas.Trans or blas.ConjTrans 68 // where A is an m×n band matrix with kL sub-diagonals and kU super-diagonals, 69 // x and y are vectors, and alpha and beta are scalars. 70 // 71 // Float32 implementations are autogenerated and not directly tested. 72 func (Implementation) Sgbmv(tA blas.Transpose, m, n, kL, kU int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 73 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 74 panic(badTranspose) 75 } 76 if m < 0 { 77 panic(mLT0) 78 } 79 if n < 0 { 80 panic(nLT0) 81 } 82 if kL < 0 { 83 panic(kLLT0) 84 } 85 if kU < 0 { 86 panic(kULT0) 87 } 88 if lda < kL+kU+1 { 89 panic(badLdA) 90 } 91 if incX == 0 { 92 panic(zeroIncX) 93 } 94 if incY == 0 { 95 panic(zeroIncY) 96 } 97 98 // Quick return if possible. 99 if m == 0 || n == 0 { 100 return 101 } 102 103 // For zero matrix size the following slice length checks are trivially satisfied. 104 if len(a) < lda*(min(m, n+kL)-1)+kL+kU+1 { 105 panic(shortA) 106 } 107 lenX := m 108 lenY := n 109 if tA == blas.NoTrans { 110 lenX = n 111 lenY = m 112 } 113 if (incX > 0 && len(x) <= (lenX-1)*incX) || (incX < 0 && len(x) <= (1-lenX)*incX) { 114 panic(shortX) 115 } 116 if (incY > 0 && len(y) <= (lenY-1)*incY) || (incY < 0 && len(y) <= (1-lenY)*incY) { 117 panic(shortY) 118 } 119 120 // Quick return if possible. 121 if alpha == 0 && beta == 1 { 122 return 123 } 124 125 var kx, ky int 126 if incX < 0 { 127 kx = -(lenX - 1) * incX 128 } 129 if incY < 0 { 130 ky = -(lenY - 1) * incY 131 } 132 133 // Form y = beta * y. 134 if beta != 1 { 135 if incY == 1 { 136 if beta == 0 { 137 for i := range y[:lenY] { 138 y[i] = 0 139 } 140 } else { 141 f32.ScalUnitary(beta, y[:lenY]) 142 } 143 } else { 144 iy := ky 145 if beta == 0 { 146 for i := 0; i < lenY; i++ { 147 y[iy] = 0 148 iy += incY 149 } 150 } else { 151 if incY > 0 { 152 f32.ScalInc(beta, y, uintptr(lenY), uintptr(incY)) 153 } else { 154 f32.ScalInc(beta, y, uintptr(lenY), uintptr(-incY)) 155 } 156 } 157 } 158 } 159 160 if alpha == 0 { 161 return 162 } 163 164 // i and j are indices of the compacted banded matrix. 165 // off is the offset into the dense matrix (off + j = densej) 166 nCol := kU + 1 + kL 167 if tA == blas.NoTrans { 168 iy := ky 169 if incX == 1 { 170 for i := 0; i < min(m, n+kL); i++ { 171 l := max(0, kL-i) 172 u := min(nCol, n+kL-i) 173 off := max(0, i-kL) 174 atmp := a[i*lda+l : i*lda+u] 175 xtmp := x[off : off+u-l] 176 var sum float32 177 for j, v := range atmp { 178 sum += xtmp[j] * v 179 } 180 y[iy] += sum * alpha 181 iy += incY 182 } 183 return 184 } 185 for i := 0; i < min(m, n+kL); i++ { 186 l := max(0, kL-i) 187 u := min(nCol, n+kL-i) 188 off := max(0, i-kL) 189 atmp := a[i*lda+l : i*lda+u] 190 jx := kx 191 var sum float32 192 for _, v := range atmp { 193 sum += x[off*incX+jx] * v 194 jx += incX 195 } 196 y[iy] += sum * alpha 197 iy += incY 198 } 199 return 200 } 201 if incX == 1 { 202 for i := 0; i < min(m, n+kL); i++ { 203 l := max(0, kL-i) 204 u := min(nCol, n+kL-i) 205 off := max(0, i-kL) 206 atmp := a[i*lda+l : i*lda+u] 207 tmp := alpha * x[i] 208 jy := ky 209 for _, v := range atmp { 210 y[jy+off*incY] += tmp * v 211 jy += incY 212 } 213 } 214 return 215 } 216 ix := kx 217 for i := 0; i < min(m, n+kL); i++ { 218 l := max(0, kL-i) 219 u := min(nCol, n+kL-i) 220 off := max(0, i-kL) 221 atmp := a[i*lda+l : i*lda+u] 222 tmp := alpha * x[ix] 223 jy := ky 224 for _, v := range atmp { 225 y[jy+off*incY] += tmp * v 226 jy += incY 227 } 228 ix += incX 229 } 230 } 231 232 // Sgemv computes 233 // y = alpha * A * x + beta * y if tA = blas.NoTrans 234 // y = alpha * Aᵀ * x + beta * y if tA = blas.Trans or blas.ConjTrans 235 // where A is an m×n dense matrix, x and y are vectors, and alpha and beta are scalars. 236 // 237 // Float32 implementations are autogenerated and not directly tested. 238 func (Implementation) Sgemv(tA blas.Transpose, m, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 239 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 240 panic(badTranspose) 241 } 242 if m < 0 { 243 panic(mLT0) 244 } 245 if n < 0 { 246 panic(nLT0) 247 } 248 if lda < max(1, n) { 249 panic(badLdA) 250 } 251 if incX == 0 { 252 panic(zeroIncX) 253 } 254 if incY == 0 { 255 panic(zeroIncY) 256 } 257 // Set up indexes 258 lenX := m 259 lenY := n 260 if tA == blas.NoTrans { 261 lenX = n 262 lenY = m 263 } 264 265 // Quick return if possible 266 if m == 0 || n == 0 { 267 return 268 } 269 270 if (incX > 0 && (lenX-1)*incX >= len(x)) || (incX < 0 && (1-lenX)*incX >= len(x)) { 271 panic(shortX) 272 } 273 if (incY > 0 && (lenY-1)*incY >= len(y)) || (incY < 0 && (1-lenY)*incY >= len(y)) { 274 panic(shortY) 275 } 276 if len(a) < lda*(m-1)+n { 277 panic(shortA) 278 } 279 280 // Quick return if possible 281 if alpha == 0 && beta == 1 { 282 return 283 } 284 285 if alpha == 0 { 286 // First form y = beta * y 287 if incY > 0 { 288 Implementation{}.Sscal(lenY, beta, y, incY) 289 } else { 290 Implementation{}.Sscal(lenY, beta, y, -incY) 291 } 292 return 293 } 294 295 // Form y = alpha * A * x + y 296 if tA == blas.NoTrans { 297 f32.GemvN(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 298 return 299 } 300 // Cases where a is transposed. 301 f32.GemvT(uintptr(m), uintptr(n), alpha, a, uintptr(lda), x, uintptr(incX), beta, y, uintptr(incY)) 302 } 303 304 // Strmv performs one of the matrix-vector operations 305 // x = A * x if tA == blas.NoTrans 306 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 307 // where A is an n×n triangular matrix, and x is a vector. 308 // 309 // Float32 implementations are autogenerated and not directly tested. 310 func (Implementation) Strmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 311 if ul != blas.Lower && ul != blas.Upper { 312 panic(badUplo) 313 } 314 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 315 panic(badTranspose) 316 } 317 if d != blas.NonUnit && d != blas.Unit { 318 panic(badDiag) 319 } 320 if n < 0 { 321 panic(nLT0) 322 } 323 if lda < max(1, n) { 324 panic(badLdA) 325 } 326 if incX == 0 { 327 panic(zeroIncX) 328 } 329 330 // Quick return if possible. 331 if n == 0 { 332 return 333 } 334 335 // For zero matrix size the following slice length checks are trivially satisfied. 336 if len(a) < lda*(n-1)+n { 337 panic(shortA) 338 } 339 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 340 panic(shortX) 341 } 342 343 nonUnit := d != blas.Unit 344 if n == 1 { 345 if nonUnit { 346 x[0] *= a[0] 347 } 348 return 349 } 350 var kx int 351 if incX <= 0 { 352 kx = -(n - 1) * incX 353 } 354 if tA == blas.NoTrans { 355 if ul == blas.Upper { 356 if incX == 1 { 357 for i := 0; i < n; i++ { 358 ilda := i * lda 359 var tmp float32 360 if nonUnit { 361 tmp = a[ilda+i] * x[i] 362 } else { 363 tmp = x[i] 364 } 365 x[i] = tmp + f32.DotUnitary(a[ilda+i+1:ilda+n], x[i+1:n]) 366 } 367 return 368 } 369 ix := kx 370 for i := 0; i < n; i++ { 371 ilda := i * lda 372 var tmp float32 373 if nonUnit { 374 tmp = a[ilda+i] * x[ix] 375 } else { 376 tmp = x[ix] 377 } 378 x[ix] = tmp + f32.DotInc(x, a[ilda+i+1:ilda+n], uintptr(n-i-1), uintptr(incX), 1, uintptr(ix+incX), 0) 379 ix += incX 380 } 381 return 382 } 383 if incX == 1 { 384 for i := n - 1; i >= 0; i-- { 385 ilda := i * lda 386 var tmp float32 387 if nonUnit { 388 tmp += a[ilda+i] * x[i] 389 } else { 390 tmp = x[i] 391 } 392 x[i] = tmp + f32.DotUnitary(a[ilda:ilda+i], x[:i]) 393 } 394 return 395 } 396 ix := kx + (n-1)*incX 397 for i := n - 1; i >= 0; i-- { 398 ilda := i * lda 399 var tmp float32 400 if nonUnit { 401 tmp = a[ilda+i] * x[ix] 402 } else { 403 tmp = x[ix] 404 } 405 x[ix] = tmp + f32.DotInc(x, a[ilda:ilda+i], uintptr(i), uintptr(incX), 1, uintptr(kx), 0) 406 ix -= incX 407 } 408 return 409 } 410 // Cases where a is transposed. 411 if ul == blas.Upper { 412 if incX == 1 { 413 for i := n - 1; i >= 0; i-- { 414 ilda := i * lda 415 xi := x[i] 416 f32.AxpyUnitary(xi, a[ilda+i+1:ilda+n], x[i+1:n]) 417 if nonUnit { 418 x[i] *= a[ilda+i] 419 } 420 } 421 return 422 } 423 ix := kx + (n-1)*incX 424 for i := n - 1; i >= 0; i-- { 425 ilda := i * lda 426 xi := x[ix] 427 f32.AxpyInc(xi, a[ilda+i+1:ilda+n], x, uintptr(n-i-1), 1, uintptr(incX), 0, uintptr(kx+(i+1)*incX)) 428 if nonUnit { 429 x[ix] *= a[ilda+i] 430 } 431 ix -= incX 432 } 433 return 434 } 435 if incX == 1 { 436 for i := 0; i < n; i++ { 437 ilda := i * lda 438 xi := x[i] 439 f32.AxpyUnitary(xi, a[ilda:ilda+i], x[:i]) 440 if nonUnit { 441 x[i] *= a[i*lda+i] 442 } 443 } 444 return 445 } 446 ix := kx 447 for i := 0; i < n; i++ { 448 ilda := i * lda 449 xi := x[ix] 450 f32.AxpyInc(xi, a[ilda:ilda+i], x, uintptr(i), 1, uintptr(incX), 0, uintptr(kx)) 451 if nonUnit { 452 x[ix] *= a[ilda+i] 453 } 454 ix += incX 455 } 456 } 457 458 // Strsv solves one of the systems of equations 459 // A * x = b if tA == blas.NoTrans 460 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 461 // where A is an n×n triangular matrix, and x and b are vectors. 462 // 463 // At entry to the function, x contains the values of b, and the result is 464 // stored in-place into x. 465 // 466 // No test for singularity or near-singularity is included in this 467 // routine. Such tests must be performed before calling this routine. 468 // 469 // Float32 implementations are autogenerated and not directly tested. 470 func (Implementation) Strsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, a []float32, lda int, x []float32, incX int) { 471 if ul != blas.Lower && ul != blas.Upper { 472 panic(badUplo) 473 } 474 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 475 panic(badTranspose) 476 } 477 if d != blas.NonUnit && d != blas.Unit { 478 panic(badDiag) 479 } 480 if n < 0 { 481 panic(nLT0) 482 } 483 if lda < max(1, n) { 484 panic(badLdA) 485 } 486 if incX == 0 { 487 panic(zeroIncX) 488 } 489 490 // Quick return if possible. 491 if n == 0 { 492 return 493 } 494 495 // For zero matrix size the following slice length checks are trivially satisfied. 496 if len(a) < lda*(n-1)+n { 497 panic(shortA) 498 } 499 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 500 panic(shortX) 501 } 502 503 if n == 1 { 504 if d == blas.NonUnit { 505 x[0] /= a[0] 506 } 507 return 508 } 509 510 var kx int 511 if incX < 0 { 512 kx = -(n - 1) * incX 513 } 514 nonUnit := d == blas.NonUnit 515 if tA == blas.NoTrans { 516 if ul == blas.Upper { 517 if incX == 1 { 518 for i := n - 1; i >= 0; i-- { 519 var sum float32 520 atmp := a[i*lda+i+1 : i*lda+n] 521 for j, v := range atmp { 522 jv := i + j + 1 523 sum += x[jv] * v 524 } 525 x[i] -= sum 526 if nonUnit { 527 x[i] /= a[i*lda+i] 528 } 529 } 530 return 531 } 532 ix := kx + (n-1)*incX 533 for i := n - 1; i >= 0; i-- { 534 var sum float32 535 jx := ix + incX 536 atmp := a[i*lda+i+1 : i*lda+n] 537 for _, v := range atmp { 538 sum += x[jx] * v 539 jx += incX 540 } 541 x[ix] -= sum 542 if nonUnit { 543 x[ix] /= a[i*lda+i] 544 } 545 ix -= incX 546 } 547 return 548 } 549 if incX == 1 { 550 for i := 0; i < n; i++ { 551 var sum float32 552 atmp := a[i*lda : i*lda+i] 553 for j, v := range atmp { 554 sum += x[j] * v 555 } 556 x[i] -= sum 557 if nonUnit { 558 x[i] /= a[i*lda+i] 559 } 560 } 561 return 562 } 563 ix := kx 564 for i := 0; i < n; i++ { 565 jx := kx 566 var sum float32 567 atmp := a[i*lda : i*lda+i] 568 for _, v := range atmp { 569 sum += x[jx] * v 570 jx += incX 571 } 572 x[ix] -= sum 573 if nonUnit { 574 x[ix] /= a[i*lda+i] 575 } 576 ix += incX 577 } 578 return 579 } 580 // Cases where a is transposed. 581 if ul == blas.Upper { 582 if incX == 1 { 583 for i := 0; i < n; i++ { 584 if nonUnit { 585 x[i] /= a[i*lda+i] 586 } 587 xi := x[i] 588 atmp := a[i*lda+i+1 : i*lda+n] 589 for j, v := range atmp { 590 jv := j + i + 1 591 x[jv] -= v * xi 592 } 593 } 594 return 595 } 596 ix := kx 597 for i := 0; i < n; i++ { 598 if nonUnit { 599 x[ix] /= a[i*lda+i] 600 } 601 xi := x[ix] 602 jx := kx + (i+1)*incX 603 atmp := a[i*lda+i+1 : i*lda+n] 604 for _, v := range atmp { 605 x[jx] -= v * xi 606 jx += incX 607 } 608 ix += incX 609 } 610 return 611 } 612 if incX == 1 { 613 for i := n - 1; i >= 0; i-- { 614 if nonUnit { 615 x[i] /= a[i*lda+i] 616 } 617 xi := x[i] 618 atmp := a[i*lda : i*lda+i] 619 for j, v := range atmp { 620 x[j] -= v * xi 621 } 622 } 623 return 624 } 625 ix := kx + (n-1)*incX 626 for i := n - 1; i >= 0; i-- { 627 if nonUnit { 628 x[ix] /= a[i*lda+i] 629 } 630 xi := x[ix] 631 jx := kx 632 atmp := a[i*lda : i*lda+i] 633 for _, v := range atmp { 634 x[jx] -= v * xi 635 jx += incX 636 } 637 ix -= incX 638 } 639 } 640 641 // Ssymv performs the matrix-vector operation 642 // y = alpha * A * x + beta * y 643 // where A is an n×n symmetric matrix, x and y are vectors, and alpha and 644 // beta are scalars. 645 // 646 // Float32 implementations are autogenerated and not directly tested. 647 func (Implementation) Ssymv(ul blas.Uplo, n int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 648 if ul != blas.Lower && ul != blas.Upper { 649 panic(badUplo) 650 } 651 if n < 0 { 652 panic(nLT0) 653 } 654 if lda < max(1, n) { 655 panic(badLdA) 656 } 657 if incX == 0 { 658 panic(zeroIncX) 659 } 660 if incY == 0 { 661 panic(zeroIncY) 662 } 663 664 // Quick return if possible. 665 if n == 0 { 666 return 667 } 668 669 // For zero matrix size the following slice length checks are trivially satisfied. 670 if len(a) < lda*(n-1)+n { 671 panic(shortA) 672 } 673 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 674 panic(shortX) 675 } 676 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 677 panic(shortY) 678 } 679 680 // Quick return if possible. 681 if alpha == 0 && beta == 1 { 682 return 683 } 684 685 // Set up start points 686 var kx, ky int 687 if incX < 0 { 688 kx = -(n - 1) * incX 689 } 690 if incY < 0 { 691 ky = -(n - 1) * incY 692 } 693 694 // Form y = beta * y 695 if beta != 1 { 696 if incY == 1 { 697 if beta == 0 { 698 for i := range y[:n] { 699 y[i] = 0 700 } 701 } else { 702 f32.ScalUnitary(beta, y[:n]) 703 } 704 } else { 705 iy := ky 706 if beta == 0 { 707 for i := 0; i < n; i++ { 708 y[iy] = 0 709 iy += incY 710 } 711 } else { 712 if incY > 0 { 713 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 714 } else { 715 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 716 } 717 } 718 } 719 } 720 721 if alpha == 0 { 722 return 723 } 724 725 if n == 1 { 726 y[0] += alpha * a[0] * x[0] 727 return 728 } 729 730 if ul == blas.Upper { 731 if incX == 1 { 732 iy := ky 733 for i := 0; i < n; i++ { 734 xv := x[i] * alpha 735 sum := x[i] * a[i*lda+i] 736 jy := ky + (i+1)*incY 737 atmp := a[i*lda+i+1 : i*lda+n] 738 for j, v := range atmp { 739 jp := j + i + 1 740 sum += x[jp] * v 741 y[jy] += xv * v 742 jy += incY 743 } 744 y[iy] += alpha * sum 745 iy += incY 746 } 747 return 748 } 749 ix := kx 750 iy := ky 751 for i := 0; i < n; i++ { 752 xv := x[ix] * alpha 753 sum := x[ix] * a[i*lda+i] 754 jx := kx + (i+1)*incX 755 jy := ky + (i+1)*incY 756 atmp := a[i*lda+i+1 : i*lda+n] 757 for _, v := range atmp { 758 sum += x[jx] * v 759 y[jy] += xv * v 760 jx += incX 761 jy += incY 762 } 763 y[iy] += alpha * sum 764 ix += incX 765 iy += incY 766 } 767 return 768 } 769 // Cases where a is lower triangular. 770 if incX == 1 { 771 iy := ky 772 for i := 0; i < n; i++ { 773 jy := ky 774 xv := alpha * x[i] 775 atmp := a[i*lda : i*lda+i] 776 var sum float32 777 for j, v := range atmp { 778 sum += x[j] * v 779 y[jy] += xv * v 780 jy += incY 781 } 782 sum += x[i] * a[i*lda+i] 783 sum *= alpha 784 y[iy] += sum 785 iy += incY 786 } 787 return 788 } 789 ix := kx 790 iy := ky 791 for i := 0; i < n; i++ { 792 jx := kx 793 jy := ky 794 xv := alpha * x[ix] 795 atmp := a[i*lda : i*lda+i] 796 var sum float32 797 for _, v := range atmp { 798 sum += x[jx] * v 799 y[jy] += xv * v 800 jx += incX 801 jy += incY 802 } 803 sum += x[ix] * a[i*lda+i] 804 sum *= alpha 805 y[iy] += sum 806 ix += incX 807 iy += incY 808 } 809 } 810 811 // Stbmv performs one of the matrix-vector operations 812 // x = A * x if tA == blas.NoTrans 813 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 814 // where A is an n×n triangular band matrix with k+1 diagonals, and x is a vector. 815 // 816 // Float32 implementations are autogenerated and not directly tested. 817 func (Implementation) Stbmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 818 if ul != blas.Lower && ul != blas.Upper { 819 panic(badUplo) 820 } 821 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 822 panic(badTranspose) 823 } 824 if d != blas.NonUnit && d != blas.Unit { 825 panic(badDiag) 826 } 827 if n < 0 { 828 panic(nLT0) 829 } 830 if k < 0 { 831 panic(kLT0) 832 } 833 if lda < k+1 { 834 panic(badLdA) 835 } 836 if incX == 0 { 837 panic(zeroIncX) 838 } 839 840 // Quick return if possible. 841 if n == 0 { 842 return 843 } 844 845 // For zero matrix size the following slice length checks are trivially satisfied. 846 if len(a) < lda*(n-1)+k+1 { 847 panic(shortA) 848 } 849 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 850 panic(shortX) 851 } 852 853 var kx int 854 if incX < 0 { 855 kx = -(n - 1) * incX 856 } 857 858 nonunit := d != blas.Unit 859 860 if tA == blas.NoTrans { 861 if ul == blas.Upper { 862 if incX == 1 { 863 for i := 0; i < n; i++ { 864 u := min(1+k, n-i) 865 var sum float32 866 atmp := a[i*lda:] 867 xtmp := x[i:] 868 for j := 1; j < u; j++ { 869 sum += xtmp[j] * atmp[j] 870 } 871 if nonunit { 872 sum += xtmp[0] * atmp[0] 873 } else { 874 sum += xtmp[0] 875 } 876 x[i] = sum 877 } 878 return 879 } 880 ix := kx 881 for i := 0; i < n; i++ { 882 u := min(1+k, n-i) 883 var sum float32 884 atmp := a[i*lda:] 885 jx := incX 886 for j := 1; j < u; j++ { 887 sum += x[ix+jx] * atmp[j] 888 jx += incX 889 } 890 if nonunit { 891 sum += x[ix] * atmp[0] 892 } else { 893 sum += x[ix] 894 } 895 x[ix] = sum 896 ix += incX 897 } 898 return 899 } 900 if incX == 1 { 901 for i := n - 1; i >= 0; i-- { 902 l := max(0, k-i) 903 atmp := a[i*lda:] 904 var sum float32 905 for j := l; j < k; j++ { 906 sum += x[i-k+j] * atmp[j] 907 } 908 if nonunit { 909 sum += x[i] * atmp[k] 910 } else { 911 sum += x[i] 912 } 913 x[i] = sum 914 } 915 return 916 } 917 ix := kx + (n-1)*incX 918 for i := n - 1; i >= 0; i-- { 919 l := max(0, k-i) 920 atmp := a[i*lda:] 921 var sum float32 922 jx := l * incX 923 for j := l; j < k; j++ { 924 sum += x[ix-k*incX+jx] * atmp[j] 925 jx += incX 926 } 927 if nonunit { 928 sum += x[ix] * atmp[k] 929 } else { 930 sum += x[ix] 931 } 932 x[ix] = sum 933 ix -= incX 934 } 935 return 936 } 937 if ul == blas.Upper { 938 if incX == 1 { 939 for i := n - 1; i >= 0; i-- { 940 u := k + 1 941 if i < u { 942 u = i + 1 943 } 944 var sum float32 945 for j := 1; j < u; j++ { 946 sum += x[i-j] * a[(i-j)*lda+j] 947 } 948 if nonunit { 949 sum += x[i] * a[i*lda] 950 } else { 951 sum += x[i] 952 } 953 x[i] = sum 954 } 955 return 956 } 957 ix := kx + (n-1)*incX 958 for i := n - 1; i >= 0; i-- { 959 u := k + 1 960 if i < u { 961 u = i + 1 962 } 963 var sum float32 964 jx := incX 965 for j := 1; j < u; j++ { 966 sum += x[ix-jx] * a[(i-j)*lda+j] 967 jx += incX 968 } 969 if nonunit { 970 sum += x[ix] * a[i*lda] 971 } else { 972 sum += x[ix] 973 } 974 x[ix] = sum 975 ix -= incX 976 } 977 return 978 } 979 if incX == 1 { 980 for i := 0; i < n; i++ { 981 u := k 982 if i+k >= n { 983 u = n - i - 1 984 } 985 var sum float32 986 for j := 0; j < u; j++ { 987 sum += x[i+j+1] * a[(i+j+1)*lda+k-j-1] 988 } 989 if nonunit { 990 sum += x[i] * a[i*lda+k] 991 } else { 992 sum += x[i] 993 } 994 x[i] = sum 995 } 996 return 997 } 998 ix := kx 999 for i := 0; i < n; i++ { 1000 u := k 1001 if i+k >= n { 1002 u = n - i - 1 1003 } 1004 var ( 1005 sum float32 1006 jx int 1007 ) 1008 for j := 0; j < u; j++ { 1009 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1010 jx += incX 1011 } 1012 if nonunit { 1013 sum += x[ix] * a[i*lda+k] 1014 } else { 1015 sum += x[ix] 1016 } 1017 x[ix] = sum 1018 ix += incX 1019 } 1020 } 1021 1022 // Stpmv performs one of the matrix-vector operations 1023 // x = A * x if tA == blas.NoTrans 1024 // x = Aᵀ * x if tA == blas.Trans or blas.ConjTrans 1025 // where A is an n×n triangular matrix in packed format, and x is a vector. 1026 // 1027 // Float32 implementations are autogenerated and not directly tested. 1028 func (Implementation) Stpmv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 1029 if ul != blas.Lower && ul != blas.Upper { 1030 panic(badUplo) 1031 } 1032 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1033 panic(badTranspose) 1034 } 1035 if d != blas.NonUnit && d != blas.Unit { 1036 panic(badDiag) 1037 } 1038 if n < 0 { 1039 panic(nLT0) 1040 } 1041 if incX == 0 { 1042 panic(zeroIncX) 1043 } 1044 1045 // Quick return if possible. 1046 if n == 0 { 1047 return 1048 } 1049 1050 // For zero matrix size the following slice length checks are trivially satisfied. 1051 if len(ap) < n*(n+1)/2 { 1052 panic(shortAP) 1053 } 1054 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1055 panic(shortX) 1056 } 1057 1058 var kx int 1059 if incX < 0 { 1060 kx = -(n - 1) * incX 1061 } 1062 1063 nonUnit := d == blas.NonUnit 1064 var offset int // Offset is the index of (i,i) 1065 if tA == blas.NoTrans { 1066 if ul == blas.Upper { 1067 if incX == 1 { 1068 for i := 0; i < n; i++ { 1069 xi := x[i] 1070 if nonUnit { 1071 xi *= ap[offset] 1072 } 1073 atmp := ap[offset+1 : offset+n-i] 1074 xtmp := x[i+1:] 1075 for j, v := range atmp { 1076 xi += v * xtmp[j] 1077 } 1078 x[i] = xi 1079 offset += n - i 1080 } 1081 return 1082 } 1083 ix := kx 1084 for i := 0; i < n; i++ { 1085 xix := x[ix] 1086 if nonUnit { 1087 xix *= ap[offset] 1088 } 1089 atmp := ap[offset+1 : offset+n-i] 1090 jx := kx + (i+1)*incX 1091 for _, v := range atmp { 1092 xix += v * x[jx] 1093 jx += incX 1094 } 1095 x[ix] = xix 1096 offset += n - i 1097 ix += incX 1098 } 1099 return 1100 } 1101 if incX == 1 { 1102 offset = n*(n+1)/2 - 1 1103 for i := n - 1; i >= 0; i-- { 1104 xi := x[i] 1105 if nonUnit { 1106 xi *= ap[offset] 1107 } 1108 atmp := ap[offset-i : offset] 1109 for j, v := range atmp { 1110 xi += v * x[j] 1111 } 1112 x[i] = xi 1113 offset -= i + 1 1114 } 1115 return 1116 } 1117 ix := kx + (n-1)*incX 1118 offset = n*(n+1)/2 - 1 1119 for i := n - 1; i >= 0; i-- { 1120 xix := x[ix] 1121 if nonUnit { 1122 xix *= ap[offset] 1123 } 1124 atmp := ap[offset-i : offset] 1125 jx := kx 1126 for _, v := range atmp { 1127 xix += v * x[jx] 1128 jx += incX 1129 } 1130 x[ix] = xix 1131 offset -= i + 1 1132 ix -= incX 1133 } 1134 return 1135 } 1136 // Cases where ap is transposed. 1137 if ul == blas.Upper { 1138 if incX == 1 { 1139 offset = n*(n+1)/2 - 1 1140 for i := n - 1; i >= 0; i-- { 1141 xi := x[i] 1142 atmp := ap[offset+1 : offset+n-i] 1143 xtmp := x[i+1:] 1144 for j, v := range atmp { 1145 xtmp[j] += v * xi 1146 } 1147 if nonUnit { 1148 x[i] *= ap[offset] 1149 } 1150 offset -= n - i + 1 1151 } 1152 return 1153 } 1154 ix := kx + (n-1)*incX 1155 offset = n*(n+1)/2 - 1 1156 for i := n - 1; i >= 0; i-- { 1157 xix := x[ix] 1158 jx := kx + (i+1)*incX 1159 atmp := ap[offset+1 : offset+n-i] 1160 for _, v := range atmp { 1161 x[jx] += v * xix 1162 jx += incX 1163 } 1164 if nonUnit { 1165 x[ix] *= ap[offset] 1166 } 1167 offset -= n - i + 1 1168 ix -= incX 1169 } 1170 return 1171 } 1172 if incX == 1 { 1173 for i := 0; i < n; i++ { 1174 xi := x[i] 1175 atmp := ap[offset-i : offset] 1176 for j, v := range atmp { 1177 x[j] += v * xi 1178 } 1179 if nonUnit { 1180 x[i] *= ap[offset] 1181 } 1182 offset += i + 2 1183 } 1184 return 1185 } 1186 ix := kx 1187 for i := 0; i < n; i++ { 1188 xix := x[ix] 1189 jx := kx 1190 atmp := ap[offset-i : offset] 1191 for _, v := range atmp { 1192 x[jx] += v * xix 1193 jx += incX 1194 } 1195 if nonUnit { 1196 x[ix] *= ap[offset] 1197 } 1198 ix += incX 1199 offset += i + 2 1200 } 1201 } 1202 1203 // Stbsv solves one of the systems of equations 1204 // A * x = b if tA == blas.NoTrans 1205 // Aᵀ * x = b if tA == blas.Trans or tA == blas.ConjTrans 1206 // where A is an n×n triangular band matrix with k+1 diagonals, 1207 // and x and b are vectors. 1208 // 1209 // At entry to the function, x contains the values of b, and the result is 1210 // stored in-place into x. 1211 // 1212 // No test for singularity or near-singularity is included in this 1213 // routine. Such tests must be performed before calling this routine. 1214 // 1215 // Float32 implementations are autogenerated and not directly tested. 1216 func (Implementation) Stbsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n, k int, a []float32, lda int, x []float32, incX int) { 1217 if ul != blas.Lower && ul != blas.Upper { 1218 panic(badUplo) 1219 } 1220 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1221 panic(badTranspose) 1222 } 1223 if d != blas.NonUnit && d != blas.Unit { 1224 panic(badDiag) 1225 } 1226 if n < 0 { 1227 panic(nLT0) 1228 } 1229 if k < 0 { 1230 panic(kLT0) 1231 } 1232 if lda < k+1 { 1233 panic(badLdA) 1234 } 1235 if incX == 0 { 1236 panic(zeroIncX) 1237 } 1238 1239 // Quick return if possible. 1240 if n == 0 { 1241 return 1242 } 1243 1244 // For zero matrix size the following slice length checks are trivially satisfied. 1245 if len(a) < lda*(n-1)+k+1 { 1246 panic(shortA) 1247 } 1248 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1249 panic(shortX) 1250 } 1251 1252 var kx int 1253 if incX < 0 { 1254 kx = -(n - 1) * incX 1255 } 1256 nonUnit := d == blas.NonUnit 1257 // Form x = A^-1 x. 1258 // Several cases below use subslices for speed improvement. 1259 // The incX != 1 cases usually do not because incX may be negative. 1260 if tA == blas.NoTrans { 1261 if ul == blas.Upper { 1262 if incX == 1 { 1263 for i := n - 1; i >= 0; i-- { 1264 bands := k 1265 if i+bands >= n { 1266 bands = n - i - 1 1267 } 1268 atmp := a[i*lda+1:] 1269 xtmp := x[i+1 : i+bands+1] 1270 var sum float32 1271 for j, v := range xtmp { 1272 sum += v * atmp[j] 1273 } 1274 x[i] -= sum 1275 if nonUnit { 1276 x[i] /= a[i*lda] 1277 } 1278 } 1279 return 1280 } 1281 ix := kx + (n-1)*incX 1282 for i := n - 1; i >= 0; i-- { 1283 max := k + 1 1284 if i+max > n { 1285 max = n - i 1286 } 1287 atmp := a[i*lda:] 1288 var ( 1289 jx int 1290 sum float32 1291 ) 1292 for j := 1; j < max; j++ { 1293 jx += incX 1294 sum += x[ix+jx] * atmp[j] 1295 } 1296 x[ix] -= sum 1297 if nonUnit { 1298 x[ix] /= atmp[0] 1299 } 1300 ix -= incX 1301 } 1302 return 1303 } 1304 if incX == 1 { 1305 for i := 0; i < n; i++ { 1306 bands := k 1307 if i-k < 0 { 1308 bands = i 1309 } 1310 atmp := a[i*lda+k-bands:] 1311 xtmp := x[i-bands : i] 1312 var sum float32 1313 for j, v := range xtmp { 1314 sum += v * atmp[j] 1315 } 1316 x[i] -= sum 1317 if nonUnit { 1318 x[i] /= atmp[bands] 1319 } 1320 } 1321 return 1322 } 1323 ix := kx 1324 for i := 0; i < n; i++ { 1325 bands := k 1326 if i-k < 0 { 1327 bands = i 1328 } 1329 atmp := a[i*lda+k-bands:] 1330 var ( 1331 sum float32 1332 jx int 1333 ) 1334 for j := 0; j < bands; j++ { 1335 sum += x[ix-bands*incX+jx] * atmp[j] 1336 jx += incX 1337 } 1338 x[ix] -= sum 1339 if nonUnit { 1340 x[ix] /= atmp[bands] 1341 } 1342 ix += incX 1343 } 1344 return 1345 } 1346 // Cases where a is transposed. 1347 if ul == blas.Upper { 1348 if incX == 1 { 1349 for i := 0; i < n; i++ { 1350 bands := k 1351 if i-k < 0 { 1352 bands = i 1353 } 1354 var sum float32 1355 for j := 0; j < bands; j++ { 1356 sum += x[i-bands+j] * a[(i-bands+j)*lda+bands-j] 1357 } 1358 x[i] -= sum 1359 if nonUnit { 1360 x[i] /= a[i*lda] 1361 } 1362 } 1363 return 1364 } 1365 ix := kx 1366 for i := 0; i < n; i++ { 1367 bands := k 1368 if i-k < 0 { 1369 bands = i 1370 } 1371 var ( 1372 sum float32 1373 jx int 1374 ) 1375 for j := 0; j < bands; j++ { 1376 sum += x[ix-bands*incX+jx] * a[(i-bands+j)*lda+bands-j] 1377 jx += incX 1378 } 1379 x[ix] -= sum 1380 if nonUnit { 1381 x[ix] /= a[i*lda] 1382 } 1383 ix += incX 1384 } 1385 return 1386 } 1387 if incX == 1 { 1388 for i := n - 1; i >= 0; i-- { 1389 bands := k 1390 if i+bands >= n { 1391 bands = n - i - 1 1392 } 1393 var sum float32 1394 xtmp := x[i+1 : i+1+bands] 1395 for j, v := range xtmp { 1396 sum += v * a[(i+j+1)*lda+k-j-1] 1397 } 1398 x[i] -= sum 1399 if nonUnit { 1400 x[i] /= a[i*lda+k] 1401 } 1402 } 1403 return 1404 } 1405 ix := kx + (n-1)*incX 1406 for i := n - 1; i >= 0; i-- { 1407 bands := k 1408 if i+bands >= n { 1409 bands = n - i - 1 1410 } 1411 var ( 1412 sum float32 1413 jx int 1414 ) 1415 for j := 0; j < bands; j++ { 1416 sum += x[ix+jx+incX] * a[(i+j+1)*lda+k-j-1] 1417 jx += incX 1418 } 1419 x[ix] -= sum 1420 if nonUnit { 1421 x[ix] /= a[i*lda+k] 1422 } 1423 ix -= incX 1424 } 1425 } 1426 1427 // Ssbmv performs the matrix-vector operation 1428 // y = alpha * A * x + beta * y 1429 // where A is an n×n symmetric band matrix with k super-diagonals, x and y are 1430 // vectors, and alpha and beta are scalars. 1431 // 1432 // Float32 implementations are autogenerated and not directly tested. 1433 func (Implementation) Ssbmv(ul blas.Uplo, n, k int, alpha float32, a []float32, lda int, x []float32, incX int, beta float32, y []float32, incY int) { 1434 if ul != blas.Lower && ul != blas.Upper { 1435 panic(badUplo) 1436 } 1437 if n < 0 { 1438 panic(nLT0) 1439 } 1440 if k < 0 { 1441 panic(kLT0) 1442 } 1443 if lda < k+1 { 1444 panic(badLdA) 1445 } 1446 if incX == 0 { 1447 panic(zeroIncX) 1448 } 1449 if incY == 0 { 1450 panic(zeroIncY) 1451 } 1452 1453 // Quick return if possible. 1454 if n == 0 { 1455 return 1456 } 1457 1458 // For zero matrix size the following slice length checks are trivially satisfied. 1459 if len(a) < lda*(n-1)+k+1 { 1460 panic(shortA) 1461 } 1462 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1463 panic(shortX) 1464 } 1465 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1466 panic(shortY) 1467 } 1468 1469 // Quick return if possible. 1470 if alpha == 0 && beta == 1 { 1471 return 1472 } 1473 1474 // Set up indexes 1475 lenX := n 1476 lenY := n 1477 var kx, ky int 1478 if incX < 0 { 1479 kx = -(lenX - 1) * incX 1480 } 1481 if incY < 0 { 1482 ky = -(lenY - 1) * incY 1483 } 1484 1485 // Form y = beta * y. 1486 if beta != 1 { 1487 if incY == 1 { 1488 if beta == 0 { 1489 for i := range y[:n] { 1490 y[i] = 0 1491 } 1492 } else { 1493 f32.ScalUnitary(beta, y[:n]) 1494 } 1495 } else { 1496 iy := ky 1497 if beta == 0 { 1498 for i := 0; i < n; i++ { 1499 y[iy] = 0 1500 iy += incY 1501 } 1502 } else { 1503 if incY > 0 { 1504 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 1505 } else { 1506 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 1507 } 1508 } 1509 } 1510 } 1511 1512 if alpha == 0 { 1513 return 1514 } 1515 1516 if ul == blas.Upper { 1517 if incX == 1 { 1518 iy := ky 1519 for i := 0; i < n; i++ { 1520 atmp := a[i*lda:] 1521 tmp := alpha * x[i] 1522 sum := tmp * atmp[0] 1523 u := min(k, n-i-1) 1524 jy := incY 1525 for j := 1; j <= u; j++ { 1526 v := atmp[j] 1527 sum += alpha * x[i+j] * v 1528 y[iy+jy] += tmp * v 1529 jy += incY 1530 } 1531 y[iy] += sum 1532 iy += incY 1533 } 1534 return 1535 } 1536 ix := kx 1537 iy := ky 1538 for i := 0; i < n; i++ { 1539 atmp := a[i*lda:] 1540 tmp := alpha * x[ix] 1541 sum := tmp * atmp[0] 1542 u := min(k, n-i-1) 1543 jx := incX 1544 jy := incY 1545 for j := 1; j <= u; j++ { 1546 v := atmp[j] 1547 sum += alpha * x[ix+jx] * v 1548 y[iy+jy] += tmp * v 1549 jx += incX 1550 jy += incY 1551 } 1552 y[iy] += sum 1553 ix += incX 1554 iy += incY 1555 } 1556 return 1557 } 1558 1559 // Casses where a has bands below the diagonal. 1560 if incX == 1 { 1561 iy := ky 1562 for i := 0; i < n; i++ { 1563 l := max(0, k-i) 1564 tmp := alpha * x[i] 1565 jy := l * incY 1566 atmp := a[i*lda:] 1567 for j := l; j < k; j++ { 1568 v := atmp[j] 1569 y[iy] += alpha * v * x[i-k+j] 1570 y[iy-k*incY+jy] += tmp * v 1571 jy += incY 1572 } 1573 y[iy] += tmp * atmp[k] 1574 iy += incY 1575 } 1576 return 1577 } 1578 ix := kx 1579 iy := ky 1580 for i := 0; i < n; i++ { 1581 l := max(0, k-i) 1582 tmp := alpha * x[ix] 1583 jx := l * incX 1584 jy := l * incY 1585 atmp := a[i*lda:] 1586 for j := l; j < k; j++ { 1587 v := atmp[j] 1588 y[iy] += alpha * v * x[ix-k*incX+jx] 1589 y[iy-k*incY+jy] += tmp * v 1590 jx += incX 1591 jy += incY 1592 } 1593 y[iy] += tmp * atmp[k] 1594 ix += incX 1595 iy += incY 1596 } 1597 } 1598 1599 // Ssyr performs the symmetric rank-one update 1600 // A += alpha * x * xᵀ 1601 // where A is an n×n symmetric matrix, and x is a vector. 1602 // 1603 // Float32 implementations are autogenerated and not directly tested. 1604 func (Implementation) Ssyr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, a []float32, lda int) { 1605 if ul != blas.Lower && ul != blas.Upper { 1606 panic(badUplo) 1607 } 1608 if n < 0 { 1609 panic(nLT0) 1610 } 1611 if lda < max(1, n) { 1612 panic(badLdA) 1613 } 1614 if incX == 0 { 1615 panic(zeroIncX) 1616 } 1617 1618 // Quick return if possible. 1619 if n == 0 { 1620 return 1621 } 1622 1623 // For zero matrix size the following slice length checks are trivially satisfied. 1624 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1625 panic(shortX) 1626 } 1627 if len(a) < lda*(n-1)+n { 1628 panic(shortA) 1629 } 1630 1631 // Quick return if possible. 1632 if alpha == 0 { 1633 return 1634 } 1635 1636 lenX := n 1637 var kx int 1638 if incX < 0 { 1639 kx = -(lenX - 1) * incX 1640 } 1641 if ul == blas.Upper { 1642 if incX == 1 { 1643 for i := 0; i < n; i++ { 1644 tmp := x[i] * alpha 1645 if tmp != 0 { 1646 atmp := a[i*lda+i : i*lda+n] 1647 xtmp := x[i:n] 1648 for j, v := range xtmp { 1649 atmp[j] += v * tmp 1650 } 1651 } 1652 } 1653 return 1654 } 1655 ix := kx 1656 for i := 0; i < n; i++ { 1657 tmp := x[ix] * alpha 1658 if tmp != 0 { 1659 jx := ix 1660 atmp := a[i*lda:] 1661 for j := i; j < n; j++ { 1662 atmp[j] += x[jx] * tmp 1663 jx += incX 1664 } 1665 } 1666 ix += incX 1667 } 1668 return 1669 } 1670 // Cases where a is lower triangular. 1671 if incX == 1 { 1672 for i := 0; i < n; i++ { 1673 tmp := x[i] * alpha 1674 if tmp != 0 { 1675 atmp := a[i*lda:] 1676 xtmp := x[:i+1] 1677 for j, v := range xtmp { 1678 atmp[j] += tmp * v 1679 } 1680 } 1681 } 1682 return 1683 } 1684 ix := kx 1685 for i := 0; i < n; i++ { 1686 tmp := x[ix] * alpha 1687 if tmp != 0 { 1688 atmp := a[i*lda:] 1689 jx := kx 1690 for j := 0; j < i+1; j++ { 1691 atmp[j] += tmp * x[jx] 1692 jx += incX 1693 } 1694 } 1695 ix += incX 1696 } 1697 } 1698 1699 // Ssyr2 performs the symmetric rank-two update 1700 // A += alpha * x * yᵀ + alpha * y * xᵀ 1701 // where A is an n×n symmetric matrix, x and y are vectors, and alpha is a scalar. 1702 // 1703 // Float32 implementations are autogenerated and not directly tested. 1704 func (Implementation) Ssyr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, a []float32, lda int) { 1705 if ul != blas.Lower && ul != blas.Upper { 1706 panic(badUplo) 1707 } 1708 if n < 0 { 1709 panic(nLT0) 1710 } 1711 if lda < max(1, n) { 1712 panic(badLdA) 1713 } 1714 if incX == 0 { 1715 panic(zeroIncX) 1716 } 1717 if incY == 0 { 1718 panic(zeroIncY) 1719 } 1720 1721 // Quick return if possible. 1722 if n == 0 { 1723 return 1724 } 1725 1726 // For zero matrix size the following slice length checks are trivially satisfied. 1727 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1728 panic(shortX) 1729 } 1730 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 1731 panic(shortY) 1732 } 1733 if len(a) < lda*(n-1)+n { 1734 panic(shortA) 1735 } 1736 1737 // Quick return if possible. 1738 if alpha == 0 { 1739 return 1740 } 1741 1742 var ky, kx int 1743 if incY < 0 { 1744 ky = -(n - 1) * incY 1745 } 1746 if incX < 0 { 1747 kx = -(n - 1) * incX 1748 } 1749 if ul == blas.Upper { 1750 if incX == 1 && incY == 1 { 1751 for i := 0; i < n; i++ { 1752 xi := x[i] 1753 yi := y[i] 1754 atmp := a[i*lda:] 1755 for j := i; j < n; j++ { 1756 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1757 } 1758 } 1759 return 1760 } 1761 ix := kx 1762 iy := ky 1763 for i := 0; i < n; i++ { 1764 jx := kx + i*incX 1765 jy := ky + i*incY 1766 xi := x[ix] 1767 yi := y[iy] 1768 atmp := a[i*lda:] 1769 for j := i; j < n; j++ { 1770 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1771 jx += incX 1772 jy += incY 1773 } 1774 ix += incX 1775 iy += incY 1776 } 1777 return 1778 } 1779 if incX == 1 && incY == 1 { 1780 for i := 0; i < n; i++ { 1781 xi := x[i] 1782 yi := y[i] 1783 atmp := a[i*lda:] 1784 for j := 0; j <= i; j++ { 1785 atmp[j] += alpha * (xi*y[j] + x[j]*yi) 1786 } 1787 } 1788 return 1789 } 1790 ix := kx 1791 iy := ky 1792 for i := 0; i < n; i++ { 1793 jx := kx 1794 jy := ky 1795 xi := x[ix] 1796 yi := y[iy] 1797 atmp := a[i*lda:] 1798 for j := 0; j <= i; j++ { 1799 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 1800 jx += incX 1801 jy += incY 1802 } 1803 ix += incX 1804 iy += incY 1805 } 1806 } 1807 1808 // Stpsv solves one of the systems of equations 1809 // A * x = b if tA == blas.NoTrans 1810 // Aᵀ * x = b if tA == blas.Trans or blas.ConjTrans 1811 // where A is an n×n triangular matrix in packed format, and x and b are vectors. 1812 // 1813 // At entry to the function, x contains the values of b, and the result is 1814 // stored in-place into x. 1815 // 1816 // No test for singularity or near-singularity is included in this 1817 // routine. Such tests must be performed before calling this routine. 1818 // 1819 // Float32 implementations are autogenerated and not directly tested. 1820 func (Implementation) Stpsv(ul blas.Uplo, tA blas.Transpose, d blas.Diag, n int, ap []float32, x []float32, incX int) { 1821 if ul != blas.Lower && ul != blas.Upper { 1822 panic(badUplo) 1823 } 1824 if tA != blas.NoTrans && tA != blas.Trans && tA != blas.ConjTrans { 1825 panic(badTranspose) 1826 } 1827 if d != blas.NonUnit && d != blas.Unit { 1828 panic(badDiag) 1829 } 1830 if n < 0 { 1831 panic(nLT0) 1832 } 1833 if incX == 0 { 1834 panic(zeroIncX) 1835 } 1836 1837 // Quick return if possible. 1838 if n == 0 { 1839 return 1840 } 1841 1842 // For zero matrix size the following slice length checks are trivially satisfied. 1843 if len(ap) < n*(n+1)/2 { 1844 panic(shortAP) 1845 } 1846 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 1847 panic(shortX) 1848 } 1849 1850 var kx int 1851 if incX < 0 { 1852 kx = -(n - 1) * incX 1853 } 1854 1855 nonUnit := d == blas.NonUnit 1856 var offset int // Offset is the index of (i,i) 1857 if tA == blas.NoTrans { 1858 if ul == blas.Upper { 1859 offset = n*(n+1)/2 - 1 1860 if incX == 1 { 1861 for i := n - 1; i >= 0; i-- { 1862 atmp := ap[offset+1 : offset+n-i] 1863 xtmp := x[i+1:] 1864 var sum float32 1865 for j, v := range atmp { 1866 sum += v * xtmp[j] 1867 } 1868 x[i] -= sum 1869 if nonUnit { 1870 x[i] /= ap[offset] 1871 } 1872 offset -= n - i + 1 1873 } 1874 return 1875 } 1876 ix := kx + (n-1)*incX 1877 for i := n - 1; i >= 0; i-- { 1878 atmp := ap[offset+1 : offset+n-i] 1879 jx := kx + (i+1)*incX 1880 var sum float32 1881 for _, v := range atmp { 1882 sum += v * x[jx] 1883 jx += incX 1884 } 1885 x[ix] -= sum 1886 if nonUnit { 1887 x[ix] /= ap[offset] 1888 } 1889 ix -= incX 1890 offset -= n - i + 1 1891 } 1892 return 1893 } 1894 if incX == 1 { 1895 for i := 0; i < n; i++ { 1896 atmp := ap[offset-i : offset] 1897 var sum float32 1898 for j, v := range atmp { 1899 sum += v * x[j] 1900 } 1901 x[i] -= sum 1902 if nonUnit { 1903 x[i] /= ap[offset] 1904 } 1905 offset += i + 2 1906 } 1907 return 1908 } 1909 ix := kx 1910 for i := 0; i < n; i++ { 1911 jx := kx 1912 atmp := ap[offset-i : offset] 1913 var sum float32 1914 for _, v := range atmp { 1915 sum += v * x[jx] 1916 jx += incX 1917 } 1918 x[ix] -= sum 1919 if nonUnit { 1920 x[ix] /= ap[offset] 1921 } 1922 ix += incX 1923 offset += i + 2 1924 } 1925 return 1926 } 1927 // Cases where ap is transposed. 1928 if ul == blas.Upper { 1929 if incX == 1 { 1930 for i := 0; i < n; i++ { 1931 if nonUnit { 1932 x[i] /= ap[offset] 1933 } 1934 xi := x[i] 1935 atmp := ap[offset+1 : offset+n-i] 1936 xtmp := x[i+1:] 1937 for j, v := range atmp { 1938 xtmp[j] -= v * xi 1939 } 1940 offset += n - i 1941 } 1942 return 1943 } 1944 ix := kx 1945 for i := 0; i < n; i++ { 1946 if nonUnit { 1947 x[ix] /= ap[offset] 1948 } 1949 xix := x[ix] 1950 atmp := ap[offset+1 : offset+n-i] 1951 jx := kx + (i+1)*incX 1952 for _, v := range atmp { 1953 x[jx] -= v * xix 1954 jx += incX 1955 } 1956 ix += incX 1957 offset += n - i 1958 } 1959 return 1960 } 1961 if incX == 1 { 1962 offset = n*(n+1)/2 - 1 1963 for i := n - 1; i >= 0; i-- { 1964 if nonUnit { 1965 x[i] /= ap[offset] 1966 } 1967 xi := x[i] 1968 atmp := ap[offset-i : offset] 1969 for j, v := range atmp { 1970 x[j] -= v * xi 1971 } 1972 offset -= i + 1 1973 } 1974 return 1975 } 1976 ix := kx + (n-1)*incX 1977 offset = n*(n+1)/2 - 1 1978 for i := n - 1; i >= 0; i-- { 1979 if nonUnit { 1980 x[ix] /= ap[offset] 1981 } 1982 xix := x[ix] 1983 atmp := ap[offset-i : offset] 1984 jx := kx 1985 for _, v := range atmp { 1986 x[jx] -= v * xix 1987 jx += incX 1988 } 1989 ix -= incX 1990 offset -= i + 1 1991 } 1992 } 1993 1994 // Sspmv performs the matrix-vector operation 1995 // y = alpha * A * x + beta * y 1996 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 1997 // and alpha and beta are scalars. 1998 // 1999 // Float32 implementations are autogenerated and not directly tested. 2000 func (Implementation) Sspmv(ul blas.Uplo, n int, alpha float32, ap []float32, x []float32, incX int, beta float32, y []float32, incY int) { 2001 if ul != blas.Lower && ul != blas.Upper { 2002 panic(badUplo) 2003 } 2004 if n < 0 { 2005 panic(nLT0) 2006 } 2007 if incX == 0 { 2008 panic(zeroIncX) 2009 } 2010 if incY == 0 { 2011 panic(zeroIncY) 2012 } 2013 2014 // Quick return if possible. 2015 if n == 0 { 2016 return 2017 } 2018 2019 // For zero matrix size the following slice length checks are trivially satisfied. 2020 if len(ap) < n*(n+1)/2 { 2021 panic(shortAP) 2022 } 2023 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2024 panic(shortX) 2025 } 2026 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2027 panic(shortY) 2028 } 2029 2030 // Quick return if possible. 2031 if alpha == 0 && beta == 1 { 2032 return 2033 } 2034 2035 // Set up start points 2036 var kx, ky int 2037 if incX < 0 { 2038 kx = -(n - 1) * incX 2039 } 2040 if incY < 0 { 2041 ky = -(n - 1) * incY 2042 } 2043 2044 // Form y = beta * y. 2045 if beta != 1 { 2046 if incY == 1 { 2047 if beta == 0 { 2048 for i := range y[:n] { 2049 y[i] = 0 2050 } 2051 } else { 2052 f32.ScalUnitary(beta, y[:n]) 2053 } 2054 } else { 2055 iy := ky 2056 if beta == 0 { 2057 for i := 0; i < n; i++ { 2058 y[iy] = 0 2059 iy += incY 2060 } 2061 } else { 2062 if incY > 0 { 2063 f32.ScalInc(beta, y, uintptr(n), uintptr(incY)) 2064 } else { 2065 f32.ScalInc(beta, y, uintptr(n), uintptr(-incY)) 2066 } 2067 } 2068 } 2069 } 2070 2071 if alpha == 0 { 2072 return 2073 } 2074 2075 if n == 1 { 2076 y[0] += alpha * ap[0] * x[0] 2077 return 2078 } 2079 var offset int // Offset is the index of (i,i). 2080 if ul == blas.Upper { 2081 if incX == 1 { 2082 iy := ky 2083 for i := 0; i < n; i++ { 2084 xv := x[i] * alpha 2085 sum := ap[offset] * x[i] 2086 atmp := ap[offset+1 : offset+n-i] 2087 xtmp := x[i+1:] 2088 jy := ky + (i+1)*incY 2089 for j, v := range atmp { 2090 sum += v * xtmp[j] 2091 y[jy] += v * xv 2092 jy += incY 2093 } 2094 y[iy] += alpha * sum 2095 iy += incY 2096 offset += n - i 2097 } 2098 return 2099 } 2100 ix := kx 2101 iy := ky 2102 for i := 0; i < n; i++ { 2103 xv := x[ix] * alpha 2104 sum := ap[offset] * x[ix] 2105 atmp := ap[offset+1 : offset+n-i] 2106 jx := kx + (i+1)*incX 2107 jy := ky + (i+1)*incY 2108 for _, v := range atmp { 2109 sum += v * x[jx] 2110 y[jy] += v * xv 2111 jx += incX 2112 jy += incY 2113 } 2114 y[iy] += alpha * sum 2115 ix += incX 2116 iy += incY 2117 offset += n - i 2118 } 2119 return 2120 } 2121 if incX == 1 { 2122 iy := ky 2123 for i := 0; i < n; i++ { 2124 xv := x[i] * alpha 2125 atmp := ap[offset-i : offset] 2126 jy := ky 2127 var sum float32 2128 for j, v := range atmp { 2129 sum += v * x[j] 2130 y[jy] += v * xv 2131 jy += incY 2132 } 2133 sum += ap[offset] * x[i] 2134 y[iy] += alpha * sum 2135 iy += incY 2136 offset += i + 2 2137 } 2138 return 2139 } 2140 ix := kx 2141 iy := ky 2142 for i := 0; i < n; i++ { 2143 xv := x[ix] * alpha 2144 atmp := ap[offset-i : offset] 2145 jx := kx 2146 jy := ky 2147 var sum float32 2148 for _, v := range atmp { 2149 sum += v * x[jx] 2150 y[jy] += v * xv 2151 jx += incX 2152 jy += incY 2153 } 2154 2155 sum += ap[offset] * x[ix] 2156 y[iy] += alpha * sum 2157 ix += incX 2158 iy += incY 2159 offset += i + 2 2160 } 2161 } 2162 2163 // Sspr performs the symmetric rank-one operation 2164 // A += alpha * x * xᵀ 2165 // where A is an n×n symmetric matrix in packed format, x is a vector, and 2166 // alpha is a scalar. 2167 // 2168 // Float32 implementations are autogenerated and not directly tested. 2169 func (Implementation) Sspr(ul blas.Uplo, n int, alpha float32, x []float32, incX int, ap []float32) { 2170 if ul != blas.Lower && ul != blas.Upper { 2171 panic(badUplo) 2172 } 2173 if n < 0 { 2174 panic(nLT0) 2175 } 2176 if incX == 0 { 2177 panic(zeroIncX) 2178 } 2179 2180 // Quick return if possible. 2181 if n == 0 { 2182 return 2183 } 2184 2185 // For zero matrix size the following slice length checks are trivially satisfied. 2186 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2187 panic(shortX) 2188 } 2189 if len(ap) < n*(n+1)/2 { 2190 panic(shortAP) 2191 } 2192 2193 // Quick return if possible. 2194 if alpha == 0 { 2195 return 2196 } 2197 2198 lenX := n 2199 var kx int 2200 if incX < 0 { 2201 kx = -(lenX - 1) * incX 2202 } 2203 var offset int // Offset is the index of (i,i). 2204 if ul == blas.Upper { 2205 if incX == 1 { 2206 for i := 0; i < n; i++ { 2207 atmp := ap[offset:] 2208 xv := alpha * x[i] 2209 xtmp := x[i:n] 2210 for j, v := range xtmp { 2211 atmp[j] += xv * v 2212 } 2213 offset += n - i 2214 } 2215 return 2216 } 2217 ix := kx 2218 for i := 0; i < n; i++ { 2219 jx := kx + i*incX 2220 atmp := ap[offset:] 2221 xv := alpha * x[ix] 2222 for j := 0; j < n-i; j++ { 2223 atmp[j] += xv * x[jx] 2224 jx += incX 2225 } 2226 ix += incX 2227 offset += n - i 2228 } 2229 return 2230 } 2231 if incX == 1 { 2232 for i := 0; i < n; i++ { 2233 atmp := ap[offset-i:] 2234 xv := alpha * x[i] 2235 xtmp := x[:i+1] 2236 for j, v := range xtmp { 2237 atmp[j] += xv * v 2238 } 2239 offset += i + 2 2240 } 2241 return 2242 } 2243 ix := kx 2244 for i := 0; i < n; i++ { 2245 jx := kx 2246 atmp := ap[offset-i:] 2247 xv := alpha * x[ix] 2248 for j := 0; j <= i; j++ { 2249 atmp[j] += xv * x[jx] 2250 jx += incX 2251 } 2252 ix += incX 2253 offset += i + 2 2254 } 2255 } 2256 2257 // Sspr2 performs the symmetric rank-2 update 2258 // A += alpha * x * yᵀ + alpha * y * xᵀ 2259 // where A is an n×n symmetric matrix in packed format, x and y are vectors, 2260 // and alpha is a scalar. 2261 // 2262 // Float32 implementations are autogenerated and not directly tested. 2263 func (Implementation) Sspr2(ul blas.Uplo, n int, alpha float32, x []float32, incX int, y []float32, incY int, ap []float32) { 2264 if ul != blas.Lower && ul != blas.Upper { 2265 panic(badUplo) 2266 } 2267 if n < 0 { 2268 panic(nLT0) 2269 } 2270 if incX == 0 { 2271 panic(zeroIncX) 2272 } 2273 if incY == 0 { 2274 panic(zeroIncY) 2275 } 2276 2277 // Quick return if possible. 2278 if n == 0 { 2279 return 2280 } 2281 2282 // For zero matrix size the following slice length checks are trivially satisfied. 2283 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 2284 panic(shortX) 2285 } 2286 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 2287 panic(shortY) 2288 } 2289 if len(ap) < n*(n+1)/2 { 2290 panic(shortAP) 2291 } 2292 2293 // Quick return if possible. 2294 if alpha == 0 { 2295 return 2296 } 2297 2298 var ky, kx int 2299 if incY < 0 { 2300 ky = -(n - 1) * incY 2301 } 2302 if incX < 0 { 2303 kx = -(n - 1) * incX 2304 } 2305 var offset int // Offset is the index of (i,i). 2306 if ul == blas.Upper { 2307 if incX == 1 && incY == 1 { 2308 for i := 0; i < n; i++ { 2309 atmp := ap[offset:] 2310 xi := x[i] 2311 yi := y[i] 2312 xtmp := x[i:n] 2313 ytmp := y[i:n] 2314 for j, v := range xtmp { 2315 atmp[j] += alpha * (xi*ytmp[j] + v*yi) 2316 } 2317 offset += n - i 2318 } 2319 return 2320 } 2321 ix := kx 2322 iy := ky 2323 for i := 0; i < n; i++ { 2324 jx := kx + i*incX 2325 jy := ky + i*incY 2326 atmp := ap[offset:] 2327 xi := x[ix] 2328 yi := y[iy] 2329 for j := 0; j < n-i; j++ { 2330 atmp[j] += alpha * (xi*y[jy] + x[jx]*yi) 2331 jx += incX 2332 jy += incY 2333 } 2334 ix += incX 2335 iy += incY 2336 offset += n - i 2337 } 2338 return 2339 } 2340 if incX == 1 && incY == 1 { 2341 for i := 0; i < n; i++ { 2342 atmp := ap[offset-i:] 2343 xi := x[i] 2344 yi := y[i] 2345 xtmp := x[:i+1] 2346 for j, v := range xtmp { 2347 atmp[j] += alpha * (xi*y[j] + v*yi) 2348 } 2349 offset += i + 2 2350 } 2351 return 2352 } 2353 ix := kx 2354 iy := ky 2355 for i := 0; i < n; i++ { 2356 jx := kx 2357 jy := ky 2358 atmp := ap[offset-i:] 2359 for j := 0; j <= i; j++ { 2360 atmp[j] += alpha * (x[ix]*y[jy] + x[jx]*y[iy]) 2361 jx += incX 2362 jy += incY 2363 } 2364 ix += incX 2365 iy += incY 2366 offset += i + 2 2367 } 2368 }