github.com/gopherd/gonum@v0.0.4/blas/gonum/level1float32.go (about) 1 // Code generated by "go generate github.com/gopherd/gonum/blas/gonum”; DO NOT EDIT. 2 3 // Copyright ©2015 The Gonum Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 package gonum 8 9 import ( 10 math "github.com/gopherd/gonum/internal/math32" 11 12 "github.com/gopherd/gonum/blas" 13 "github.com/gopherd/gonum/internal/asm/f32" 14 ) 15 16 var _ blas.Float32Level1 = Implementation{} 17 18 // Snrm2 computes the Euclidean norm of a vector, 19 // sqrt(\sum_i x[i] * x[i]). 20 // This function returns 0 if incX is negative. 21 // 22 // Float32 implementations are autogenerated and not directly tested. 23 func (Implementation) Snrm2(n int, x []float32, incX int) float32 { 24 if incX < 1 { 25 if incX == 0 { 26 panic(zeroIncX) 27 } 28 return 0 29 } 30 if len(x) <= (n-1)*incX { 31 panic(shortX) 32 } 33 if n < 2 { 34 if n == 1 { 35 return math.Abs(x[0]) 36 } 37 if n == 0 { 38 return 0 39 } 40 panic(nLT0) 41 } 42 if incX == 1 { 43 return f32.L2NormUnitary(x[:n]) 44 } 45 return f32.L2NormInc(x, uintptr(n), uintptr(incX)) 46 } 47 48 // Sasum computes the sum of the absolute values of the elements of x. 49 // \sum_i |x[i]| 50 // Sasum returns 0 if incX is negative. 51 // 52 // Float32 implementations are autogenerated and not directly tested. 53 func (Implementation) Sasum(n int, x []float32, incX int) float32 { 54 var sum float32 55 if n < 0 { 56 panic(nLT0) 57 } 58 if incX < 1 { 59 if incX == 0 { 60 panic(zeroIncX) 61 } 62 return 0 63 } 64 if len(x) <= (n-1)*incX { 65 panic(shortX) 66 } 67 if incX == 1 { 68 x = x[:n] 69 for _, v := range x { 70 sum += math.Abs(v) 71 } 72 return sum 73 } 74 for i := 0; i < n; i++ { 75 sum += math.Abs(x[i*incX]) 76 } 77 return sum 78 } 79 80 // Isamax returns the index of an element of x with the largest absolute value. 81 // If there are multiple such indices the earliest is returned. 82 // Isamax returns -1 if n == 0. 83 // 84 // Float32 implementations are autogenerated and not directly tested. 85 func (Implementation) Isamax(n int, x []float32, incX int) int { 86 if incX < 1 { 87 if incX == 0 { 88 panic(zeroIncX) 89 } 90 return -1 91 } 92 if len(x) <= (n-1)*incX { 93 panic(shortX) 94 } 95 if n < 2 { 96 if n == 1 { 97 return 0 98 } 99 if n == 0 { 100 return -1 // Netlib returns invalid index when n == 0. 101 } 102 panic(nLT0) 103 } 104 idx := 0 105 max := math.Abs(x[0]) 106 if incX == 1 { 107 for i, v := range x[:n] { 108 absV := math.Abs(v) 109 if absV > max { 110 max = absV 111 idx = i 112 } 113 } 114 return idx 115 } 116 ix := incX 117 for i := 1; i < n; i++ { 118 v := x[ix] 119 absV := math.Abs(v) 120 if absV > max { 121 max = absV 122 idx = i 123 } 124 ix += incX 125 } 126 return idx 127 } 128 129 // Sswap exchanges the elements of two vectors. 130 // x[i], y[i] = y[i], x[i] for all i 131 // 132 // Float32 implementations are autogenerated and not directly tested. 133 func (Implementation) Sswap(n int, x []float32, incX int, y []float32, incY int) { 134 if incX == 0 { 135 panic(zeroIncX) 136 } 137 if incY == 0 { 138 panic(zeroIncY) 139 } 140 if n < 1 { 141 if n == 0 { 142 return 143 } 144 panic(nLT0) 145 } 146 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 147 panic(shortX) 148 } 149 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 150 panic(shortY) 151 } 152 if incX == 1 && incY == 1 { 153 x = x[:n] 154 for i, v := range x { 155 x[i], y[i] = y[i], v 156 } 157 return 158 } 159 var ix, iy int 160 if incX < 0 { 161 ix = (-n + 1) * incX 162 } 163 if incY < 0 { 164 iy = (-n + 1) * incY 165 } 166 for i := 0; i < n; i++ { 167 x[ix], y[iy] = y[iy], x[ix] 168 ix += incX 169 iy += incY 170 } 171 } 172 173 // Scopy copies the elements of x into the elements of y. 174 // y[i] = x[i] for all i 175 // 176 // Float32 implementations are autogenerated and not directly tested. 177 func (Implementation) Scopy(n int, x []float32, incX int, y []float32, incY int) { 178 if incX == 0 { 179 panic(zeroIncX) 180 } 181 if incY == 0 { 182 panic(zeroIncY) 183 } 184 if n < 1 { 185 if n == 0 { 186 return 187 } 188 panic(nLT0) 189 } 190 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 191 panic(shortX) 192 } 193 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 194 panic(shortY) 195 } 196 if incX == 1 && incY == 1 { 197 copy(y[:n], x[:n]) 198 return 199 } 200 var ix, iy int 201 if incX < 0 { 202 ix = (-n + 1) * incX 203 } 204 if incY < 0 { 205 iy = (-n + 1) * incY 206 } 207 for i := 0; i < n; i++ { 208 y[iy] = x[ix] 209 ix += incX 210 iy += incY 211 } 212 } 213 214 // Saxpy adds alpha times x to y 215 // y[i] += alpha * x[i] for all i 216 // 217 // Float32 implementations are autogenerated and not directly tested. 218 func (Implementation) Saxpy(n int, alpha float32, x []float32, incX int, y []float32, incY int) { 219 if incX == 0 { 220 panic(zeroIncX) 221 } 222 if incY == 0 { 223 panic(zeroIncY) 224 } 225 if n < 1 { 226 if n == 0 { 227 return 228 } 229 panic(nLT0) 230 } 231 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 232 panic(shortX) 233 } 234 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 235 panic(shortY) 236 } 237 if alpha == 0 { 238 return 239 } 240 if incX == 1 && incY == 1 { 241 f32.AxpyUnitary(alpha, x[:n], y[:n]) 242 return 243 } 244 var ix, iy int 245 if incX < 0 { 246 ix = (-n + 1) * incX 247 } 248 if incY < 0 { 249 iy = (-n + 1) * incY 250 } 251 f32.AxpyInc(alpha, x, y, uintptr(n), uintptr(incX), uintptr(incY), uintptr(ix), uintptr(iy)) 252 } 253 254 // Srotg computes a plane rotation 255 // ⎡ c s ⎤ ⎡ a ⎤ = ⎡ r ⎤ 256 // ⎣ -s c ⎦ ⎣ b ⎦ ⎣ 0 ⎦ 257 // satisfying c^2 + s^2 = 1. 258 // 259 // The computation uses the formulas 260 // sigma = sgn(a) if |a| > |b| 261 // = sgn(b) if |b| >= |a| 262 // r = sigma*sqrt(a^2 + b^2) 263 // c = 1; s = 0 if r = 0 264 // c = a/r; s = b/r if r != 0 265 // c >= 0 if |a| > |b| 266 // 267 // The subroutine also computes 268 // z = s if |a| > |b|, 269 // = 1/c if |b| >= |a| and c != 0 270 // = 1 if c = 0 271 // This allows c and s to be reconstructed from z as follows: 272 // If z = 1, set c = 0, s = 1. 273 // If |z| < 1, set c = sqrt(1 - z^2) and s = z. 274 // If |z| > 1, set c = 1/z and s = sqrt(1 - c^2). 275 // 276 // NOTE: There is a discrepancy between the reference implementation and the 277 // BLAS technical manual regarding the sign for r when a or b are zero. Drotg 278 // agrees with the definition in the manual and other common BLAS 279 // implementations. 280 // 281 // Float32 implementations are autogenerated and not directly tested. 282 func (Implementation) Srotg(a, b float32) (c, s, r, z float32) { 283 // Implementation based on Supplemental Material to: 284 // Edward Anderson. 2017. Algorithm 978: Safe Scaling in the Level 1 BLAS. 285 // ACM Trans. Math. Softw. 44, 1, Article 12 (July 2017), 28 pages. 286 // DOI: https://doi.org/10.1145/3061665 287 const ( 288 safmin = 0x1p-126 289 safmax = 1 / safmin 290 ) 291 anorm := math.Abs(a) 292 bnorm := math.Abs(b) 293 switch { 294 case bnorm == 0: 295 c = 1 296 s = 0 297 r = a 298 z = 0 299 case anorm == 0: 300 c = 0 301 s = 1 302 r = b 303 z = 1 304 default: 305 maxab := math.Max(anorm, bnorm) 306 scl := math.Min(math.Max(safmin, maxab), safmax) 307 var sigma float32 308 if anorm > bnorm { 309 sigma = math.Copysign(1, a) 310 } else { 311 sigma = math.Copysign(1, b) 312 } 313 ascl := a / scl 314 bscl := b / scl 315 r = sigma * (scl * math.Sqrt(ascl*ascl+bscl*bscl)) 316 c = a / r 317 s = b / r 318 switch { 319 case anorm > bnorm: 320 z = s 321 case c != 0: 322 z = 1 / c 323 default: 324 z = 1 325 } 326 } 327 return c, s, r, z 328 } 329 330 // Srotmg computes the modified Givens rotation. See 331 // http://www.netlib.org/lapack/explore-html/df/deb/drotmg_8f.html 332 // for more details. 333 // 334 // Float32 implementations are autogenerated and not directly tested. 335 func (Implementation) Srotmg(d1, d2, x1, y1 float32) (p blas.SrotmParams, rd1, rd2, rx1 float32) { 336 // The implementation of Drotmg used here is taken from Hopkins 1997 337 // Appendix A: https://doi.org/10.1145/289251.289253 338 // with the exception of the gam constants below. 339 340 const ( 341 gam = 4096.0 342 gamsq = gam * gam 343 rgamsq = 1.0 / gamsq 344 ) 345 346 if d1 < 0 { 347 p.Flag = blas.Rescaling // Error state. 348 return p, 0, 0, 0 349 } 350 351 if d2 == 0 || y1 == 0 { 352 p.Flag = blas.Identity 353 return p, d1, d2, x1 354 } 355 356 var h11, h12, h21, h22 float32 357 if (d1 == 0 || x1 == 0) && d2 > 0 { 358 p.Flag = blas.Diagonal 359 h12 = 1 360 h21 = -1 361 x1 = y1 362 d1, d2 = d2, d1 363 } else { 364 p2 := d2 * y1 365 p1 := d1 * x1 366 q2 := p2 * y1 367 q1 := p1 * x1 368 if math.Abs(q1) > math.Abs(q2) { 369 p.Flag = blas.OffDiagonal 370 h11 = 1 371 h22 = 1 372 h21 = -y1 / x1 373 h12 = p2 / p1 374 u := 1 - float32(h12*h21) 375 if u <= 0 { 376 p.Flag = blas.Rescaling // Error state. 377 return p, 0, 0, 0 378 } 379 380 d1 /= u 381 d2 /= u 382 x1 *= u 383 } else { 384 if q2 < 0 { 385 p.Flag = blas.Rescaling // Error state. 386 return p, 0, 0, 0 387 } 388 389 p.Flag = blas.Diagonal 390 h21 = -1 391 h12 = 1 392 h11 = p1 / p2 393 h22 = x1 / y1 394 u := 1 + float32(h11*h22) 395 d1, d2 = d2/u, d1/u 396 x1 = y1 * u 397 } 398 } 399 400 for d1 <= rgamsq && d1 != 0 { 401 p.Flag = blas.Rescaling 402 d1 = (d1 * gam) * gam 403 x1 /= gam 404 h11 /= gam 405 h12 /= gam 406 } 407 for d1 > gamsq { 408 p.Flag = blas.Rescaling 409 d1 = (d1 / gam) / gam 410 x1 *= gam 411 h11 *= gam 412 h12 *= gam 413 } 414 415 for math.Abs(d2) <= rgamsq && d2 != 0 { 416 p.Flag = blas.Rescaling 417 d2 = (d2 * gam) * gam 418 h21 /= gam 419 h22 /= gam 420 } 421 for math.Abs(d2) > gamsq { 422 p.Flag = blas.Rescaling 423 d2 = (d2 / gam) / gam 424 h21 *= gam 425 h22 *= gam 426 } 427 428 switch p.Flag { 429 case blas.Diagonal: 430 p.H = [4]float32{0: h11, 3: h22} 431 case blas.OffDiagonal: 432 p.H = [4]float32{1: h21, 2: h12} 433 case blas.Rescaling: 434 p.H = [4]float32{h11, h21, h12, h22} 435 default: 436 panic(badFlag) 437 } 438 439 return p, d1, d2, x1 440 } 441 442 // Srot applies a plane transformation. 443 // x[i] = c * x[i] + s * y[i] 444 // y[i] = c * y[i] - s * x[i] 445 // 446 // Float32 implementations are autogenerated and not directly tested. 447 func (Implementation) Srot(n int, x []float32, incX int, y []float32, incY int, c float32, s float32) { 448 if incX == 0 { 449 panic(zeroIncX) 450 } 451 if incY == 0 { 452 panic(zeroIncY) 453 } 454 if n < 1 { 455 if n == 0 { 456 return 457 } 458 panic(nLT0) 459 } 460 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 461 panic(shortX) 462 } 463 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 464 panic(shortY) 465 } 466 if incX == 1 && incY == 1 { 467 x = x[:n] 468 for i, vx := range x { 469 vy := y[i] 470 x[i], y[i] = c*vx+s*vy, c*vy-s*vx 471 } 472 return 473 } 474 var ix, iy int 475 if incX < 0 { 476 ix = (-n + 1) * incX 477 } 478 if incY < 0 { 479 iy = (-n + 1) * incY 480 } 481 for i := 0; i < n; i++ { 482 vx := x[ix] 483 vy := y[iy] 484 x[ix], y[iy] = c*vx+s*vy, c*vy-s*vx 485 ix += incX 486 iy += incY 487 } 488 } 489 490 // Srotm applies the modified Givens rotation to the 2×n matrix. 491 // 492 // Float32 implementations are autogenerated and not directly tested. 493 func (Implementation) Srotm(n int, x []float32, incX int, y []float32, incY int, p blas.SrotmParams) { 494 if incX == 0 { 495 panic(zeroIncX) 496 } 497 if incY == 0 { 498 panic(zeroIncY) 499 } 500 if n <= 0 { 501 if n == 0 { 502 return 503 } 504 panic(nLT0) 505 } 506 if (incX > 0 && len(x) <= (n-1)*incX) || (incX < 0 && len(x) <= (1-n)*incX) { 507 panic(shortX) 508 } 509 if (incY > 0 && len(y) <= (n-1)*incY) || (incY < 0 && len(y) <= (1-n)*incY) { 510 panic(shortY) 511 } 512 513 if p.Flag == blas.Identity { 514 return 515 } 516 517 switch p.Flag { 518 case blas.Rescaling: 519 h11 := p.H[0] 520 h12 := p.H[2] 521 h21 := p.H[1] 522 h22 := p.H[3] 523 if incX == 1 && incY == 1 { 524 x = x[:n] 525 for i, vx := range x { 526 vy := y[i] 527 x[i], y[i] = float32(vx*h11)+float32(vy*h12), float32(vx*h21)+float32(vy*h22) 528 } 529 return 530 } 531 var ix, iy int 532 if incX < 0 { 533 ix = (-n + 1) * incX 534 } 535 if incY < 0 { 536 iy = (-n + 1) * incY 537 } 538 for i := 0; i < n; i++ { 539 vx := x[ix] 540 vy := y[iy] 541 x[ix], y[iy] = float32(vx*h11)+float32(vy*h12), float32(vx*h21)+float32(vy*h22) 542 ix += incX 543 iy += incY 544 } 545 case blas.OffDiagonal: 546 h12 := p.H[2] 547 h21 := p.H[1] 548 if incX == 1 && incY == 1 { 549 x = x[:n] 550 for i, vx := range x { 551 vy := y[i] 552 x[i], y[i] = vx+float32(vy*h12), float32(vx*h21)+vy 553 } 554 return 555 } 556 var ix, iy int 557 if incX < 0 { 558 ix = (-n + 1) * incX 559 } 560 if incY < 0 { 561 iy = (-n + 1) * incY 562 } 563 for i := 0; i < n; i++ { 564 vx := x[ix] 565 vy := y[iy] 566 x[ix], y[iy] = vx+float32(vy*h12), float32(vx*h21)+vy 567 ix += incX 568 iy += incY 569 } 570 case blas.Diagonal: 571 h11 := p.H[0] 572 h22 := p.H[3] 573 if incX == 1 && incY == 1 { 574 x = x[:n] 575 for i, vx := range x { 576 vy := y[i] 577 x[i], y[i] = float32(vx*h11)+vy, -vx+float32(vy*h22) 578 } 579 return 580 } 581 var ix, iy int 582 if incX < 0 { 583 ix = (-n + 1) * incX 584 } 585 if incY < 0 { 586 iy = (-n + 1) * incY 587 } 588 for i := 0; i < n; i++ { 589 vx := x[ix] 590 vy := y[iy] 591 x[ix], y[iy] = float32(vx*h11)+vy, -vx+float32(vy*h22) 592 ix += incX 593 iy += incY 594 } 595 } 596 } 597 598 // Sscal scales x by alpha. 599 // x[i] *= alpha 600 // Sscal has no effect if incX < 0. 601 // 602 // Float32 implementations are autogenerated and not directly tested. 603 func (Implementation) Sscal(n int, alpha float32, x []float32, incX int) { 604 if incX < 1 { 605 if incX == 0 { 606 panic(zeroIncX) 607 } 608 return 609 } 610 if n < 1 { 611 if n == 0 { 612 return 613 } 614 panic(nLT0) 615 } 616 if (n-1)*incX >= len(x) { 617 panic(shortX) 618 } 619 if alpha == 0 { 620 if incX == 1 { 621 x = x[:n] 622 for i := range x { 623 x[i] = 0 624 } 625 return 626 } 627 for ix := 0; ix < n*incX; ix += incX { 628 x[ix] = 0 629 } 630 return 631 } 632 if incX == 1 { 633 f32.ScalUnitary(alpha, x[:n]) 634 return 635 } 636 f32.ScalInc(alpha, x, uintptr(n), uintptr(incX)) 637 }