github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/math/big/float.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 "math/bits" 18 ) 19 20 const debugFloat = false // enable for debugging 21 22 // A nonzero finite Float represents a multi-precision floating point number 23 // 24 // sign × mantissa × 2**exponent 25 // 26 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 27 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 28 // All Floats are ordered, and the ordering of two Floats x and y 29 // is defined by x.Cmp(y). 30 // 31 // Each Float value also has a precision, rounding mode, and accuracy. 32 // The precision is the maximum number of mantissa bits available to 33 // represent the value. The rounding mode specifies how a result should 34 // be rounded to fit into the mantissa bits, and accuracy describes the 35 // rounding error with respect to the exact result. 36 // 37 // Unless specified otherwise, all operations (including setters) that 38 // specify a *Float variable for the result (usually via the receiver 39 // with the exception of MantExp), round the numeric result according 40 // to the precision and rounding mode of the result variable. 41 // 42 // If the provided result precision is 0 (see below), it is set to the 43 // precision of the argument with the largest precision value before any 44 // rounding takes place, and the rounding mode remains unchanged. Thus, 45 // uninitialized Floats provided as result arguments will have their 46 // precision set to a reasonable value determined by the operands and 47 // their mode is the zero value for RoundingMode (ToNearestEven). 48 // 49 // By setting the desired precision to 24 or 53 and using matching rounding 50 // mode (typically ToNearestEven), Float operations produce the same results 51 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 52 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 53 // Exponent underflow and overflow lead to a 0 or an Infinity for different 54 // values than IEEE-754 because Float exponents have a much larger range. 55 // 56 // The zero (uninitialized) value for a Float is ready to use and represents 57 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 58 // 59 type Float struct { 60 prec uint32 61 mode RoundingMode 62 acc Accuracy 63 form form 64 neg bool 65 mant nat 66 exp int32 67 } 68 69 // An ErrNaN panic is raised by a Float operation that would lead to 70 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 71 type ErrNaN struct { 72 msg string 73 } 74 75 func (err ErrNaN) Error() string { 76 return err.msg 77 } 78 79 // NewFloat allocates and returns a new Float set to x, 80 // with precision 53 and rounding mode ToNearestEven. 81 // NewFloat panics with ErrNaN if x is a NaN. 82 func NewFloat(x float64) *Float { 83 if math.IsNaN(x) { 84 panic(ErrNaN{"NewFloat(NaN)"}) 85 } 86 return new(Float).SetFloat64(x) 87 } 88 89 // Exponent and precision limits. 90 const ( 91 MaxExp = math.MaxInt32 // largest supported exponent 92 MinExp = math.MinInt32 // smallest supported exponent 93 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 94 ) 95 96 // Internal representation: The mantissa bits x.mant of a nonzero finite 97 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 98 // the slice may (but doesn't have to) be shorter if the mantissa contains 99 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 100 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 101 // trailing 0 bits or x.prec is not a multiple of the Word size _W, 102 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 103 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 104 // 105 // A zero or non-finite Float x ignores x.mant and x.exp. 106 // 107 // x form neg mant exp 108 // ---------------------------------------------------------- 109 // ±0 zero sign - - 110 // 0 < |x| < +Inf finite sign mantissa exponent 111 // ±Inf inf sign - - 112 113 // A form value describes the internal representation. 114 type form byte 115 116 // The form value order is relevant - do not change! 117 const ( 118 zero form = iota 119 finite 120 inf 121 ) 122 123 // RoundingMode determines how a Float value is rounded to the 124 // desired precision. Rounding may change the Float value; the 125 // rounding error is described by the Float's Accuracy. 126 type RoundingMode byte 127 128 // These constants define supported rounding modes. 129 const ( 130 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 131 ToNearestAway // == IEEE 754-2008 roundTiesToAway 132 ToZero // == IEEE 754-2008 roundTowardZero 133 AwayFromZero // no IEEE 754-2008 equivalent 134 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 135 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 136 ) 137 138 //go:generate stringer -type=RoundingMode 139 140 // Accuracy describes the rounding error produced by the most recent 141 // operation that generated a Float value, relative to the exact value. 142 type Accuracy int8 143 144 // Constants describing the Accuracy of a Float. 145 const ( 146 Below Accuracy = -1 147 Exact Accuracy = 0 148 Above Accuracy = +1 149 ) 150 151 //go:generate stringer -type=Accuracy 152 153 // SetPrec sets z's precision to prec and returns the (possibly) rounded 154 // value of z. Rounding occurs according to z's rounding mode if the mantissa 155 // cannot be represented in prec bits without loss of precision. 156 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 157 // If prec > MaxPrec, it is set to MaxPrec. 158 func (z *Float) SetPrec(prec uint) *Float { 159 z.acc = Exact // optimistically assume no rounding is needed 160 161 // special case 162 if prec == 0 { 163 z.prec = 0 164 if z.form == finite { 165 // truncate z to 0 166 z.acc = makeAcc(z.neg) 167 z.form = zero 168 } 169 return z 170 } 171 172 // general case 173 if prec > MaxPrec { 174 prec = MaxPrec 175 } 176 old := z.prec 177 z.prec = uint32(prec) 178 if z.prec < old { 179 z.round(0) 180 } 181 return z 182 } 183 184 func makeAcc(above bool) Accuracy { 185 if above { 186 return Above 187 } 188 return Below 189 } 190 191 // SetMode sets z's rounding mode to mode and returns an exact z. 192 // z remains unchanged otherwise. 193 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 194 func (z *Float) SetMode(mode RoundingMode) *Float { 195 z.mode = mode 196 z.acc = Exact 197 return z 198 } 199 200 // Prec returns the mantissa precision of x in bits. 201 // The result may be 0 for |x| == 0 and |x| == Inf. 202 func (x *Float) Prec() uint { 203 return uint(x.prec) 204 } 205 206 // MinPrec returns the minimum precision required to represent x exactly 207 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 208 // The result is 0 for |x| == 0 and |x| == Inf. 209 func (x *Float) MinPrec() uint { 210 if x.form != finite { 211 return 0 212 } 213 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 214 } 215 216 // Mode returns the rounding mode of x. 217 func (x *Float) Mode() RoundingMode { 218 return x.mode 219 } 220 221 // Acc returns the accuracy of x produced by the most recent operation. 222 func (x *Float) Acc() Accuracy { 223 return x.acc 224 } 225 226 // Sign returns: 227 // 228 // -1 if x < 0 229 // 0 if x is ±0 230 // +1 if x > 0 231 // 232 func (x *Float) Sign() int { 233 if debugFloat { 234 x.validate() 235 } 236 if x.form == zero { 237 return 0 238 } 239 if x.neg { 240 return -1 241 } 242 return 1 243 } 244 245 // MantExp breaks x into its mantissa and exponent components 246 // and returns the exponent. If a non-nil mant argument is 247 // provided its value is set to the mantissa of x, with the 248 // same precision and rounding mode as x. The components 249 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 250 // Calling MantExp with a nil argument is an efficient way to 251 // get the exponent of the receiver. 252 // 253 // Special cases are: 254 // 255 // ( ±0).MantExp(mant) = 0, with mant set to ±0 256 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 257 // 258 // x and mant may be the same in which case x is set to its 259 // mantissa value. 260 func (x *Float) MantExp(mant *Float) (exp int) { 261 if debugFloat { 262 x.validate() 263 } 264 if x.form == finite { 265 exp = int(x.exp) 266 } 267 if mant != nil { 268 mant.Copy(x) 269 if mant.form == finite { 270 mant.exp = 0 271 } 272 } 273 return 274 } 275 276 func (z *Float) setExpAndRound(exp int64, sbit uint) { 277 if exp < MinExp { 278 // underflow 279 z.acc = makeAcc(z.neg) 280 z.form = zero 281 return 282 } 283 284 if exp > MaxExp { 285 // overflow 286 z.acc = makeAcc(!z.neg) 287 z.form = inf 288 return 289 } 290 291 z.form = finite 292 z.exp = int32(exp) 293 z.round(sbit) 294 } 295 296 // SetMantExp sets z to mant × 2**exp and and returns z. 297 // The result z has the same precision and rounding mode 298 // as mant. SetMantExp is an inverse of MantExp but does 299 // not require 0.5 <= |mant| < 1.0. Specifically: 300 // 301 // mant := new(Float) 302 // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 303 // 304 // Special cases are: 305 // 306 // z.SetMantExp( ±0, exp) = ±0 307 // z.SetMantExp(±Inf, exp) = ±Inf 308 // 309 // z and mant may be the same in which case z's exponent 310 // is set to exp. 311 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 312 if debugFloat { 313 z.validate() 314 mant.validate() 315 } 316 z.Copy(mant) 317 if z.form != finite { 318 return z 319 } 320 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 321 return z 322 } 323 324 // Signbit returns true if x is negative or negative zero. 325 func (x *Float) Signbit() bool { 326 return x.neg 327 } 328 329 // IsInf reports whether x is +Inf or -Inf. 330 func (x *Float) IsInf() bool { 331 return x.form == inf 332 } 333 334 // IsInt reports whether x is an integer. 335 // ±Inf values are not integers. 336 func (x *Float) IsInt() bool { 337 if debugFloat { 338 x.validate() 339 } 340 // special cases 341 if x.form != finite { 342 return x.form == zero 343 } 344 // x.form == finite 345 if x.exp <= 0 { 346 return false 347 } 348 // x.exp > 0 349 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 350 } 351 352 // debugging support 353 func (x *Float) validate() { 354 if !debugFloat { 355 // avoid performance bugs 356 panic("validate called but debugFloat is not set") 357 } 358 if x.form != finite { 359 return 360 } 361 m := len(x.mant) 362 if m == 0 { 363 panic("nonzero finite number with empty mantissa") 364 } 365 const msb = 1 << (_W - 1) 366 if x.mant[m-1]&msb == 0 { 367 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 368 } 369 if x.prec == 0 { 370 panic("zero precision finite number") 371 } 372 } 373 374 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 375 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 376 // have before calling round. z's mantissa must be normalized (with the msb set) 377 // or empty. 378 // 379 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 380 // sign of z. For correct rounding, the sign of z must be set correctly before 381 // calling round. 382 func (z *Float) round(sbit uint) { 383 if debugFloat { 384 z.validate() 385 } 386 387 z.acc = Exact 388 if z.form != finite { 389 // ±0 or ±Inf => nothing left to do 390 return 391 } 392 // z.form == finite && len(z.mant) > 0 393 // m > 0 implies z.prec > 0 (checked by validate) 394 395 m := uint32(len(z.mant)) // present mantissa length in words 396 bits := m * _W // present mantissa bits; bits > 0 397 if bits <= z.prec { 398 // mantissa fits => nothing to do 399 return 400 } 401 // bits > z.prec 402 403 // Rounding is based on two bits: the rounding bit (rbit) and the 404 // sticky bit (sbit). The rbit is the bit immediately before the 405 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 406 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 407 // 408 // rbit sbit => "fractional part" 409 // 410 // 0 0 == 0 411 // 0 1 > 0 , < 0.5 412 // 1 0 == 0.5 413 // 1 1 > 0.5, < 1.0 414 415 // bits > z.prec: mantissa too large => round 416 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 417 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 418 if sbit == 0 { 419 // TODO(gri) if rbit != 0 we don't need to compute sbit for some rounding modes (optimization) 420 sbit = z.mant.sticky(r) 421 } 422 sbit &= 1 // be safe and ensure it's a single bit 423 424 // cut off extra words 425 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 426 if m > n { 427 copy(z.mant, z.mant[m-n:]) // move n last words to front 428 z.mant = z.mant[:n] 429 } 430 431 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 432 ntz := n*_W - z.prec // 0 <= ntz < _W 433 lsb := Word(1) << ntz 434 435 // round if result is inexact 436 if rbit|sbit != 0 { 437 // Make rounding decision: The result mantissa is truncated ("rounded down") 438 // by default. Decide if we need to increment, or "round up", the (unsigned) 439 // mantissa. 440 inc := false 441 switch z.mode { 442 case ToNegativeInf: 443 inc = z.neg 444 case ToZero: 445 // nothing to do 446 case ToNearestEven: 447 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 448 case ToNearestAway: 449 inc = rbit != 0 450 case AwayFromZero: 451 inc = true 452 case ToPositiveInf: 453 inc = !z.neg 454 default: 455 panic("unreachable") 456 } 457 458 // A positive result (!z.neg) is Above the exact result if we increment, 459 // and it's Below if we truncate (Exact results require no rounding). 460 // For a negative result (z.neg) it is exactly the opposite. 461 z.acc = makeAcc(inc != z.neg) 462 463 if inc { 464 // add 1 to mantissa 465 if addVW(z.mant, z.mant, lsb) != 0 { 466 // mantissa overflow => adjust exponent 467 if z.exp >= MaxExp { 468 // exponent overflow 469 z.form = inf 470 return 471 } 472 z.exp++ 473 // adjust mantissa: divide by 2 to compensate for exponent adjustment 474 shrVU(z.mant, z.mant, 1) 475 // set msb == carry == 1 from the mantissa overflow above 476 const msb = 1 << (_W - 1) 477 z.mant[n-1] |= msb 478 } 479 } 480 } 481 482 // zero out trailing bits in least-significant word 483 z.mant[0] &^= lsb - 1 484 485 if debugFloat { 486 z.validate() 487 } 488 } 489 490 func (z *Float) setBits64(neg bool, x uint64) *Float { 491 if z.prec == 0 { 492 z.prec = 64 493 } 494 z.acc = Exact 495 z.neg = neg 496 if x == 0 { 497 z.form = zero 498 return z 499 } 500 // x != 0 501 z.form = finite 502 s := bits.LeadingZeros64(x) 503 z.mant = z.mant.setUint64(x << uint(s)) 504 z.exp = int32(64 - s) // always fits 505 if z.prec < 64 { 506 z.round(0) 507 } 508 return z 509 } 510 511 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 512 // If z's precision is 0, it is changed to 64 (and rounding will have 513 // no effect). 514 func (z *Float) SetUint64(x uint64) *Float { 515 return z.setBits64(false, x) 516 } 517 518 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 519 // If z's precision is 0, it is changed to 64 (and rounding will have 520 // no effect). 521 func (z *Float) SetInt64(x int64) *Float { 522 u := x 523 if u < 0 { 524 u = -u 525 } 526 // We cannot simply call z.SetUint64(uint64(u)) and change 527 // the sign afterwards because the sign affects rounding. 528 return z.setBits64(x < 0, uint64(u)) 529 } 530 531 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 532 // If z's precision is 0, it is changed to 53 (and rounding will have 533 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 534 func (z *Float) SetFloat64(x float64) *Float { 535 if z.prec == 0 { 536 z.prec = 53 537 } 538 if math.IsNaN(x) { 539 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 540 } 541 z.acc = Exact 542 z.neg = math.Signbit(x) // handle -0, -Inf correctly 543 if x == 0 { 544 z.form = zero 545 return z 546 } 547 if math.IsInf(x, 0) { 548 z.form = inf 549 return z 550 } 551 // normalized x != 0 552 z.form = finite 553 fmant, exp := math.Frexp(x) // get normalized mantissa 554 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 555 z.exp = int32(exp) // always fits 556 if z.prec < 53 { 557 z.round(0) 558 } 559 return z 560 } 561 562 // fnorm normalizes mantissa m by shifting it to the left 563 // such that the msb of the most-significant word (msw) is 1. 564 // It returns the shift amount. It assumes that len(m) != 0. 565 func fnorm(m nat) int64 { 566 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 567 panic("msw of mantissa is 0") 568 } 569 s := nlz(m[len(m)-1]) 570 if s > 0 { 571 c := shlVU(m, m, s) 572 if debugFloat && c != 0 { 573 panic("nlz or shlVU incorrect") 574 } 575 } 576 return int64(s) 577 } 578 579 // SetInt sets z to the (possibly rounded) value of x and returns z. 580 // If z's precision is 0, it is changed to the larger of x.BitLen() 581 // or 64 (and rounding will have no effect). 582 func (z *Float) SetInt(x *Int) *Float { 583 // TODO(gri) can be more efficient if z.prec > 0 584 // but small compared to the size of x, or if there 585 // are many trailing 0's. 586 bits := uint32(x.BitLen()) 587 if z.prec == 0 { 588 z.prec = umax32(bits, 64) 589 } 590 z.acc = Exact 591 z.neg = x.neg 592 if len(x.abs) == 0 { 593 z.form = zero 594 return z 595 } 596 // x != 0 597 z.mant = z.mant.set(x.abs) 598 fnorm(z.mant) 599 z.setExpAndRound(int64(bits), 0) 600 return z 601 } 602 603 // SetRat sets z to the (possibly rounded) value of x and returns z. 604 // If z's precision is 0, it is changed to the largest of a.BitLen(), 605 // b.BitLen(), or 64; with x = a/b. 606 func (z *Float) SetRat(x *Rat) *Float { 607 if x.IsInt() { 608 return z.SetInt(x.Num()) 609 } 610 var a, b Float 611 a.SetInt(x.Num()) 612 b.SetInt(x.Denom()) 613 if z.prec == 0 { 614 z.prec = umax32(a.prec, b.prec) 615 } 616 return z.Quo(&a, &b) 617 } 618 619 // SetInf sets z to the infinite Float -Inf if signbit is 620 // set, or +Inf if signbit is not set, and returns z. The 621 // precision of z is unchanged and the result is always 622 // Exact. 623 func (z *Float) SetInf(signbit bool) *Float { 624 z.acc = Exact 625 z.form = inf 626 z.neg = signbit 627 return z 628 } 629 630 // Set sets z to the (possibly rounded) value of x and returns z. 631 // If z's precision is 0, it is changed to the precision of x 632 // before setting z (and rounding will have no effect). 633 // Rounding is performed according to z's precision and rounding 634 // mode; and z's accuracy reports the result error relative to the 635 // exact (not rounded) result. 636 func (z *Float) Set(x *Float) *Float { 637 if debugFloat { 638 x.validate() 639 } 640 z.acc = Exact 641 if z != x { 642 z.form = x.form 643 z.neg = x.neg 644 if x.form == finite { 645 z.exp = x.exp 646 z.mant = z.mant.set(x.mant) 647 } 648 if z.prec == 0 { 649 z.prec = x.prec 650 } else if z.prec < x.prec { 651 z.round(0) 652 } 653 } 654 return z 655 } 656 657 // Copy sets z to x, with the same precision, rounding mode, and 658 // accuracy as x, and returns z. x is not changed even if z and 659 // x are the same. 660 func (z *Float) Copy(x *Float) *Float { 661 if debugFloat { 662 x.validate() 663 } 664 if z != x { 665 z.prec = x.prec 666 z.mode = x.mode 667 z.acc = x.acc 668 z.form = x.form 669 z.neg = x.neg 670 if z.form == finite { 671 z.mant = z.mant.set(x.mant) 672 z.exp = x.exp 673 } 674 } 675 return z 676 } 677 678 // msb32 returns the 32 most significant bits of x. 679 func msb32(x nat) uint32 { 680 i := len(x) - 1 681 if i < 0 { 682 return 0 683 } 684 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 685 panic("x not normalized") 686 } 687 switch _W { 688 case 32: 689 return uint32(x[i]) 690 case 64: 691 return uint32(x[i] >> 32) 692 } 693 panic("unreachable") 694 } 695 696 // msb64 returns the 64 most significant bits of x. 697 func msb64(x nat) uint64 { 698 i := len(x) - 1 699 if i < 0 { 700 return 0 701 } 702 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 703 panic("x not normalized") 704 } 705 switch _W { 706 case 32: 707 v := uint64(x[i]) << 32 708 if i > 0 { 709 v |= uint64(x[i-1]) 710 } 711 return v 712 case 64: 713 return uint64(x[i]) 714 } 715 panic("unreachable") 716 } 717 718 // Uint64 returns the unsigned integer resulting from truncating x 719 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 720 // if x is an integer and Below otherwise. 721 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 722 // for x > math.MaxUint64. 723 func (x *Float) Uint64() (uint64, Accuracy) { 724 if debugFloat { 725 x.validate() 726 } 727 728 switch x.form { 729 case finite: 730 if x.neg { 731 return 0, Above 732 } 733 // 0 < x < +Inf 734 if x.exp <= 0 { 735 // 0 < x < 1 736 return 0, Below 737 } 738 // 1 <= x < Inf 739 if x.exp <= 64 { 740 // u = trunc(x) fits into a uint64 741 u := msb64(x.mant) >> (64 - uint32(x.exp)) 742 if x.MinPrec() <= 64 { 743 return u, Exact 744 } 745 return u, Below // x truncated 746 } 747 // x too large 748 return math.MaxUint64, Below 749 750 case zero: 751 return 0, Exact 752 753 case inf: 754 if x.neg { 755 return 0, Above 756 } 757 return math.MaxUint64, Below 758 } 759 760 panic("unreachable") 761 } 762 763 // Int64 returns the integer resulting from truncating x towards zero. 764 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 765 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 766 // The result is (math.MinInt64, Above) for x < math.MinInt64, 767 // and (math.MaxInt64, Below) for x > math.MaxInt64. 768 func (x *Float) Int64() (int64, Accuracy) { 769 if debugFloat { 770 x.validate() 771 } 772 773 switch x.form { 774 case finite: 775 // 0 < |x| < +Inf 776 acc := makeAcc(x.neg) 777 if x.exp <= 0 { 778 // 0 < |x| < 1 779 return 0, acc 780 } 781 // x.exp > 0 782 783 // 1 <= |x| < +Inf 784 if x.exp <= 63 { 785 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 786 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 787 if x.neg { 788 i = -i 789 } 790 if x.MinPrec() <= uint(x.exp) { 791 return i, Exact 792 } 793 return i, acc // x truncated 794 } 795 if x.neg { 796 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 797 if x.exp == 64 && x.MinPrec() == 1 { 798 acc = Exact 799 } 800 return math.MinInt64, acc 801 } 802 // x too large 803 return math.MaxInt64, Below 804 805 case zero: 806 return 0, Exact 807 808 case inf: 809 if x.neg { 810 return math.MinInt64, Above 811 } 812 return math.MaxInt64, Below 813 } 814 815 panic("unreachable") 816 } 817 818 // Float32 returns the float32 value nearest to x. If x is too small to be 819 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 820 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 821 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 822 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 823 func (x *Float) Float32() (float32, Accuracy) { 824 if debugFloat { 825 x.validate() 826 } 827 828 switch x.form { 829 case finite: 830 // 0 < |x| < +Inf 831 832 const ( 833 fbits = 32 // float size 834 mbits = 23 // mantissa size (excluding implicit msb) 835 ebits = fbits - mbits - 1 // 8 exponent size 836 bias = 1<<(ebits-1) - 1 // 127 exponent bias 837 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 838 emin = 1 - bias // -126 smallest unbiased exponent (normal) 839 emax = bias // 127 largest unbiased exponent (normal) 840 ) 841 842 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 843 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 844 845 // Compute precision p for float32 mantissa. 846 // If the exponent is too small, we have a denormal number before 847 // rounding and fewer than p mantissa bits of precision available 848 // (the exponent remains fixed but the mantissa gets shifted right). 849 p := mbits + 1 // precision of normal float 850 if e < emin { 851 // recompute precision 852 p = mbits + 1 - emin + int(e) 853 // If p == 0, the mantissa of x is shifted so much to the right 854 // that its msb falls immediately to the right of the float32 855 // mantissa space. In other words, if the smallest denormal is 856 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 857 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 858 // If m == 0.5, it is rounded down to even, i.e., 0.0. 859 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 860 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 861 // underflow to ±0 862 if x.neg { 863 var z float32 864 return -z, Above 865 } 866 return 0.0, Below 867 } 868 // otherwise, round up 869 // We handle p == 0 explicitly because it's easy and because 870 // Float.round doesn't support rounding to 0 bits of precision. 871 if p == 0 { 872 if x.neg { 873 return -math.SmallestNonzeroFloat32, Below 874 } 875 return math.SmallestNonzeroFloat32, Above 876 } 877 } 878 // p > 0 879 880 // round 881 var r Float 882 r.prec = uint32(p) 883 r.Set(x) 884 e = r.exp - 1 885 886 // Rounding may have caused r to overflow to ±Inf 887 // (rounding never causes underflows to 0). 888 // If the exponent is too large, also overflow to ±Inf. 889 if r.form == inf || e > emax { 890 // overflow 891 if x.neg { 892 return float32(math.Inf(-1)), Below 893 } 894 return float32(math.Inf(+1)), Above 895 } 896 // e <= emax 897 898 // Determine sign, biased exponent, and mantissa. 899 var sign, bexp, mant uint32 900 if x.neg { 901 sign = 1 << (fbits - 1) 902 } 903 904 // Rounding may have caused a denormal number to 905 // become normal. Check again. 906 if e < emin { 907 // denormal number: recompute precision 908 // Since rounding may have at best increased precision 909 // and we have eliminated p <= 0 early, we know p > 0. 910 // bexp == 0 for denormals 911 p = mbits + 1 - emin + int(e) 912 mant = msb32(r.mant) >> uint(fbits-p) 913 } else { 914 // normal number: emin <= e <= emax 915 bexp = uint32(e+bias) << mbits 916 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 917 } 918 919 return math.Float32frombits(sign | bexp | mant), r.acc 920 921 case zero: 922 if x.neg { 923 var z float32 924 return -z, Exact 925 } 926 return 0.0, Exact 927 928 case inf: 929 if x.neg { 930 return float32(math.Inf(-1)), Exact 931 } 932 return float32(math.Inf(+1)), Exact 933 } 934 935 panic("unreachable") 936 } 937 938 // Float64 returns the float64 value nearest to x. If x is too small to be 939 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 940 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 941 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 942 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 943 func (x *Float) Float64() (float64, Accuracy) { 944 if debugFloat { 945 x.validate() 946 } 947 948 switch x.form { 949 case finite: 950 // 0 < |x| < +Inf 951 952 const ( 953 fbits = 64 // float size 954 mbits = 52 // mantissa size (excluding implicit msb) 955 ebits = fbits - mbits - 1 // 11 exponent size 956 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 957 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 958 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 959 emax = bias // 1023 largest unbiased exponent (normal) 960 ) 961 962 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 963 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 964 965 // Compute precision p for float64 mantissa. 966 // If the exponent is too small, we have a denormal number before 967 // rounding and fewer than p mantissa bits of precision available 968 // (the exponent remains fixed but the mantissa gets shifted right). 969 p := mbits + 1 // precision of normal float 970 if e < emin { 971 // recompute precision 972 p = mbits + 1 - emin + int(e) 973 // If p == 0, the mantissa of x is shifted so much to the right 974 // that its msb falls immediately to the right of the float64 975 // mantissa space. In other words, if the smallest denormal is 976 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 977 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 978 // If m == 0.5, it is rounded down to even, i.e., 0.0. 979 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 980 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 981 // underflow to ±0 982 if x.neg { 983 var z float64 984 return -z, Above 985 } 986 return 0.0, Below 987 } 988 // otherwise, round up 989 // We handle p == 0 explicitly because it's easy and because 990 // Float.round doesn't support rounding to 0 bits of precision. 991 if p == 0 { 992 if x.neg { 993 return -math.SmallestNonzeroFloat64, Below 994 } 995 return math.SmallestNonzeroFloat64, Above 996 } 997 } 998 // p > 0 999 1000 // round 1001 var r Float 1002 r.prec = uint32(p) 1003 r.Set(x) 1004 e = r.exp - 1 1005 1006 // Rounding may have caused r to overflow to ±Inf 1007 // (rounding never causes underflows to 0). 1008 // If the exponent is too large, also overflow to ±Inf. 1009 if r.form == inf || e > emax { 1010 // overflow 1011 if x.neg { 1012 return math.Inf(-1), Below 1013 } 1014 return math.Inf(+1), Above 1015 } 1016 // e <= emax 1017 1018 // Determine sign, biased exponent, and mantissa. 1019 var sign, bexp, mant uint64 1020 if x.neg { 1021 sign = 1 << (fbits - 1) 1022 } 1023 1024 // Rounding may have caused a denormal number to 1025 // become normal. Check again. 1026 if e < emin { 1027 // denormal number: recompute precision 1028 // Since rounding may have at best increased precision 1029 // and we have eliminated p <= 0 early, we know p > 0. 1030 // bexp == 0 for denormals 1031 p = mbits + 1 - emin + int(e) 1032 mant = msb64(r.mant) >> uint(fbits-p) 1033 } else { 1034 // normal number: emin <= e <= emax 1035 bexp = uint64(e+bias) << mbits 1036 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1037 } 1038 1039 return math.Float64frombits(sign | bexp | mant), r.acc 1040 1041 case zero: 1042 if x.neg { 1043 var z float64 1044 return -z, Exact 1045 } 1046 return 0.0, Exact 1047 1048 case inf: 1049 if x.neg { 1050 return math.Inf(-1), Exact 1051 } 1052 return math.Inf(+1), Exact 1053 } 1054 1055 panic("unreachable") 1056 } 1057 1058 // Int returns the result of truncating x towards zero; 1059 // or nil if x is an infinity. 1060 // The result is Exact if x.IsInt(); otherwise it is Below 1061 // for x > 0, and Above for x < 0. 1062 // If a non-nil *Int argument z is provided, Int stores 1063 // the result in z instead of allocating a new Int. 1064 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1065 if debugFloat { 1066 x.validate() 1067 } 1068 1069 if z == nil && x.form <= finite { 1070 z = new(Int) 1071 } 1072 1073 switch x.form { 1074 case finite: 1075 // 0 < |x| < +Inf 1076 acc := makeAcc(x.neg) 1077 if x.exp <= 0 { 1078 // 0 < |x| < 1 1079 return z.SetInt64(0), acc 1080 } 1081 // x.exp > 0 1082 1083 // 1 <= |x| < +Inf 1084 // determine minimum required precision for x 1085 allBits := uint(len(x.mant)) * _W 1086 exp := uint(x.exp) 1087 if x.MinPrec() <= exp { 1088 acc = Exact 1089 } 1090 // shift mantissa as needed 1091 if z == nil { 1092 z = new(Int) 1093 } 1094 z.neg = x.neg 1095 switch { 1096 case exp > allBits: 1097 z.abs = z.abs.shl(x.mant, exp-allBits) 1098 default: 1099 z.abs = z.abs.set(x.mant) 1100 case exp < allBits: 1101 z.abs = z.abs.shr(x.mant, allBits-exp) 1102 } 1103 return z, acc 1104 1105 case zero: 1106 return z.SetInt64(0), Exact 1107 1108 case inf: 1109 return nil, makeAcc(x.neg) 1110 } 1111 1112 panic("unreachable") 1113 } 1114 1115 // Rat returns the rational number corresponding to x; 1116 // or nil if x is an infinity. 1117 // The result is Exact if x is not an Inf. 1118 // If a non-nil *Rat argument z is provided, Rat stores 1119 // the result in z instead of allocating a new Rat. 1120 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1121 if debugFloat { 1122 x.validate() 1123 } 1124 1125 if z == nil && x.form <= finite { 1126 z = new(Rat) 1127 } 1128 1129 switch x.form { 1130 case finite: 1131 // 0 < |x| < +Inf 1132 allBits := int32(len(x.mant)) * _W 1133 // build up numerator and denominator 1134 z.a.neg = x.neg 1135 switch { 1136 case x.exp > allBits: 1137 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1138 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1139 // z already in normal form 1140 default: 1141 z.a.abs = z.a.abs.set(x.mant) 1142 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1143 // z already in normal form 1144 case x.exp < allBits: 1145 z.a.abs = z.a.abs.set(x.mant) 1146 t := z.b.abs.setUint64(1) 1147 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1148 z.norm() 1149 } 1150 return z, Exact 1151 1152 case zero: 1153 return z.SetInt64(0), Exact 1154 1155 case inf: 1156 return nil, makeAcc(x.neg) 1157 } 1158 1159 panic("unreachable") 1160 } 1161 1162 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1163 // and returns z. 1164 func (z *Float) Abs(x *Float) *Float { 1165 z.Set(x) 1166 z.neg = false 1167 return z 1168 } 1169 1170 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1171 // and returns z. 1172 func (z *Float) Neg(x *Float) *Float { 1173 z.Set(x) 1174 z.neg = !z.neg 1175 return z 1176 } 1177 1178 func validateBinaryOperands(x, y *Float) { 1179 if !debugFloat { 1180 // avoid performance bugs 1181 panic("validateBinaryOperands called but debugFloat is not set") 1182 } 1183 if len(x.mant) == 0 { 1184 panic("empty mantissa for x") 1185 } 1186 if len(y.mant) == 0 { 1187 panic("empty mantissa for y") 1188 } 1189 } 1190 1191 // z = x + y, ignoring signs of x and y for the addition 1192 // but using the sign of z for rounding the result. 1193 // x and y must have a non-empty mantissa and valid exponent. 1194 func (z *Float) uadd(x, y *Float) { 1195 // Note: This implementation requires 2 shifts most of the 1196 // time. It is also inefficient if exponents or precisions 1197 // differ by wide margins. The following article describes 1198 // an efficient (but much more complicated) implementation 1199 // compatible with the internal representation used here: 1200 // 1201 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1202 // Point Addition With Exact Rounding (as in the MPFR Library)" 1203 // http://www.vinc17.net/research/papers/rnc6.pdf 1204 1205 if debugFloat { 1206 validateBinaryOperands(x, y) 1207 } 1208 1209 // compute exponents ex, ey for mantissa with "binary point" 1210 // on the right (mantissa.0) - use int64 to avoid overflow 1211 ex := int64(x.exp) - int64(len(x.mant))*_W 1212 ey := int64(y.exp) - int64(len(y.mant))*_W 1213 1214 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1215 1216 // TODO(gri) having a combined add-and-shift primitive 1217 // could make this code significantly faster 1218 switch { 1219 case ex < ey: 1220 if al { 1221 t := nat(nil).shl(y.mant, uint(ey-ex)) 1222 z.mant = z.mant.add(x.mant, t) 1223 } else { 1224 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1225 z.mant = z.mant.add(x.mant, z.mant) 1226 } 1227 default: 1228 // ex == ey, no shift needed 1229 z.mant = z.mant.add(x.mant, y.mant) 1230 case ex > ey: 1231 if al { 1232 t := nat(nil).shl(x.mant, uint(ex-ey)) 1233 z.mant = z.mant.add(t, y.mant) 1234 } else { 1235 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1236 z.mant = z.mant.add(z.mant, y.mant) 1237 } 1238 ex = ey 1239 } 1240 // len(z.mant) > 0 1241 1242 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1243 } 1244 1245 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1246 // but using the sign of z for rounding the result. 1247 // x and y must have a non-empty mantissa and valid exponent. 1248 func (z *Float) usub(x, y *Float) { 1249 // This code is symmetric to uadd. 1250 // We have not factored the common code out because 1251 // eventually uadd (and usub) should be optimized 1252 // by special-casing, and the code will diverge. 1253 1254 if debugFloat { 1255 validateBinaryOperands(x, y) 1256 } 1257 1258 ex := int64(x.exp) - int64(len(x.mant))*_W 1259 ey := int64(y.exp) - int64(len(y.mant))*_W 1260 1261 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1262 1263 switch { 1264 case ex < ey: 1265 if al { 1266 t := nat(nil).shl(y.mant, uint(ey-ex)) 1267 z.mant = t.sub(x.mant, t) 1268 } else { 1269 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1270 z.mant = z.mant.sub(x.mant, z.mant) 1271 } 1272 default: 1273 // ex == ey, no shift needed 1274 z.mant = z.mant.sub(x.mant, y.mant) 1275 case ex > ey: 1276 if al { 1277 t := nat(nil).shl(x.mant, uint(ex-ey)) 1278 z.mant = t.sub(t, y.mant) 1279 } else { 1280 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1281 z.mant = z.mant.sub(z.mant, y.mant) 1282 } 1283 ex = ey 1284 } 1285 1286 // operands may have canceled each other out 1287 if len(z.mant) == 0 { 1288 z.acc = Exact 1289 z.form = zero 1290 z.neg = false 1291 return 1292 } 1293 // len(z.mant) > 0 1294 1295 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1296 } 1297 1298 // z = x * y, ignoring signs of x and y for the multiplication 1299 // but using the sign of z for rounding the result. 1300 // x and y must have a non-empty mantissa and valid exponent. 1301 func (z *Float) umul(x, y *Float) { 1302 if debugFloat { 1303 validateBinaryOperands(x, y) 1304 } 1305 1306 // Note: This is doing too much work if the precision 1307 // of z is less than the sum of the precisions of x 1308 // and y which is often the case (e.g., if all floats 1309 // have the same precision). 1310 // TODO(gri) Optimize this for the common case. 1311 1312 e := int64(x.exp) + int64(y.exp) 1313 z.mant = z.mant.mul(x.mant, y.mant) 1314 1315 z.setExpAndRound(e-fnorm(z.mant), 0) 1316 } 1317 1318 // z = x / y, ignoring signs of x and y for the division 1319 // but using the sign of z for rounding the result. 1320 // x and y must have a non-empty mantissa and valid exponent. 1321 func (z *Float) uquo(x, y *Float) { 1322 if debugFloat { 1323 validateBinaryOperands(x, y) 1324 } 1325 1326 // mantissa length in words for desired result precision + 1 1327 // (at least one extra bit so we get the rounding bit after 1328 // the division) 1329 n := int(z.prec/_W) + 1 1330 1331 // compute adjusted x.mant such that we get enough result precision 1332 xadj := x.mant 1333 if d := n - len(x.mant) + len(y.mant); d > 0 { 1334 // d extra words needed => add d "0 digits" to x 1335 xadj = make(nat, len(x.mant)+d) 1336 copy(xadj[d:], x.mant) 1337 } 1338 // TODO(gri): If we have too many digits (d < 0), we should be able 1339 // to shorten x for faster division. But we must be extra careful 1340 // with rounding in that case. 1341 1342 // Compute d before division since there may be aliasing of x.mant 1343 // (via xadj) or y.mant with z.mant. 1344 d := len(xadj) - len(y.mant) 1345 1346 // divide 1347 var r nat 1348 z.mant, r = z.mant.div(nil, xadj, y.mant) 1349 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1350 1351 // The result is long enough to include (at least) the rounding bit. 1352 // If there's a non-zero remainder, the corresponding fractional part 1353 // (if it were computed), would have a non-zero sticky bit (if it were 1354 // zero, it couldn't have a non-zero remainder). 1355 var sbit uint 1356 if len(r) > 0 { 1357 sbit = 1 1358 } 1359 1360 z.setExpAndRound(e-fnorm(z.mant), sbit) 1361 } 1362 1363 // ucmp returns -1, 0, or +1, depending on whether 1364 // |x| < |y|, |x| == |y|, or |x| > |y|. 1365 // x and y must have a non-empty mantissa and valid exponent. 1366 func (x *Float) ucmp(y *Float) int { 1367 if debugFloat { 1368 validateBinaryOperands(x, y) 1369 } 1370 1371 switch { 1372 case x.exp < y.exp: 1373 return -1 1374 case x.exp > y.exp: 1375 return +1 1376 } 1377 // x.exp == y.exp 1378 1379 // compare mantissas 1380 i := len(x.mant) 1381 j := len(y.mant) 1382 for i > 0 || j > 0 { 1383 var xm, ym Word 1384 if i > 0 { 1385 i-- 1386 xm = x.mant[i] 1387 } 1388 if j > 0 { 1389 j-- 1390 ym = y.mant[j] 1391 } 1392 switch { 1393 case xm < ym: 1394 return -1 1395 case xm > ym: 1396 return +1 1397 } 1398 } 1399 1400 return 0 1401 } 1402 1403 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1404 // 1405 // When neither the inputs nor result are NaN, the sign of a product or 1406 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1407 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1408 // one of the addends’ signs; and the sign of the result of conversions, 1409 // the quantize operation, the roundToIntegral operations, and the 1410 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1411 // These rules shall apply even when operands or results are zero or infinite. 1412 // 1413 // When the sum of two operands with opposite signs (or the difference of 1414 // two operands with like signs) is exactly zero, the sign of that sum (or 1415 // difference) shall be +0 in all rounding-direction attributes except 1416 // roundTowardNegative; under that attribute, the sign of an exact zero 1417 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1418 // sign as x even when x is zero. 1419 // 1420 // See also: https://play.golang.org/p/RtH3UCt5IH 1421 1422 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1423 // it is changed to the larger of x's or y's precision before the operation. 1424 // Rounding is performed according to z's precision and rounding mode; and 1425 // z's accuracy reports the result error relative to the exact (not rounded) 1426 // result. Add panics with ErrNaN if x and y are infinities with opposite 1427 // signs. The value of z is undefined in that case. 1428 // 1429 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1430 func (z *Float) Add(x, y *Float) *Float { 1431 if debugFloat { 1432 x.validate() 1433 y.validate() 1434 } 1435 1436 if z.prec == 0 { 1437 z.prec = umax32(x.prec, y.prec) 1438 } 1439 1440 if x.form == finite && y.form == finite { 1441 // x + y (common case) 1442 1443 // Below we set z.neg = x.neg, and when z aliases y this will 1444 // change the y operand's sign. This is fine, because if an 1445 // operand aliases the receiver it'll be overwritten, but we still 1446 // want the original x.neg and y.neg values when we evaluate 1447 // x.neg != y.neg, so we need to save y.neg before setting z.neg. 1448 yneg := y.neg 1449 1450 z.neg = x.neg 1451 if x.neg == yneg { 1452 // x + y == x + y 1453 // (-x) + (-y) == -(x + y) 1454 z.uadd(x, y) 1455 } else { 1456 // x + (-y) == x - y == -(y - x) 1457 // (-x) + y == y - x == -(x - y) 1458 if x.ucmp(y) > 0 { 1459 z.usub(x, y) 1460 } else { 1461 z.neg = !z.neg 1462 z.usub(y, x) 1463 } 1464 } 1465 return z 1466 } 1467 1468 if x.form == inf && y.form == inf && x.neg != y.neg { 1469 // +Inf + -Inf 1470 // -Inf + +Inf 1471 // value of z is undefined but make sure it's valid 1472 z.acc = Exact 1473 z.form = zero 1474 z.neg = false 1475 panic(ErrNaN{"addition of infinities with opposite signs"}) 1476 } 1477 1478 if x.form == zero && y.form == zero { 1479 // ±0 + ±0 1480 z.acc = Exact 1481 z.form = zero 1482 z.neg = x.neg && y.neg // -0 + -0 == -0 1483 return z 1484 } 1485 1486 if x.form == inf || y.form == zero { 1487 // ±Inf + y 1488 // x + ±0 1489 return z.Set(x) 1490 } 1491 1492 // ±0 + y 1493 // x + ±Inf 1494 return z.Set(y) 1495 } 1496 1497 // Sub sets z to the rounded difference x-y and returns z. 1498 // Precision, rounding, and accuracy reporting are as for Add. 1499 // Sub panics with ErrNaN if x and y are infinities with equal 1500 // signs. The value of z is undefined in that case. 1501 func (z *Float) Sub(x, y *Float) *Float { 1502 if debugFloat { 1503 x.validate() 1504 y.validate() 1505 } 1506 1507 if z.prec == 0 { 1508 z.prec = umax32(x.prec, y.prec) 1509 } 1510 1511 if x.form == finite && y.form == finite { 1512 // x - y (common case) 1513 yneg := y.neg 1514 z.neg = x.neg 1515 if x.neg != yneg { 1516 // x - (-y) == x + y 1517 // (-x) - y == -(x + y) 1518 z.uadd(x, y) 1519 } else { 1520 // x - y == x - y == -(y - x) 1521 // (-x) - (-y) == y - x == -(x - y) 1522 if x.ucmp(y) > 0 { 1523 z.usub(x, y) 1524 } else { 1525 z.neg = !z.neg 1526 z.usub(y, x) 1527 } 1528 } 1529 return z 1530 } 1531 1532 if x.form == inf && y.form == inf && x.neg == y.neg { 1533 // +Inf - +Inf 1534 // -Inf - -Inf 1535 // value of z is undefined but make sure it's valid 1536 z.acc = Exact 1537 z.form = zero 1538 z.neg = false 1539 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1540 } 1541 1542 if x.form == zero && y.form == zero { 1543 // ±0 - ±0 1544 z.acc = Exact 1545 z.form = zero 1546 z.neg = x.neg && !y.neg // -0 - +0 == -0 1547 return z 1548 } 1549 1550 if x.form == inf || y.form == zero { 1551 // ±Inf - y 1552 // x - ±0 1553 return z.Set(x) 1554 } 1555 1556 // ±0 - y 1557 // x - ±Inf 1558 return z.Neg(y) 1559 } 1560 1561 // Mul sets z to the rounded product x*y and returns z. 1562 // Precision, rounding, and accuracy reporting are as for Add. 1563 // Mul panics with ErrNaN if one operand is zero and the other 1564 // operand an infinity. The value of z is undefined in that case. 1565 func (z *Float) Mul(x, y *Float) *Float { 1566 if debugFloat { 1567 x.validate() 1568 y.validate() 1569 } 1570 1571 if z.prec == 0 { 1572 z.prec = umax32(x.prec, y.prec) 1573 } 1574 1575 z.neg = x.neg != y.neg 1576 1577 if x.form == finite && y.form == finite { 1578 // x * y (common case) 1579 z.umul(x, y) 1580 return z 1581 } 1582 1583 z.acc = Exact 1584 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1585 // ±0 * ±Inf 1586 // ±Inf * ±0 1587 // value of z is undefined but make sure it's valid 1588 z.form = zero 1589 z.neg = false 1590 panic(ErrNaN{"multiplication of zero with infinity"}) 1591 } 1592 1593 if x.form == inf || y.form == inf { 1594 // ±Inf * y 1595 // x * ±Inf 1596 z.form = inf 1597 return z 1598 } 1599 1600 // ±0 * y 1601 // x * ±0 1602 z.form = zero 1603 return z 1604 } 1605 1606 // Quo sets z to the rounded quotient x/y and returns z. 1607 // Precision, rounding, and accuracy reporting are as for Add. 1608 // Quo panics with ErrNaN if both operands are zero or infinities. 1609 // The value of z is undefined in that case. 1610 func (z *Float) Quo(x, y *Float) *Float { 1611 if debugFloat { 1612 x.validate() 1613 y.validate() 1614 } 1615 1616 if z.prec == 0 { 1617 z.prec = umax32(x.prec, y.prec) 1618 } 1619 1620 z.neg = x.neg != y.neg 1621 1622 if x.form == finite && y.form == finite { 1623 // x / y (common case) 1624 z.uquo(x, y) 1625 return z 1626 } 1627 1628 z.acc = Exact 1629 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1630 // ±0 / ±0 1631 // ±Inf / ±Inf 1632 // value of z is undefined but make sure it's valid 1633 z.form = zero 1634 z.neg = false 1635 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1636 } 1637 1638 if x.form == zero || y.form == inf { 1639 // ±0 / y 1640 // x / ±Inf 1641 z.form = zero 1642 return z 1643 } 1644 1645 // x / ±0 1646 // ±Inf / y 1647 z.form = inf 1648 return z 1649 } 1650 1651 // Cmp compares x and y and returns: 1652 // 1653 // -1 if x < y 1654 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1655 // +1 if x > y 1656 // 1657 func (x *Float) Cmp(y *Float) int { 1658 if debugFloat { 1659 x.validate() 1660 y.validate() 1661 } 1662 1663 mx := x.ord() 1664 my := y.ord() 1665 switch { 1666 case mx < my: 1667 return -1 1668 case mx > my: 1669 return +1 1670 } 1671 // mx == my 1672 1673 // only if |mx| == 1 we have to compare the mantissae 1674 switch mx { 1675 case -1: 1676 return y.ucmp(x) 1677 case +1: 1678 return x.ucmp(y) 1679 } 1680 1681 return 0 1682 } 1683 1684 // ord classifies x and returns: 1685 // 1686 // -2 if -Inf == x 1687 // -1 if -Inf < x < 0 1688 // 0 if x == 0 (signed or unsigned) 1689 // +1 if 0 < x < +Inf 1690 // +2 if x == +Inf 1691 // 1692 func (x *Float) ord() int { 1693 var m int 1694 switch x.form { 1695 case finite: 1696 m = 1 1697 case zero: 1698 return 0 1699 case inf: 1700 m = 2 1701 } 1702 if x.neg { 1703 m = -m 1704 } 1705 return m 1706 } 1707 1708 func umax32(x, y uint32) uint32 { 1709 if x > y { 1710 return x 1711 } 1712 return y 1713 }