github.com/riscv/riscv-go@v0.0.0-20200123204226-124ebd6fcc8e/src/math/big/float.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 ) 18 19 const debugFloat = false // enable for debugging 20 21 // A nonzero finite Float represents a multi-precision floating point number 22 // 23 // sign × mantissa × 2**exponent 24 // 25 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 26 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 27 // All Floats are ordered, and the ordering of two Floats x and y 28 // is defined by x.Cmp(y). 29 // 30 // Each Float value also has a precision, rounding mode, and accuracy. 31 // The precision is the maximum number of mantissa bits available to 32 // represent the value. The rounding mode specifies how a result should 33 // be rounded to fit into the mantissa bits, and accuracy describes the 34 // rounding error with respect to the exact result. 35 // 36 // Unless specified otherwise, all operations (including setters) that 37 // specify a *Float variable for the result (usually via the receiver 38 // with the exception of MantExp), round the numeric result according 39 // to the precision and rounding mode of the result variable. 40 // 41 // If the provided result precision is 0 (see below), it is set to the 42 // precision of the argument with the largest precision value before any 43 // rounding takes place, and the rounding mode remains unchanged. Thus, 44 // uninitialized Floats provided as result arguments will have their 45 // precision set to a reasonable value determined by the operands and 46 // their mode is the zero value for RoundingMode (ToNearestEven). 47 // 48 // By setting the desired precision to 24 or 53 and using matching rounding 49 // mode (typically ToNearestEven), Float operations produce the same results 50 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 51 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 52 // Exponent underflow and overflow lead to a 0 or an Infinity for different 53 // values than IEEE-754 because Float exponents have a much larger range. 54 // 55 // The zero (uninitialized) value for a Float is ready to use and represents 56 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 57 // 58 type Float struct { 59 prec uint32 60 mode RoundingMode 61 acc Accuracy 62 form form 63 neg bool 64 mant nat 65 exp int32 66 } 67 68 // An ErrNaN panic is raised by a Float operation that would lead to 69 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 70 type ErrNaN struct { 71 msg string 72 } 73 74 func (err ErrNaN) Error() string { 75 return err.msg 76 } 77 78 // NewFloat allocates and returns a new Float set to x, 79 // with precision 53 and rounding mode ToNearestEven. 80 // NewFloat panics with ErrNaN if x is a NaN. 81 func NewFloat(x float64) *Float { 82 if math.IsNaN(x) { 83 panic(ErrNaN{"NewFloat(NaN)"}) 84 } 85 return new(Float).SetFloat64(x) 86 } 87 88 // Exponent and precision limits. 89 const ( 90 MaxExp = math.MaxInt32 // largest supported exponent 91 MinExp = math.MinInt32 // smallest supported exponent 92 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 93 ) 94 95 // Internal representation: The mantissa bits x.mant of a nonzero finite 96 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 97 // the slice may (but doesn't have to) be shorter if the mantissa contains 98 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 99 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 100 // trailing 0 bits or x.prec is not a multiple of the the Word size _W, 101 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 102 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 103 // 104 // A zero or non-finite Float x ignores x.mant and x.exp. 105 // 106 // x form neg mant exp 107 // ---------------------------------------------------------- 108 // ±0 zero sign - - 109 // 0 < |x| < +Inf finite sign mantissa exponent 110 // ±Inf inf sign - - 111 112 // A form value describes the internal representation. 113 type form byte 114 115 // The form value order is relevant - do not change! 116 const ( 117 zero form = iota 118 finite 119 inf 120 ) 121 122 // RoundingMode determines how a Float value is rounded to the 123 // desired precision. Rounding may change the Float value; the 124 // rounding error is described by the Float's Accuracy. 125 type RoundingMode byte 126 127 // These constants define supported rounding modes. 128 const ( 129 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 130 ToNearestAway // == IEEE 754-2008 roundTiesToAway 131 ToZero // == IEEE 754-2008 roundTowardZero 132 AwayFromZero // no IEEE 754-2008 equivalent 133 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 134 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 135 ) 136 137 //go:generate stringer -type=RoundingMode 138 139 // Accuracy describes the rounding error produced by the most recent 140 // operation that generated a Float value, relative to the exact value. 141 type Accuracy int8 142 143 // Constants describing the Accuracy of a Float. 144 const ( 145 Below Accuracy = -1 146 Exact Accuracy = 0 147 Above Accuracy = +1 148 ) 149 150 //go:generate stringer -type=Accuracy 151 152 // SetPrec sets z's precision to prec and returns the (possibly) rounded 153 // value of z. Rounding occurs according to z's rounding mode if the mantissa 154 // cannot be represented in prec bits without loss of precision. 155 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 156 // If prec > MaxPrec, it is set to MaxPrec. 157 func (z *Float) SetPrec(prec uint) *Float { 158 z.acc = Exact // optimistically assume no rounding is needed 159 160 // special case 161 if prec == 0 { 162 z.prec = 0 163 if z.form == finite { 164 // truncate z to 0 165 z.acc = makeAcc(z.neg) 166 z.form = zero 167 } 168 return z 169 } 170 171 // general case 172 if prec > MaxPrec { 173 prec = MaxPrec 174 } 175 old := z.prec 176 z.prec = uint32(prec) 177 if z.prec < old { 178 z.round(0) 179 } 180 return z 181 } 182 183 func makeAcc(above bool) Accuracy { 184 if above { 185 return Above 186 } 187 return Below 188 } 189 190 // SetMode sets z's rounding mode to mode and returns an exact z. 191 // z remains unchanged otherwise. 192 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 193 func (z *Float) SetMode(mode RoundingMode) *Float { 194 z.mode = mode 195 z.acc = Exact 196 return z 197 } 198 199 // Prec returns the mantissa precision of x in bits. 200 // The result may be 0 for |x| == 0 and |x| == Inf. 201 func (x *Float) Prec() uint { 202 return uint(x.prec) 203 } 204 205 // MinPrec returns the minimum precision required to represent x exactly 206 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 207 // The result is 0 for |x| == 0 and |x| == Inf. 208 func (x *Float) MinPrec() uint { 209 if x.form != finite { 210 return 0 211 } 212 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 213 } 214 215 // Mode returns the rounding mode of x. 216 func (x *Float) Mode() RoundingMode { 217 return x.mode 218 } 219 220 // Acc returns the accuracy of x produced by the most recent operation. 221 func (x *Float) Acc() Accuracy { 222 return x.acc 223 } 224 225 // Sign returns: 226 // 227 // -1 if x < 0 228 // 0 if x is ±0 229 // +1 if x > 0 230 // 231 func (x *Float) Sign() int { 232 if debugFloat { 233 x.validate() 234 } 235 if x.form == zero { 236 return 0 237 } 238 if x.neg { 239 return -1 240 } 241 return 1 242 } 243 244 // MantExp breaks x into its mantissa and exponent components 245 // and returns the exponent. If a non-nil mant argument is 246 // provided its value is set to the mantissa of x, with the 247 // same precision and rounding mode as x. The components 248 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 249 // Calling MantExp with a nil argument is an efficient way to 250 // get the exponent of the receiver. 251 // 252 // Special cases are: 253 // 254 // ( ±0).MantExp(mant) = 0, with mant set to ±0 255 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 256 // 257 // x and mant may be the same in which case x is set to its 258 // mantissa value. 259 func (x *Float) MantExp(mant *Float) (exp int) { 260 if debugFloat { 261 x.validate() 262 } 263 if x.form == finite { 264 exp = int(x.exp) 265 } 266 if mant != nil { 267 mant.Copy(x) 268 if mant.form == finite { 269 mant.exp = 0 270 } 271 } 272 return 273 } 274 275 func (z *Float) setExpAndRound(exp int64, sbit uint) { 276 if exp < MinExp { 277 // underflow 278 z.acc = makeAcc(z.neg) 279 z.form = zero 280 return 281 } 282 283 if exp > MaxExp { 284 // overflow 285 z.acc = makeAcc(!z.neg) 286 z.form = inf 287 return 288 } 289 290 z.form = finite 291 z.exp = int32(exp) 292 z.round(sbit) 293 } 294 295 // SetMantExp sets z to mant × 2**exp and and returns z. 296 // The result z has the same precision and rounding mode 297 // as mant. SetMantExp is an inverse of MantExp but does 298 // not require 0.5 <= |mant| < 1.0. Specifically: 299 // 300 // mant := new(Float) 301 // new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0 302 // 303 // Special cases are: 304 // 305 // z.SetMantExp( ±0, exp) = ±0 306 // z.SetMantExp(±Inf, exp) = ±Inf 307 // 308 // z and mant may be the same in which case z's exponent 309 // is set to exp. 310 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 311 if debugFloat { 312 z.validate() 313 mant.validate() 314 } 315 z.Copy(mant) 316 if z.form != finite { 317 return z 318 } 319 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 320 return z 321 } 322 323 // Signbit returns true if x is negative or negative zero. 324 func (x *Float) Signbit() bool { 325 return x.neg 326 } 327 328 // IsInf reports whether x is +Inf or -Inf. 329 func (x *Float) IsInf() bool { 330 return x.form == inf 331 } 332 333 // IsInt reports whether x is an integer. 334 // ±Inf values are not integers. 335 func (x *Float) IsInt() bool { 336 if debugFloat { 337 x.validate() 338 } 339 // special cases 340 if x.form != finite { 341 return x.form == zero 342 } 343 // x.form == finite 344 if x.exp <= 0 { 345 return false 346 } 347 // x.exp > 0 348 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 349 } 350 351 // debugging support 352 func (x *Float) validate() { 353 if !debugFloat { 354 // avoid performance bugs 355 panic("validate called but debugFloat is not set") 356 } 357 if x.form != finite { 358 return 359 } 360 m := len(x.mant) 361 if m == 0 { 362 panic("nonzero finite number with empty mantissa") 363 } 364 const msb = 1 << (_W - 1) 365 if x.mant[m-1]&msb == 0 { 366 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0))) 367 } 368 if x.prec == 0 { 369 panic("zero precision finite number") 370 } 371 } 372 373 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 374 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 375 // have before calling round. z's mantissa must be normalized (with the msb set) 376 // or empty. 377 // 378 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 379 // sign of z. For correct rounding, the sign of z must be set correctly before 380 // calling round. 381 func (z *Float) round(sbit uint) { 382 if debugFloat { 383 z.validate() 384 } 385 386 z.acc = Exact 387 if z.form != finite { 388 // ±0 or ±Inf => nothing left to do 389 return 390 } 391 // z.form == finite && len(z.mant) > 0 392 // m > 0 implies z.prec > 0 (checked by validate) 393 394 m := uint32(len(z.mant)) // present mantissa length in words 395 bits := m * _W // present mantissa bits; bits > 0 396 if bits <= z.prec { 397 // mantissa fits => nothing to do 398 return 399 } 400 // bits > z.prec 401 402 // Rounding is based on two bits: the rounding bit (rbit) and the 403 // sticky bit (sbit). The rbit is the bit immediately before the 404 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 405 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 406 // 407 // rbit sbit => "fractional part" 408 // 409 // 0 0 == 0 410 // 0 1 > 0 , < 0.5 411 // 1 0 == 0.5 412 // 1 1 > 0.5, < 1.0 413 414 // bits > z.prec: mantissa too large => round 415 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 416 rbit := z.mant.bit(r) & 1 // rounding bit; be safe and ensure it's a single bit 417 if sbit == 0 { 418 // TODO(gri) if rbit != 0 we don't need to compute sbit for some rounding modes (optimization) 419 sbit = z.mant.sticky(r) 420 } 421 sbit &= 1 // be safe and ensure it's a single bit 422 423 // cut off extra words 424 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 425 if m > n { 426 copy(z.mant, z.mant[m-n:]) // move n last words to front 427 z.mant = z.mant[:n] 428 } 429 430 // determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word 431 ntz := n*_W - z.prec // 0 <= ntz < _W 432 lsb := Word(1) << ntz 433 434 // round if result is inexact 435 if rbit|sbit != 0 { 436 // Make rounding decision: The result mantissa is truncated ("rounded down") 437 // by default. Decide if we need to increment, or "round up", the (unsigned) 438 // mantissa. 439 inc := false 440 switch z.mode { 441 case ToNegativeInf: 442 inc = z.neg 443 case ToZero: 444 // nothing to do 445 case ToNearestEven: 446 inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0) 447 case ToNearestAway: 448 inc = rbit != 0 449 case AwayFromZero: 450 inc = true 451 case ToPositiveInf: 452 inc = !z.neg 453 default: 454 panic("unreachable") 455 } 456 457 // A positive result (!z.neg) is Above the exact result if we increment, 458 // and it's Below if we truncate (Exact results require no rounding). 459 // For a negative result (z.neg) it is exactly the opposite. 460 z.acc = makeAcc(inc != z.neg) 461 462 if inc { 463 // add 1 to mantissa 464 if addVW(z.mant, z.mant, lsb) != 0 { 465 // mantissa overflow => adjust exponent 466 if z.exp >= MaxExp { 467 // exponent overflow 468 z.form = inf 469 return 470 } 471 z.exp++ 472 // adjust mantissa: divide by 2 to compensate for exponent adjustment 473 shrVU(z.mant, z.mant, 1) 474 // set msb == carry == 1 from the mantissa overflow above 475 const msb = 1 << (_W - 1) 476 z.mant[n-1] |= msb 477 } 478 } 479 } 480 481 // zero out trailing bits in least-significant word 482 z.mant[0] &^= lsb - 1 483 484 if debugFloat { 485 z.validate() 486 } 487 } 488 489 func (z *Float) setBits64(neg bool, x uint64) *Float { 490 if z.prec == 0 { 491 z.prec = 64 492 } 493 z.acc = Exact 494 z.neg = neg 495 if x == 0 { 496 z.form = zero 497 return z 498 } 499 // x != 0 500 z.form = finite 501 s := nlz64(x) 502 z.mant = z.mant.setUint64(x << s) 503 z.exp = int32(64 - s) // always fits 504 if z.prec < 64 { 505 z.round(0) 506 } 507 return z 508 } 509 510 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 511 // If z's precision is 0, it is changed to 64 (and rounding will have 512 // no effect). 513 func (z *Float) SetUint64(x uint64) *Float { 514 return z.setBits64(false, x) 515 } 516 517 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 518 // If z's precision is 0, it is changed to 64 (and rounding will have 519 // no effect). 520 func (z *Float) SetInt64(x int64) *Float { 521 u := x 522 if u < 0 { 523 u = -u 524 } 525 // We cannot simply call z.SetUint64(uint64(u)) and change 526 // the sign afterwards because the sign affects rounding. 527 return z.setBits64(x < 0, uint64(u)) 528 } 529 530 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 531 // If z's precision is 0, it is changed to 53 (and rounding will have 532 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 533 func (z *Float) SetFloat64(x float64) *Float { 534 if z.prec == 0 { 535 z.prec = 53 536 } 537 if math.IsNaN(x) { 538 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 539 } 540 z.acc = Exact 541 z.neg = math.Signbit(x) // handle -0, -Inf correctly 542 if x == 0 { 543 z.form = zero 544 return z 545 } 546 if math.IsInf(x, 0) { 547 z.form = inf 548 return z 549 } 550 // normalized x != 0 551 z.form = finite 552 fmant, exp := math.Frexp(x) // get normalized mantissa 553 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 554 z.exp = int32(exp) // always fits 555 if z.prec < 53 { 556 z.round(0) 557 } 558 return z 559 } 560 561 // fnorm normalizes mantissa m by shifting it to the left 562 // such that the msb of the most-significant word (msw) is 1. 563 // It returns the shift amount. It assumes that len(m) != 0. 564 func fnorm(m nat) int64 { 565 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 566 panic("msw of mantissa is 0") 567 } 568 s := nlz(m[len(m)-1]) 569 if s > 0 { 570 c := shlVU(m, m, s) 571 if debugFloat && c != 0 { 572 panic("nlz or shlVU incorrect") 573 } 574 } 575 return int64(s) 576 } 577 578 // SetInt sets z to the (possibly rounded) value of x and returns z. 579 // If z's precision is 0, it is changed to the larger of x.BitLen() 580 // or 64 (and rounding will have no effect). 581 func (z *Float) SetInt(x *Int) *Float { 582 // TODO(gri) can be more efficient if z.prec > 0 583 // but small compared to the size of x, or if there 584 // are many trailing 0's. 585 bits := uint32(x.BitLen()) 586 if z.prec == 0 { 587 z.prec = umax32(bits, 64) 588 } 589 z.acc = Exact 590 z.neg = x.neg 591 if len(x.abs) == 0 { 592 z.form = zero 593 return z 594 } 595 // x != 0 596 z.mant = z.mant.set(x.abs) 597 fnorm(z.mant) 598 z.setExpAndRound(int64(bits), 0) 599 return z 600 } 601 602 // SetRat sets z to the (possibly rounded) value of x and returns z. 603 // If z's precision is 0, it is changed to the largest of a.BitLen(), 604 // b.BitLen(), or 64; with x = a/b. 605 func (z *Float) SetRat(x *Rat) *Float { 606 if x.IsInt() { 607 return z.SetInt(x.Num()) 608 } 609 var a, b Float 610 a.SetInt(x.Num()) 611 b.SetInt(x.Denom()) 612 if z.prec == 0 { 613 z.prec = umax32(a.prec, b.prec) 614 } 615 return z.Quo(&a, &b) 616 } 617 618 // SetInf sets z to the infinite Float -Inf if signbit is 619 // set, or +Inf if signbit is not set, and returns z. The 620 // precision of z is unchanged and the result is always 621 // Exact. 622 func (z *Float) SetInf(signbit bool) *Float { 623 z.acc = Exact 624 z.form = inf 625 z.neg = signbit 626 return z 627 } 628 629 // Set sets z to the (possibly rounded) value of x and returns z. 630 // If z's precision is 0, it is changed to the precision of x 631 // before setting z (and rounding will have no effect). 632 // Rounding is performed according to z's precision and rounding 633 // mode; and z's accuracy reports the result error relative to the 634 // exact (not rounded) result. 635 func (z *Float) Set(x *Float) *Float { 636 if debugFloat { 637 x.validate() 638 } 639 z.acc = Exact 640 if z != x { 641 z.form = x.form 642 z.neg = x.neg 643 if x.form == finite { 644 z.exp = x.exp 645 z.mant = z.mant.set(x.mant) 646 } 647 if z.prec == 0 { 648 z.prec = x.prec 649 } else if z.prec < x.prec { 650 z.round(0) 651 } 652 } 653 return z 654 } 655 656 // Copy sets z to x, with the same precision, rounding mode, and 657 // accuracy as x, and returns z. x is not changed even if z and 658 // x are the same. 659 func (z *Float) Copy(x *Float) *Float { 660 if debugFloat { 661 x.validate() 662 } 663 if z != x { 664 z.prec = x.prec 665 z.mode = x.mode 666 z.acc = x.acc 667 z.form = x.form 668 z.neg = x.neg 669 if z.form == finite { 670 z.mant = z.mant.set(x.mant) 671 z.exp = x.exp 672 } 673 } 674 return z 675 } 676 677 // msb32 returns the 32 most significant bits of x. 678 func msb32(x nat) uint32 { 679 i := len(x) - 1 680 if i < 0 { 681 return 0 682 } 683 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 684 panic("x not normalized") 685 } 686 switch _W { 687 case 32: 688 return uint32(x[i]) 689 case 64: 690 return uint32(x[i] >> 32) 691 } 692 panic("unreachable") 693 } 694 695 // msb64 returns the 64 most significant bits of x. 696 func msb64(x nat) uint64 { 697 i := len(x) - 1 698 if i < 0 { 699 return 0 700 } 701 if debugFloat && x[i]&(1<<(_W-1)) == 0 { 702 panic("x not normalized") 703 } 704 switch _W { 705 case 32: 706 v := uint64(x[i]) << 32 707 if i > 0 { 708 v |= uint64(x[i-1]) 709 } 710 return v 711 case 64: 712 return uint64(x[i]) 713 } 714 panic("unreachable") 715 } 716 717 // Uint64 returns the unsigned integer resulting from truncating x 718 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 719 // if x is an integer and Below otherwise. 720 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 721 // for x > math.MaxUint64. 722 func (x *Float) Uint64() (uint64, Accuracy) { 723 if debugFloat { 724 x.validate() 725 } 726 727 switch x.form { 728 case finite: 729 if x.neg { 730 return 0, Above 731 } 732 // 0 < x < +Inf 733 if x.exp <= 0 { 734 // 0 < x < 1 735 return 0, Below 736 } 737 // 1 <= x < Inf 738 if x.exp <= 64 { 739 // u = trunc(x) fits into a uint64 740 u := msb64(x.mant) >> (64 - uint32(x.exp)) 741 if x.MinPrec() <= 64 { 742 return u, Exact 743 } 744 return u, Below // x truncated 745 } 746 // x too large 747 return math.MaxUint64, Below 748 749 case zero: 750 return 0, Exact 751 752 case inf: 753 if x.neg { 754 return 0, Above 755 } 756 return math.MaxUint64, Below 757 } 758 759 panic("unreachable") 760 } 761 762 // Int64 returns the integer resulting from truncating x towards zero. 763 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 764 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 765 // The result is (math.MinInt64, Above) for x < math.MinInt64, 766 // and (math.MaxInt64, Below) for x > math.MaxInt64. 767 func (x *Float) Int64() (int64, Accuracy) { 768 if debugFloat { 769 x.validate() 770 } 771 772 switch x.form { 773 case finite: 774 // 0 < |x| < +Inf 775 acc := makeAcc(x.neg) 776 if x.exp <= 0 { 777 // 0 < |x| < 1 778 return 0, acc 779 } 780 // x.exp > 0 781 782 // 1 <= |x| < +Inf 783 if x.exp <= 63 { 784 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 785 i := int64(msb64(x.mant) >> (64 - uint32(x.exp))) 786 if x.neg { 787 i = -i 788 } 789 if x.MinPrec() <= uint(x.exp) { 790 return i, Exact 791 } 792 return i, acc // x truncated 793 } 794 if x.neg { 795 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 796 if x.exp == 64 && x.MinPrec() == 1 { 797 acc = Exact 798 } 799 return math.MinInt64, acc 800 } 801 // x too large 802 return math.MaxInt64, Below 803 804 case zero: 805 return 0, Exact 806 807 case inf: 808 if x.neg { 809 return math.MinInt64, Above 810 } 811 return math.MaxInt64, Below 812 } 813 814 panic("unreachable") 815 } 816 817 // Float32 returns the float32 value nearest to x. If x is too small to be 818 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 819 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 820 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 821 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 822 func (x *Float) Float32() (float32, Accuracy) { 823 if debugFloat { 824 x.validate() 825 } 826 827 switch x.form { 828 case finite: 829 // 0 < |x| < +Inf 830 831 const ( 832 fbits = 32 // float size 833 mbits = 23 // mantissa size (excluding implicit msb) 834 ebits = fbits - mbits - 1 // 8 exponent size 835 bias = 1<<(ebits-1) - 1 // 127 exponent bias 836 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 837 emin = 1 - bias // -126 smallest unbiased exponent (normal) 838 emax = bias // 127 largest unbiased exponent (normal) 839 ) 840 841 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa. 842 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 843 844 // Compute precision p for float32 mantissa. 845 // If the exponent is too small, we have a denormal number before 846 // rounding and fewer than p mantissa bits of precision available 847 // (the exponent remains fixed but the mantissa gets shifted right). 848 p := mbits + 1 // precision of normal float 849 if e < emin { 850 // recompute precision 851 p = mbits + 1 - emin + int(e) 852 // If p == 0, the mantissa of x is shifted so much to the right 853 // that its msb falls immediately to the right of the float32 854 // mantissa space. In other words, if the smallest denormal is 855 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 856 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 857 // If m == 0.5, it is rounded down to even, i.e., 0.0. 858 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 859 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 860 // underflow to ±0 861 if x.neg { 862 var z float32 863 return -z, Above 864 } 865 return 0.0, Below 866 } 867 // otherwise, round up 868 // We handle p == 0 explicitly because it's easy and because 869 // Float.round doesn't support rounding to 0 bits of precision. 870 if p == 0 { 871 if x.neg { 872 return -math.SmallestNonzeroFloat32, Below 873 } 874 return math.SmallestNonzeroFloat32, Above 875 } 876 } 877 // p > 0 878 879 // round 880 var r Float 881 r.prec = uint32(p) 882 r.Set(x) 883 e = r.exp - 1 884 885 // Rounding may have caused r to overflow to ±Inf 886 // (rounding never causes underflows to 0). 887 // If the exponent is too large, also overflow to ±Inf. 888 if r.form == inf || e > emax { 889 // overflow 890 if x.neg { 891 return float32(math.Inf(-1)), Below 892 } 893 return float32(math.Inf(+1)), Above 894 } 895 // e <= emax 896 897 // Determine sign, biased exponent, and mantissa. 898 var sign, bexp, mant uint32 899 if x.neg { 900 sign = 1 << (fbits - 1) 901 } 902 903 // Rounding may have caused a denormal number to 904 // become normal. Check again. 905 if e < emin { 906 // denormal number: recompute precision 907 // Since rounding may have at best increased precision 908 // and we have eliminated p <= 0 early, we know p > 0. 909 // bexp == 0 for denormals 910 p = mbits + 1 - emin + int(e) 911 mant = msb32(r.mant) >> uint(fbits-p) 912 } else { 913 // normal number: emin <= e <= emax 914 bexp = uint32(e+bias) << mbits 915 mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 916 } 917 918 return math.Float32frombits(sign | bexp | mant), r.acc 919 920 case zero: 921 if x.neg { 922 var z float32 923 return -z, Exact 924 } 925 return 0.0, Exact 926 927 case inf: 928 if x.neg { 929 return float32(math.Inf(-1)), Exact 930 } 931 return float32(math.Inf(+1)), Exact 932 } 933 934 panic("unreachable") 935 } 936 937 // Float64 returns the float64 value nearest to x. If x is too small to be 938 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 939 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 940 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 941 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 942 func (x *Float) Float64() (float64, Accuracy) { 943 if debugFloat { 944 x.validate() 945 } 946 947 switch x.form { 948 case finite: 949 // 0 < |x| < +Inf 950 951 const ( 952 fbits = 64 // float size 953 mbits = 52 // mantissa size (excluding implicit msb) 954 ebits = fbits - mbits - 1 // 11 exponent size 955 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 956 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 957 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 958 emax = bias // 1023 largest unbiased exponent (normal) 959 ) 960 961 // Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa. 962 e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0 963 964 // Compute precision p for float64 mantissa. 965 // If the exponent is too small, we have a denormal number before 966 // rounding and fewer than p mantissa bits of precision available 967 // (the exponent remains fixed but the mantissa gets shifted right). 968 p := mbits + 1 // precision of normal float 969 if e < emin { 970 // recompute precision 971 p = mbits + 1 - emin + int(e) 972 // If p == 0, the mantissa of x is shifted so much to the right 973 // that its msb falls immediately to the right of the float64 974 // mantissa space. In other words, if the smallest denormal is 975 // considered "1.0", for p == 0, the mantissa value m is >= 0.5. 976 // If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal. 977 // If m == 0.5, it is rounded down to even, i.e., 0.0. 978 // If p < 0, the mantissa value m is <= "0.25" which is never rounded up. 979 if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ { 980 // underflow to ±0 981 if x.neg { 982 var z float64 983 return -z, Above 984 } 985 return 0.0, Below 986 } 987 // otherwise, round up 988 // We handle p == 0 explicitly because it's easy and because 989 // Float.round doesn't support rounding to 0 bits of precision. 990 if p == 0 { 991 if x.neg { 992 return -math.SmallestNonzeroFloat64, Below 993 } 994 return math.SmallestNonzeroFloat64, Above 995 } 996 } 997 // p > 0 998 999 // round 1000 var r Float 1001 r.prec = uint32(p) 1002 r.Set(x) 1003 e = r.exp - 1 1004 1005 // Rounding may have caused r to overflow to ±Inf 1006 // (rounding never causes underflows to 0). 1007 // If the exponent is too large, also overflow to ±Inf. 1008 if r.form == inf || e > emax { 1009 // overflow 1010 if x.neg { 1011 return math.Inf(-1), Below 1012 } 1013 return math.Inf(+1), Above 1014 } 1015 // e <= emax 1016 1017 // Determine sign, biased exponent, and mantissa. 1018 var sign, bexp, mant uint64 1019 if x.neg { 1020 sign = 1 << (fbits - 1) 1021 } 1022 1023 // Rounding may have caused a denormal number to 1024 // become normal. Check again. 1025 if e < emin { 1026 // denormal number: recompute precision 1027 // Since rounding may have at best increased precision 1028 // and we have eliminated p <= 0 early, we know p > 0. 1029 // bexp == 0 for denormals 1030 p = mbits + 1 - emin + int(e) 1031 mant = msb64(r.mant) >> uint(fbits-p) 1032 } else { 1033 // normal number: emin <= e <= emax 1034 bexp = uint64(e+bias) << mbits 1035 mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1036 } 1037 1038 return math.Float64frombits(sign | bexp | mant), r.acc 1039 1040 case zero: 1041 if x.neg { 1042 var z float64 1043 return -z, Exact 1044 } 1045 return 0.0, Exact 1046 1047 case inf: 1048 if x.neg { 1049 return math.Inf(-1), Exact 1050 } 1051 return math.Inf(+1), Exact 1052 } 1053 1054 panic("unreachable") 1055 } 1056 1057 // Int returns the result of truncating x towards zero; 1058 // or nil if x is an infinity. 1059 // The result is Exact if x.IsInt(); otherwise it is Below 1060 // for x > 0, and Above for x < 0. 1061 // If a non-nil *Int argument z is provided, Int stores 1062 // the result in z instead of allocating a new Int. 1063 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1064 if debugFloat { 1065 x.validate() 1066 } 1067 1068 if z == nil && x.form <= finite { 1069 z = new(Int) 1070 } 1071 1072 switch x.form { 1073 case finite: 1074 // 0 < |x| < +Inf 1075 acc := makeAcc(x.neg) 1076 if x.exp <= 0 { 1077 // 0 < |x| < 1 1078 return z.SetInt64(0), acc 1079 } 1080 // x.exp > 0 1081 1082 // 1 <= |x| < +Inf 1083 // determine minimum required precision for x 1084 allBits := uint(len(x.mant)) * _W 1085 exp := uint(x.exp) 1086 if x.MinPrec() <= exp { 1087 acc = Exact 1088 } 1089 // shift mantissa as needed 1090 if z == nil { 1091 z = new(Int) 1092 } 1093 z.neg = x.neg 1094 switch { 1095 case exp > allBits: 1096 z.abs = z.abs.shl(x.mant, exp-allBits) 1097 default: 1098 z.abs = z.abs.set(x.mant) 1099 case exp < allBits: 1100 z.abs = z.abs.shr(x.mant, allBits-exp) 1101 } 1102 return z, acc 1103 1104 case zero: 1105 return z.SetInt64(0), Exact 1106 1107 case inf: 1108 return nil, makeAcc(x.neg) 1109 } 1110 1111 panic("unreachable") 1112 } 1113 1114 // Rat returns the rational number corresponding to x; 1115 // or nil if x is an infinity. 1116 // The result is Exact if x is not an Inf. 1117 // If a non-nil *Rat argument z is provided, Rat stores 1118 // the result in z instead of allocating a new Rat. 1119 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1120 if debugFloat { 1121 x.validate() 1122 } 1123 1124 if z == nil && x.form <= finite { 1125 z = new(Rat) 1126 } 1127 1128 switch x.form { 1129 case finite: 1130 // 0 < |x| < +Inf 1131 allBits := int32(len(x.mant)) * _W 1132 // build up numerator and denominator 1133 z.a.neg = x.neg 1134 switch { 1135 case x.exp > allBits: 1136 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1137 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1138 // z already in normal form 1139 default: 1140 z.a.abs = z.a.abs.set(x.mant) 1141 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1142 // z already in normal form 1143 case x.exp < allBits: 1144 z.a.abs = z.a.abs.set(x.mant) 1145 t := z.b.abs.setUint64(1) 1146 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1147 z.norm() 1148 } 1149 return z, Exact 1150 1151 case zero: 1152 return z.SetInt64(0), Exact 1153 1154 case inf: 1155 return nil, makeAcc(x.neg) 1156 } 1157 1158 panic("unreachable") 1159 } 1160 1161 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1162 // and returns z. 1163 func (z *Float) Abs(x *Float) *Float { 1164 z.Set(x) 1165 z.neg = false 1166 return z 1167 } 1168 1169 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1170 // and returns z. 1171 func (z *Float) Neg(x *Float) *Float { 1172 z.Set(x) 1173 z.neg = !z.neg 1174 return z 1175 } 1176 1177 func validateBinaryOperands(x, y *Float) { 1178 if !debugFloat { 1179 // avoid performance bugs 1180 panic("validateBinaryOperands called but debugFloat is not set") 1181 } 1182 if len(x.mant) == 0 { 1183 panic("empty mantissa for x") 1184 } 1185 if len(y.mant) == 0 { 1186 panic("empty mantissa for y") 1187 } 1188 } 1189 1190 // z = x + y, ignoring signs of x and y for the addition 1191 // but using the sign of z for rounding the result. 1192 // x and y must have a non-empty mantissa and valid exponent. 1193 func (z *Float) uadd(x, y *Float) { 1194 // Note: This implementation requires 2 shifts most of the 1195 // time. It is also inefficient if exponents or precisions 1196 // differ by wide margins. The following article describes 1197 // an efficient (but much more complicated) implementation 1198 // compatible with the internal representation used here: 1199 // 1200 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1201 // Point Addition With Exact Rounding (as in the MPFR Library)" 1202 // http://www.vinc17.net/research/papers/rnc6.pdf 1203 1204 if debugFloat { 1205 validateBinaryOperands(x, y) 1206 } 1207 1208 // compute exponents ex, ey for mantissa with "binary point" 1209 // on the right (mantissa.0) - use int64 to avoid overflow 1210 ex := int64(x.exp) - int64(len(x.mant))*_W 1211 ey := int64(y.exp) - int64(len(y.mant))*_W 1212 1213 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1214 1215 // TODO(gri) having a combined add-and-shift primitive 1216 // could make this code significantly faster 1217 switch { 1218 case ex < ey: 1219 if al { 1220 t := nat(nil).shl(y.mant, uint(ey-ex)) 1221 z.mant = z.mant.add(x.mant, t) 1222 } else { 1223 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1224 z.mant = z.mant.add(x.mant, z.mant) 1225 } 1226 default: 1227 // ex == ey, no shift needed 1228 z.mant = z.mant.add(x.mant, y.mant) 1229 case ex > ey: 1230 if al { 1231 t := nat(nil).shl(x.mant, uint(ex-ey)) 1232 z.mant = z.mant.add(t, y.mant) 1233 } else { 1234 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1235 z.mant = z.mant.add(z.mant, y.mant) 1236 } 1237 ex = ey 1238 } 1239 // len(z.mant) > 0 1240 1241 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1242 } 1243 1244 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1245 // but using the sign of z for rounding the result. 1246 // x and y must have a non-empty mantissa and valid exponent. 1247 func (z *Float) usub(x, y *Float) { 1248 // This code is symmetric to uadd. 1249 // We have not factored the common code out because 1250 // eventually uadd (and usub) should be optimized 1251 // by special-casing, and the code will diverge. 1252 1253 if debugFloat { 1254 validateBinaryOperands(x, y) 1255 } 1256 1257 ex := int64(x.exp) - int64(len(x.mant))*_W 1258 ey := int64(y.exp) - int64(len(y.mant))*_W 1259 1260 al := alias(z.mant, x.mant) || alias(z.mant, y.mant) 1261 1262 switch { 1263 case ex < ey: 1264 if al { 1265 t := nat(nil).shl(y.mant, uint(ey-ex)) 1266 z.mant = t.sub(x.mant, t) 1267 } else { 1268 z.mant = z.mant.shl(y.mant, uint(ey-ex)) 1269 z.mant = z.mant.sub(x.mant, z.mant) 1270 } 1271 default: 1272 // ex == ey, no shift needed 1273 z.mant = z.mant.sub(x.mant, y.mant) 1274 case ex > ey: 1275 if al { 1276 t := nat(nil).shl(x.mant, uint(ex-ey)) 1277 z.mant = t.sub(t, y.mant) 1278 } else { 1279 z.mant = z.mant.shl(x.mant, uint(ex-ey)) 1280 z.mant = z.mant.sub(z.mant, y.mant) 1281 } 1282 ex = ey 1283 } 1284 1285 // operands may have canceled each other out 1286 if len(z.mant) == 0 { 1287 z.acc = Exact 1288 z.form = zero 1289 z.neg = false 1290 return 1291 } 1292 // len(z.mant) > 0 1293 1294 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1295 } 1296 1297 // z = x * y, ignoring signs of x and y for the multiplication 1298 // but using the sign of z for rounding the result. 1299 // x and y must have a non-empty mantissa and valid exponent. 1300 func (z *Float) umul(x, y *Float) { 1301 if debugFloat { 1302 validateBinaryOperands(x, y) 1303 } 1304 1305 // Note: This is doing too much work if the precision 1306 // of z is less than the sum of the precisions of x 1307 // and y which is often the case (e.g., if all floats 1308 // have the same precision). 1309 // TODO(gri) Optimize this for the common case. 1310 1311 e := int64(x.exp) + int64(y.exp) 1312 z.mant = z.mant.mul(x.mant, y.mant) 1313 1314 z.setExpAndRound(e-fnorm(z.mant), 0) 1315 } 1316 1317 // z = x / y, ignoring signs of x and y for the division 1318 // but using the sign of z for rounding the result. 1319 // x and y must have a non-empty mantissa and valid exponent. 1320 func (z *Float) uquo(x, y *Float) { 1321 if debugFloat { 1322 validateBinaryOperands(x, y) 1323 } 1324 1325 // mantissa length in words for desired result precision + 1 1326 // (at least one extra bit so we get the rounding bit after 1327 // the division) 1328 n := int(z.prec/_W) + 1 1329 1330 // compute adjusted x.mant such that we get enough result precision 1331 xadj := x.mant 1332 if d := n - len(x.mant) + len(y.mant); d > 0 { 1333 // d extra words needed => add d "0 digits" to x 1334 xadj = make(nat, len(x.mant)+d) 1335 copy(xadj[d:], x.mant) 1336 } 1337 // TODO(gri): If we have too many digits (d < 0), we should be able 1338 // to shorten x for faster division. But we must be extra careful 1339 // with rounding in that case. 1340 1341 // Compute d before division since there may be aliasing of x.mant 1342 // (via xadj) or y.mant with z.mant. 1343 d := len(xadj) - len(y.mant) 1344 1345 // divide 1346 var r nat 1347 z.mant, r = z.mant.div(nil, xadj, y.mant) 1348 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1349 1350 // The result is long enough to include (at least) the rounding bit. 1351 // If there's a non-zero remainder, the corresponding fractional part 1352 // (if it were computed), would have a non-zero sticky bit (if it were 1353 // zero, it couldn't have a non-zero remainder). 1354 var sbit uint 1355 if len(r) > 0 { 1356 sbit = 1 1357 } 1358 1359 z.setExpAndRound(e-fnorm(z.mant), sbit) 1360 } 1361 1362 // ucmp returns -1, 0, or +1, depending on whether 1363 // |x| < |y|, |x| == |y|, or |x| > |y|. 1364 // x and y must have a non-empty mantissa and valid exponent. 1365 func (x *Float) ucmp(y *Float) int { 1366 if debugFloat { 1367 validateBinaryOperands(x, y) 1368 } 1369 1370 switch { 1371 case x.exp < y.exp: 1372 return -1 1373 case x.exp > y.exp: 1374 return +1 1375 } 1376 // x.exp == y.exp 1377 1378 // compare mantissas 1379 i := len(x.mant) 1380 j := len(y.mant) 1381 for i > 0 || j > 0 { 1382 var xm, ym Word 1383 if i > 0 { 1384 i-- 1385 xm = x.mant[i] 1386 } 1387 if j > 0 { 1388 j-- 1389 ym = y.mant[j] 1390 } 1391 switch { 1392 case xm < ym: 1393 return -1 1394 case xm > ym: 1395 return +1 1396 } 1397 } 1398 1399 return 0 1400 } 1401 1402 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1403 // 1404 // When neither the inputs nor result are NaN, the sign of a product or 1405 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1406 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1407 // one of the addends’ signs; and the sign of the result of conversions, 1408 // the quantize operation, the roundToIntegral operations, and the 1409 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1410 // These rules shall apply even when operands or results are zero or infinite. 1411 // 1412 // When the sum of two operands with opposite signs (or the difference of 1413 // two operands with like signs) is exactly zero, the sign of that sum (or 1414 // difference) shall be +0 in all rounding-direction attributes except 1415 // roundTowardNegative; under that attribute, the sign of an exact zero 1416 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1417 // sign as x even when x is zero. 1418 // 1419 // See also: https://play.golang.org/p/RtH3UCt5IH 1420 1421 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1422 // it is changed to the larger of x's or y's precision before the operation. 1423 // Rounding is performed according to z's precision and rounding mode; and 1424 // z's accuracy reports the result error relative to the exact (not rounded) 1425 // result. Add panics with ErrNaN if x and y are infinities with opposite 1426 // signs. The value of z is undefined in that case. 1427 // 1428 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1429 func (z *Float) Add(x, y *Float) *Float { 1430 if debugFloat { 1431 x.validate() 1432 y.validate() 1433 } 1434 1435 if z.prec == 0 { 1436 z.prec = umax32(x.prec, y.prec) 1437 } 1438 1439 if x.form == finite && y.form == finite { 1440 // x + y (common case) 1441 z.neg = x.neg 1442 if x.neg == y.neg { 1443 // x + y == x + y 1444 // (-x) + (-y) == -(x + y) 1445 z.uadd(x, y) 1446 } else { 1447 // x + (-y) == x - y == -(y - x) 1448 // (-x) + y == y - x == -(x - y) 1449 if x.ucmp(y) > 0 { 1450 z.usub(x, y) 1451 } else { 1452 z.neg = !z.neg 1453 z.usub(y, x) 1454 } 1455 } 1456 return z 1457 } 1458 1459 if x.form == inf && y.form == inf && x.neg != y.neg { 1460 // +Inf + -Inf 1461 // -Inf + +Inf 1462 // value of z is undefined but make sure it's valid 1463 z.acc = Exact 1464 z.form = zero 1465 z.neg = false 1466 panic(ErrNaN{"addition of infinities with opposite signs"}) 1467 } 1468 1469 if x.form == zero && y.form == zero { 1470 // ±0 + ±0 1471 z.acc = Exact 1472 z.form = zero 1473 z.neg = x.neg && y.neg // -0 + -0 == -0 1474 return z 1475 } 1476 1477 if x.form == inf || y.form == zero { 1478 // ±Inf + y 1479 // x + ±0 1480 return z.Set(x) 1481 } 1482 1483 // ±0 + y 1484 // x + ±Inf 1485 return z.Set(y) 1486 } 1487 1488 // Sub sets z to the rounded difference x-y and returns z. 1489 // Precision, rounding, and accuracy reporting are as for Add. 1490 // Sub panics with ErrNaN if x and y are infinities with equal 1491 // signs. The value of z is undefined in that case. 1492 func (z *Float) Sub(x, y *Float) *Float { 1493 if debugFloat { 1494 x.validate() 1495 y.validate() 1496 } 1497 1498 if z.prec == 0 { 1499 z.prec = umax32(x.prec, y.prec) 1500 } 1501 1502 if x.form == finite && y.form == finite { 1503 // x - y (common case) 1504 z.neg = x.neg 1505 if x.neg != y.neg { 1506 // x - (-y) == x + y 1507 // (-x) - y == -(x + y) 1508 z.uadd(x, y) 1509 } else { 1510 // x - y == x - y == -(y - x) 1511 // (-x) - (-y) == y - x == -(x - y) 1512 if x.ucmp(y) > 0 { 1513 z.usub(x, y) 1514 } else { 1515 z.neg = !z.neg 1516 z.usub(y, x) 1517 } 1518 } 1519 return z 1520 } 1521 1522 if x.form == inf && y.form == inf && x.neg == y.neg { 1523 // +Inf - +Inf 1524 // -Inf - -Inf 1525 // value of z is undefined but make sure it's valid 1526 z.acc = Exact 1527 z.form = zero 1528 z.neg = false 1529 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1530 } 1531 1532 if x.form == zero && y.form == zero { 1533 // ±0 - ±0 1534 z.acc = Exact 1535 z.form = zero 1536 z.neg = x.neg && !y.neg // -0 - +0 == -0 1537 return z 1538 } 1539 1540 if x.form == inf || y.form == zero { 1541 // ±Inf - y 1542 // x - ±0 1543 return z.Set(x) 1544 } 1545 1546 // ±0 - y 1547 // x - ±Inf 1548 return z.Neg(y) 1549 } 1550 1551 // Mul sets z to the rounded product x*y and returns z. 1552 // Precision, rounding, and accuracy reporting are as for Add. 1553 // Mul panics with ErrNaN if one operand is zero and the other 1554 // operand an infinity. The value of z is undefined in that case. 1555 func (z *Float) Mul(x, y *Float) *Float { 1556 if debugFloat { 1557 x.validate() 1558 y.validate() 1559 } 1560 1561 if z.prec == 0 { 1562 z.prec = umax32(x.prec, y.prec) 1563 } 1564 1565 z.neg = x.neg != y.neg 1566 1567 if x.form == finite && y.form == finite { 1568 // x * y (common case) 1569 z.umul(x, y) 1570 return z 1571 } 1572 1573 z.acc = Exact 1574 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1575 // ±0 * ±Inf 1576 // ±Inf * ±0 1577 // value of z is undefined but make sure it's valid 1578 z.form = zero 1579 z.neg = false 1580 panic(ErrNaN{"multiplication of zero with infinity"}) 1581 } 1582 1583 if x.form == inf || y.form == inf { 1584 // ±Inf * y 1585 // x * ±Inf 1586 z.form = inf 1587 return z 1588 } 1589 1590 // ±0 * y 1591 // x * ±0 1592 z.form = zero 1593 return z 1594 } 1595 1596 // Quo sets z to the rounded quotient x/y and returns z. 1597 // Precision, rounding, and accuracy reporting are as for Add. 1598 // Quo panics with ErrNaN if both operands are zero or infinities. 1599 // The value of z is undefined in that case. 1600 func (z *Float) Quo(x, y *Float) *Float { 1601 if debugFloat { 1602 x.validate() 1603 y.validate() 1604 } 1605 1606 if z.prec == 0 { 1607 z.prec = umax32(x.prec, y.prec) 1608 } 1609 1610 z.neg = x.neg != y.neg 1611 1612 if x.form == finite && y.form == finite { 1613 // x / y (common case) 1614 z.uquo(x, y) 1615 return z 1616 } 1617 1618 z.acc = Exact 1619 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1620 // ±0 / ±0 1621 // ±Inf / ±Inf 1622 // value of z is undefined but make sure it's valid 1623 z.form = zero 1624 z.neg = false 1625 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1626 } 1627 1628 if x.form == zero || y.form == inf { 1629 // ±0 / y 1630 // x / ±Inf 1631 z.form = zero 1632 return z 1633 } 1634 1635 // x / ±0 1636 // ±Inf / y 1637 z.form = inf 1638 return z 1639 } 1640 1641 // Cmp compares x and y and returns: 1642 // 1643 // -1 if x < y 1644 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1645 // +1 if x > y 1646 // 1647 func (x *Float) Cmp(y *Float) int { 1648 if debugFloat { 1649 x.validate() 1650 y.validate() 1651 } 1652 1653 mx := x.ord() 1654 my := y.ord() 1655 switch { 1656 case mx < my: 1657 return -1 1658 case mx > my: 1659 return +1 1660 } 1661 // mx == my 1662 1663 // only if |mx| == 1 we have to compare the mantissae 1664 switch mx { 1665 case -1: 1666 return y.ucmp(x) 1667 case +1: 1668 return x.ucmp(y) 1669 } 1670 1671 return 0 1672 } 1673 1674 // ord classifies x and returns: 1675 // 1676 // -2 if -Inf == x 1677 // -1 if -Inf < x < 0 1678 // 0 if x == 0 (signed or unsigned) 1679 // +1 if 0 < x < +Inf 1680 // +2 if x == +Inf 1681 // 1682 func (x *Float) ord() int { 1683 var m int 1684 switch x.form { 1685 case finite: 1686 m = 1 1687 case zero: 1688 return 0 1689 case inf: 1690 m = 2 1691 } 1692 if x.neg { 1693 m = -m 1694 } 1695 return m 1696 } 1697 1698 func umax32(x, y uint32) uint32 { 1699 if x > y { 1700 return x 1701 } 1702 return y 1703 }