github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/math/big/float.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 ) 18 19 const debugFloat = true // enable for debugging 20 21 // A nonzero finite Float represents a multi-precision floating point number 22 // 23 // sign × mantissa × 2**exponent 24 // 25 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 26 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 27 // All Floats are ordered, and the ordering of two Floats x and y 28 // is defined by x.Cmp(y). 29 // 30 // Each Float value also has a precision, rounding mode, and accuracy. 31 // The precision is the maximum number of mantissa bits available to 32 // represent the value. The rounding mode specifies how a result should 33 // be rounded to fit into the mantissa bits, and accuracy describes the 34 // rounding error with respect to the exact result. 35 // 36 // Unless specified otherwise, all operations (including setters) that 37 // specify a *Float variable for the result (usually via the receiver 38 // with the exception of MantExp), round the numeric result according 39 // to the precision and rounding mode of the result variable. 40 // 41 // If the provided result precision is 0 (see below), it is set to the 42 // precision of the argument with the largest precision value before any 43 // rounding takes place, and the rounding mode remains unchanged. Thus, 44 // uninitialized Floats provided as result arguments will have their 45 // precision set to a reasonable value determined by the operands and 46 // their mode is the zero value for RoundingMode (ToNearestEven). 47 // 48 // By setting the desired precision to 24 or 53 and using matching rounding 49 // mode (typically ToNearestEven), Float operations produce the same results 50 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 51 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 52 // Exponent underflow and overflow lead to a 0 or an Infinity for different 53 // values than IEEE-754 because Float exponents have a much larger range. 54 // 55 // The zero (uninitialized) value for a Float is ready to use and represents 56 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 57 // 58 type Float struct { 59 prec uint32 60 mode RoundingMode 61 acc Accuracy 62 form form 63 neg bool 64 mant nat 65 exp int32 66 } 67 68 // An ErrNaN panic is raised by a Float operation that would lead to 69 // a NaN under IEEE-754 rules. An ErrNaN implements the error interface. 70 type ErrNaN struct { 71 msg string 72 } 73 74 func (err ErrNaN) Error() string { 75 return err.msg 76 } 77 78 // NewFloat allocates and returns a new Float set to x, 79 // with precision 53 and rounding mode ToNearestEven. 80 // NewFloat panics with ErrNaN if x is a NaN. 81 func NewFloat(x float64) *Float { 82 if math.IsNaN(x) { 83 panic(ErrNaN{"NewFloat(NaN)"}) 84 } 85 return new(Float).SetFloat64(x) 86 } 87 88 // Exponent and precision limits. 89 const ( 90 MaxExp = math.MaxInt32 // largest supported exponent 91 MinExp = math.MinInt32 // smallest supported exponent 92 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 93 ) 94 95 // Internal representation: The mantissa bits x.mant of a nonzero finite 96 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 97 // the slice may (but doesn't have to) be shorter if the mantissa contains 98 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 99 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 100 // trailing 0 bits or x.prec is not a multiple of the the Word size _W, 101 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 102 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 103 // 104 // A zero or non-finite Float x ignores x.mant and x.exp. 105 // 106 // x form neg mant exp 107 // ---------------------------------------------------------- 108 // ±0 zero sign - - 109 // 0 < |x| < +Inf finite sign mantissa exponent 110 // ±Inf inf sign - - 111 112 // A form value describes the internal representation. 113 type form byte 114 115 // The form value order is relevant - do not change! 116 const ( 117 zero form = iota 118 finite 119 inf 120 ) 121 122 // RoundingMode determines how a Float value is rounded to the 123 // desired precision. Rounding may change the Float value; the 124 // rounding error is described by the Float's Accuracy. 125 type RoundingMode byte 126 127 // The following rounding modes are supported. 128 const ( 129 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 130 ToNearestAway // == IEEE 754-2008 roundTiesToAway 131 ToZero // == IEEE 754-2008 roundTowardZero 132 AwayFromZero // no IEEE 754-2008 equivalent 133 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 134 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 135 ) 136 137 //go:generate stringer -type=RoundingMode 138 139 // Accuracy describes the rounding error produced by the most recent 140 // operation that generated a Float value, relative to the exact value. 141 type Accuracy int8 142 143 // Constants describing the Accuracy of a Float. 144 const ( 145 Below Accuracy = -1 146 Exact Accuracy = 0 147 Above Accuracy = +1 148 ) 149 150 //go:generate stringer -type=Accuracy 151 152 // SetPrec sets z's precision to prec and returns the (possibly) rounded 153 // value of z. Rounding occurs according to z's rounding mode if the mantissa 154 // cannot be represented in prec bits without loss of precision. 155 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 156 // If prec > MaxPrec, it is set to MaxPrec. 157 func (z *Float) SetPrec(prec uint) *Float { 158 z.acc = Exact // optimistically assume no rounding is needed 159 160 // special case 161 if prec == 0 { 162 z.prec = 0 163 if z.form == finite { 164 // truncate z to 0 165 z.acc = makeAcc(z.neg) 166 z.form = zero 167 } 168 return z 169 } 170 171 // general case 172 if prec > MaxPrec { 173 prec = MaxPrec 174 } 175 old := z.prec 176 z.prec = uint32(prec) 177 if z.prec < old { 178 z.round(0) 179 } 180 return z 181 } 182 183 func makeAcc(above bool) Accuracy { 184 if above { 185 return Above 186 } 187 return Below 188 } 189 190 // SetMode sets z's rounding mode to mode and returns an exact z. 191 // z remains unchanged otherwise. 192 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 193 func (z *Float) SetMode(mode RoundingMode) *Float { 194 z.mode = mode 195 z.acc = Exact 196 return z 197 } 198 199 // Prec returns the mantissa precision of x in bits. 200 // The result may be 0 for |x| == 0 and |x| == Inf. 201 func (x *Float) Prec() uint { 202 return uint(x.prec) 203 } 204 205 // MinPrec returns the minimum precision required to represent x exactly 206 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 207 // The result is 0 for |x| == 0 and |x| == Inf. 208 func (x *Float) MinPrec() uint { 209 if x.form != finite { 210 return 0 211 } 212 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 213 } 214 215 // Mode returns the rounding mode of x. 216 func (x *Float) Mode() RoundingMode { 217 return x.mode 218 } 219 220 // Acc returns the accuracy of x produced by the most recent operation. 221 func (x *Float) Acc() Accuracy { 222 return x.acc 223 } 224 225 // Sign returns: 226 // 227 // -1 if x < 0 228 // 0 if x is ±0 229 // +1 if x > 0 230 // 231 func (x *Float) Sign() int { 232 if debugFloat { 233 x.validate() 234 } 235 if x.form == zero { 236 return 0 237 } 238 if x.neg { 239 return -1 240 } 241 return 1 242 } 243 244 // MantExp breaks x into its mantissa and exponent components 245 // and returns the exponent. If a non-nil mant argument is 246 // provided its value is set to the mantissa of x, with the 247 // same precision and rounding mode as x. The components 248 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 249 // Calling MantExp with a nil argument is an efficient way to 250 // get the exponent of the receiver. 251 // 252 // Special cases are: 253 // 254 // ( ±0).MantExp(mant) = 0, with mant set to ±0 255 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 256 // 257 // x and mant may be the same in which case x is set to its 258 // mantissa value. 259 func (x *Float) MantExp(mant *Float) (exp int) { 260 if debugFloat { 261 x.validate() 262 } 263 if x.form == finite { 264 exp = int(x.exp) 265 } 266 if mant != nil { 267 mant.Copy(x) 268 if mant.form == finite { 269 mant.exp = 0 270 } 271 } 272 return 273 } 274 275 func (z *Float) setExpAndRound(exp int64, sbit uint) { 276 if exp < MinExp { 277 // underflow 278 z.acc = makeAcc(z.neg) 279 z.form = zero 280 return 281 } 282 283 if exp > MaxExp { 284 // overflow 285 z.acc = makeAcc(!z.neg) 286 z.form = inf 287 return 288 } 289 290 z.form = finite 291 z.exp = int32(exp) 292 z.round(sbit) 293 } 294 295 // SetMantExp sets z to mant × 2**exp and and returns z. 296 // The result z has the same precision and rounding mode 297 // as mant. SetMantExp is an inverse of MantExp but does 298 // not require 0.5 <= |mant| < 1.0. Specifically: 299 // 300 // mant := new(Float) 301 // new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true 302 // 303 // Special cases are: 304 // 305 // z.SetMantExp( ±0, exp) = ±0 306 // z.SetMantExp(±Inf, exp) = ±Inf 307 // 308 // z and mant may be the same in which case z's exponent 309 // is set to exp. 310 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 311 if debugFloat { 312 z.validate() 313 mant.validate() 314 } 315 z.Copy(mant) 316 if z.form != finite { 317 return z 318 } 319 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 320 return z 321 } 322 323 // Signbit returns true if x is negative or negative zero. 324 func (x *Float) Signbit() bool { 325 return x.neg 326 } 327 328 // IsInf reports whether x is +Inf or -Inf. 329 func (x *Float) IsInf() bool { 330 return x.form == inf 331 } 332 333 // IsInt reports whether x is an integer. 334 // ±Inf values are not integers. 335 func (x *Float) IsInt() bool { 336 if debugFloat { 337 x.validate() 338 } 339 // special cases 340 if x.form != finite { 341 return x.form == zero 342 } 343 // x.form == finite 344 if x.exp <= 0 { 345 return false 346 } 347 // x.exp > 0 348 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 349 } 350 351 // debugging support 352 func (x *Float) validate() { 353 if !debugFloat { 354 // avoid performance bugs 355 panic("validate called but debugFloat is not set") 356 } 357 if x.form != finite { 358 return 359 } 360 m := len(x.mant) 361 if m == 0 { 362 panic("nonzero finite number with empty mantissa") 363 } 364 const msb = 1 << (_W - 1) 365 if x.mant[m-1]&msb == 0 { 366 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0))) 367 } 368 if x.prec == 0 { 369 panic("zero precision finite number") 370 } 371 } 372 373 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 374 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 375 // have before calling round. z's mantissa must be normalized (with the msb set) 376 // or empty. 377 // 378 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 379 // sign of z. For correct rounding, the sign of z must be set correctly before 380 // calling round. 381 func (z *Float) round(sbit uint) { 382 if debugFloat { 383 z.validate() 384 if z.form > finite { 385 panic(fmt.Sprintf("round called for non-finite value %s", z)) 386 } 387 } 388 // z.form <= finite 389 390 z.acc = Exact 391 if z.form == zero { 392 return 393 } 394 // z.form == finite && len(z.mant) > 0 395 // m > 0 implies z.prec > 0 (checked by validate) 396 397 m := uint32(len(z.mant)) // present mantissa length in words 398 bits := m * _W // present mantissa bits 399 if bits <= z.prec { 400 // mantissa fits => nothing to do 401 return 402 } 403 // bits > z.prec 404 405 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 406 407 // Rounding is based on two bits: the rounding bit (rbit) and the 408 // sticky bit (sbit). The rbit is the bit immediately before the 409 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 410 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 411 // 412 // rbit sbit => "fractional part" 413 // 414 // 0 0 == 0 415 // 0 1 > 0 , < 0.5 416 // 1 0 == 0.5 417 // 1 1 > 0.5, < 1.0 418 419 // bits > z.prec: mantissa too large => round 420 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 421 rbit := z.mant.bit(r) // rounding bit 422 if sbit == 0 { 423 sbit = z.mant.sticky(r) 424 } 425 if debugFloat && sbit&^1 != 0 { 426 panic(fmt.Sprintf("invalid sbit %#x", sbit)) 427 } 428 429 // convert ToXInf rounding modes 430 mode := z.mode 431 switch mode { 432 case ToNegativeInf: 433 mode = ToZero 434 if z.neg { 435 mode = AwayFromZero 436 } 437 case ToPositiveInf: 438 mode = AwayFromZero 439 if z.neg { 440 mode = ToZero 441 } 442 } 443 444 // cut off extra words 445 if m > n { 446 copy(z.mant, z.mant[m-n:]) // move n last words to front 447 z.mant = z.mant[:n] 448 } 449 450 // determine number of trailing zero bits t 451 t := n*_W - z.prec // 0 <= t < _W 452 lsb := Word(1) << t 453 454 // make rounding decision 455 // TODO(gri) This can be simplified (see Bits.round in bits_test.go). 456 switch mode { 457 case ToZero: 458 // nothing to do 459 case ToNearestEven, ToNearestAway: 460 if rbit == 0 { 461 // rounding bits == 0b0x 462 mode = ToZero 463 } else if sbit == 1 { 464 // rounding bits == 0b11 465 mode = AwayFromZero 466 } 467 case AwayFromZero: 468 if rbit|sbit == 0 { 469 mode = ToZero 470 } 471 default: 472 // ToXInf modes have been converted to ToZero or AwayFromZero 473 panic("unreachable") 474 } 475 476 // round and determine accuracy 477 switch mode { 478 case ToZero: 479 if rbit|sbit != 0 { 480 z.acc = Below 481 } 482 483 case ToNearestEven, ToNearestAway: 484 if debugFloat && rbit != 1 { 485 panic("internal error in rounding") 486 } 487 if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 { 488 z.acc = Below 489 break 490 } 491 // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0 492 fallthrough 493 494 case AwayFromZero: 495 // add 1 to mantissa 496 if addVW(z.mant, z.mant, lsb) != 0 { 497 // overflow => shift mantissa right by 1 and add msb 498 shrVU(z.mant, z.mant, 1) 499 z.mant[n-1] |= 1 << (_W - 1) 500 // adjust exponent 501 if z.exp < MaxExp { 502 z.exp++ 503 } else { 504 // exponent overflow 505 z.acc = makeAcc(!z.neg) 506 z.form = inf 507 return 508 } 509 } 510 z.acc = Above 511 } 512 513 // zero out trailing bits in least-significant word 514 z.mant[0] &^= lsb - 1 515 516 // update accuracy 517 if z.acc != Exact && z.neg { 518 z.acc = -z.acc 519 } 520 521 if debugFloat { 522 z.validate() 523 } 524 525 return 526 } 527 528 // nlz returns the number of leading zero bits in x. 529 func nlz(x Word) uint { 530 return _W - uint(bitLen(x)) 531 } 532 533 func nlz64(x uint64) uint { 534 // TODO(gri) this can be done more nicely 535 if _W == 32 { 536 if x>>32 == 0 { 537 return 32 + nlz(Word(x)) 538 } 539 return nlz(Word(x >> 32)) 540 } 541 if _W == 64 { 542 return nlz(Word(x)) 543 } 544 panic("unreachable") 545 } 546 547 func (z *Float) setBits64(neg bool, x uint64) *Float { 548 if z.prec == 0 { 549 z.prec = 64 550 } 551 z.acc = Exact 552 z.neg = neg 553 if x == 0 { 554 z.form = zero 555 return z 556 } 557 // x != 0 558 z.form = finite 559 s := nlz64(x) 560 z.mant = z.mant.setUint64(x << s) 561 z.exp = int32(64 - s) // always fits 562 if z.prec < 64 { 563 z.round(0) 564 } 565 return z 566 } 567 568 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 569 // If z's precision is 0, it is changed to 64 (and rounding will have 570 // no effect). 571 func (z *Float) SetUint64(x uint64) *Float { 572 return z.setBits64(false, x) 573 } 574 575 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 576 // If z's precision is 0, it is changed to 64 (and rounding will have 577 // no effect). 578 func (z *Float) SetInt64(x int64) *Float { 579 u := x 580 if u < 0 { 581 u = -u 582 } 583 // We cannot simply call z.SetUint64(uint64(u)) and change 584 // the sign afterwards because the sign affects rounding. 585 return z.setBits64(x < 0, uint64(u)) 586 } 587 588 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 589 // If z's precision is 0, it is changed to 53 (and rounding will have 590 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 591 func (z *Float) SetFloat64(x float64) *Float { 592 if z.prec == 0 { 593 z.prec = 53 594 } 595 if math.IsNaN(x) { 596 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 597 } 598 z.acc = Exact 599 z.neg = math.Signbit(x) // handle -0, -Inf correctly 600 if x == 0 { 601 z.form = zero 602 return z 603 } 604 if math.IsInf(x, 0) { 605 z.form = inf 606 return z 607 } 608 // normalized x != 0 609 z.form = finite 610 fmant, exp := math.Frexp(x) // get normalized mantissa 611 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 612 z.exp = int32(exp) // always fits 613 if z.prec < 53 { 614 z.round(0) 615 } 616 return z 617 } 618 619 // fnorm normalizes mantissa m by shifting it to the left 620 // such that the msb of the most-significant word (msw) is 1. 621 // It returns the shift amount. It assumes that len(m) != 0. 622 func fnorm(m nat) int64 { 623 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 624 panic("msw of mantissa is 0") 625 } 626 s := nlz(m[len(m)-1]) 627 if s > 0 { 628 c := shlVU(m, m, s) 629 if debugFloat && c != 0 { 630 panic("nlz or shlVU incorrect") 631 } 632 } 633 return int64(s) 634 } 635 636 // SetInt sets z to the (possibly rounded) value of x and returns z. 637 // If z's precision is 0, it is changed to the larger of x.BitLen() 638 // or 64 (and rounding will have no effect). 639 func (z *Float) SetInt(x *Int) *Float { 640 // TODO(gri) can be more efficient if z.prec > 0 641 // but small compared to the size of x, or if there 642 // are many trailing 0's. 643 bits := uint32(x.BitLen()) 644 if z.prec == 0 { 645 z.prec = umax32(bits, 64) 646 } 647 z.acc = Exact 648 z.neg = x.neg 649 if len(x.abs) == 0 { 650 z.form = zero 651 return z 652 } 653 // x != 0 654 z.mant = z.mant.set(x.abs) 655 fnorm(z.mant) 656 z.setExpAndRound(int64(bits), 0) 657 return z 658 } 659 660 // SetRat sets z to the (possibly rounded) value of x and returns z. 661 // If z's precision is 0, it is changed to the largest of a.BitLen(), 662 // b.BitLen(), or 64; with x = a/b. 663 func (z *Float) SetRat(x *Rat) *Float { 664 if x.IsInt() { 665 return z.SetInt(x.Num()) 666 } 667 var a, b Float 668 a.SetInt(x.Num()) 669 b.SetInt(x.Denom()) 670 if z.prec == 0 { 671 z.prec = umax32(a.prec, b.prec) 672 } 673 return z.Quo(&a, &b) 674 } 675 676 // SetInf sets z to the infinite Float -Inf if signbit is 677 // set, or +Inf if signbit is not set, and returns z. The 678 // precision of z is unchanged and the result is always 679 // Exact. 680 func (z *Float) SetInf(signbit bool) *Float { 681 z.acc = Exact 682 z.form = inf 683 z.neg = signbit 684 return z 685 } 686 687 // Set sets z to the (possibly rounded) value of x and returns z. 688 // If z's precision is 0, it is changed to the precision of x 689 // before setting z (and rounding will have no effect). 690 // Rounding is performed according to z's precision and rounding 691 // mode; and z's accuracy reports the result error relative to the 692 // exact (not rounded) result. 693 func (z *Float) Set(x *Float) *Float { 694 if debugFloat { 695 x.validate() 696 } 697 z.acc = Exact 698 if z != x { 699 z.form = x.form 700 z.neg = x.neg 701 if x.form == finite { 702 z.exp = x.exp 703 z.mant = z.mant.set(x.mant) 704 } 705 if z.prec == 0 { 706 z.prec = x.prec 707 } else if z.prec < x.prec { 708 z.round(0) 709 } 710 } 711 return z 712 } 713 714 // Copy sets z to x, with the same precision, rounding mode, and 715 // accuracy as x, and returns z. x is not changed even if z and 716 // x are the same. 717 func (z *Float) Copy(x *Float) *Float { 718 if debugFloat { 719 x.validate() 720 } 721 if z != x { 722 z.prec = x.prec 723 z.mode = x.mode 724 z.acc = x.acc 725 z.form = x.form 726 z.neg = x.neg 727 if z.form == finite { 728 z.mant = z.mant.set(x.mant) 729 z.exp = x.exp 730 } 731 } 732 return z 733 } 734 735 func high32(x nat) uint32 { 736 // TODO(gri) This can be done more efficiently on 32bit platforms. 737 return uint32(high64(x) >> 32) 738 } 739 740 func high64(x nat) uint64 { 741 i := len(x) 742 if i == 0 { 743 return 0 744 } 745 // i > 0 746 v := uint64(x[i-1]) 747 if _W == 32 { 748 v <<= 32 749 if i > 1 { 750 v |= uint64(x[i-2]) 751 } 752 } 753 return v 754 } 755 756 // Uint64 returns the unsigned integer resulting from truncating x 757 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 758 // if x is an integer and Below otherwise. 759 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 760 // for x > math.MaxUint64. 761 func (x *Float) Uint64() (uint64, Accuracy) { 762 if debugFloat { 763 x.validate() 764 } 765 766 switch x.form { 767 case finite: 768 if x.neg { 769 return 0, Above 770 } 771 // 0 < x < +Inf 772 if x.exp <= 0 { 773 // 0 < x < 1 774 return 0, Below 775 } 776 // 1 <= x < Inf 777 if x.exp <= 64 { 778 // u = trunc(x) fits into a uint64 779 u := high64(x.mant) >> (64 - uint32(x.exp)) 780 if x.MinPrec() <= 64 { 781 return u, Exact 782 } 783 return u, Below // x truncated 784 } 785 // x too large 786 return math.MaxUint64, Below 787 788 case zero: 789 return 0, Exact 790 791 case inf: 792 if x.neg { 793 return 0, Above 794 } 795 return math.MaxUint64, Below 796 } 797 798 panic("unreachable") 799 } 800 801 // Int64 returns the integer resulting from truncating x towards zero. 802 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 803 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 804 // The result is (math.MinInt64, Above) for x < math.MinInt64, 805 // and (math.MaxInt64, Below) for x > math.MaxInt64. 806 func (x *Float) Int64() (int64, Accuracy) { 807 if debugFloat { 808 x.validate() 809 } 810 811 switch x.form { 812 case finite: 813 // 0 < |x| < +Inf 814 acc := makeAcc(x.neg) 815 if x.exp <= 0 { 816 // 0 < |x| < 1 817 return 0, acc 818 } 819 // x.exp > 0 820 821 // 1 <= |x| < +Inf 822 if x.exp <= 63 { 823 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 824 i := int64(high64(x.mant) >> (64 - uint32(x.exp))) 825 if x.neg { 826 i = -i 827 } 828 if x.MinPrec() <= uint(x.exp) { 829 return i, Exact 830 } 831 return i, acc // x truncated 832 } 833 if x.neg { 834 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 835 if x.exp == 64 && x.MinPrec() == 1 { 836 acc = Exact 837 } 838 return math.MinInt64, acc 839 } 840 // x too large 841 return math.MaxInt64, Below 842 843 case zero: 844 return 0, Exact 845 846 case inf: 847 if x.neg { 848 return math.MinInt64, Above 849 } 850 return math.MaxInt64, Below 851 } 852 853 panic("unreachable") 854 } 855 856 // TODO(gri) Float32 and Float64 are very similar internally but for the 857 // floatxx parameters and some conversions. Should factor out shared code. 858 859 // Float32 returns the float32 value nearest to x. If x is too small to be 860 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 861 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 862 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 863 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 864 func (x *Float) Float32() (float32, Accuracy) { 865 if debugFloat { 866 x.validate() 867 } 868 869 switch x.form { 870 case finite: 871 // 0 < |x| < +Inf 872 873 const ( 874 fbits = 32 // float size 875 mbits = 23 // mantissa size (excluding implicit msb) 876 ebits = fbits - mbits - 1 // 8 exponent size 877 bias = 1<<(ebits-1) - 1 // 127 exponent bias 878 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 879 emin = 1 - bias // -126 smallest unbiased exponent (normal) 880 emax = bias // 127 largest unbiased exponent (normal) 881 ) 882 883 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 884 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 885 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 886 // corresponding Float exponent. 887 // (see also implementation of math.Ldexp for similar code) 888 889 if x.exp < dmin+1 { 890 // underflow 891 if x.neg { 892 var z float32 893 return -z, Above 894 } 895 return 0.0, Below 896 } 897 // x.exp >= dmin+1 898 899 var r Float 900 r.prec = mbits + 1 // +1 for implicit msb 901 if x.exp < emin+1 { 902 // denormal number - round to fewer bits 903 r.prec = uint32(x.exp - dmin) 904 } 905 r.Set(x) 906 907 // Rounding may have caused r to overflow to ±Inf 908 // (rounding never causes underflows to 0). 909 if r.form == inf { 910 r.exp = emax + 2 // cause overflow below 911 } 912 913 if r.exp > emax+1 { 914 // overflow 915 if x.neg { 916 return float32(math.Inf(-1)), Below 917 } 918 return float32(math.Inf(+1)), Above 919 } 920 // dmin+1 <= r.exp <= emax+1 921 922 var s uint32 923 if r.neg { 924 s = 1 << (fbits - 1) 925 } 926 927 m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 928 929 // Rounding may have caused a denormal number to 930 // become normal. Check again. 931 c := float32(1.0) 932 if r.exp < emin+1 { 933 // denormal number 934 r.exp += mbits 935 c = 1.0 / (1 << mbits) // 2**-mbits 936 } 937 // emin+1 <= r.exp <= emax+1 938 e := uint32(r.exp-emin) << mbits 939 940 return c * math.Float32frombits(s|e|m), r.acc 941 942 case zero: 943 if x.neg { 944 var z float32 945 return -z, Exact 946 } 947 return 0.0, Exact 948 949 case inf: 950 if x.neg { 951 return float32(math.Inf(-1)), Exact 952 } 953 return float32(math.Inf(+1)), Exact 954 } 955 956 panic("unreachable") 957 } 958 959 // Float64 returns the float64 value nearest to x. If x is too small to be 960 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 961 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 962 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 963 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 964 func (x *Float) Float64() (float64, Accuracy) { 965 if debugFloat { 966 x.validate() 967 } 968 969 switch x.form { 970 case finite: 971 // 0 < |x| < +Inf 972 973 const ( 974 fbits = 64 // float size 975 mbits = 52 // mantissa size (excluding implicit msb) 976 ebits = fbits - mbits - 1 // 11 exponent size 977 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 978 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 979 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 980 emax = bias // 1023 largest unbiased exponent (normal) 981 ) 982 983 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 984 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 985 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 986 // corresponding Float exponent. 987 // (see also implementation of math.Ldexp for similar code) 988 989 if x.exp < dmin+1 { 990 // underflow 991 if x.neg { 992 var z float64 993 return -z, Above 994 } 995 return 0.0, Below 996 } 997 // x.exp >= dmin+1 998 999 var r Float 1000 r.prec = mbits + 1 // +1 for implicit msb 1001 if x.exp < emin+1 { 1002 // denormal number - round to fewer bits 1003 r.prec = uint32(x.exp - dmin) 1004 } 1005 r.Set(x) 1006 1007 // Rounding may have caused r to overflow to ±Inf 1008 // (rounding never causes underflows to 0). 1009 if r.form == inf { 1010 r.exp = emax + 2 // cause overflow below 1011 } 1012 1013 if r.exp > emax+1 { 1014 // overflow 1015 if x.neg { 1016 return math.Inf(-1), Below 1017 } 1018 return math.Inf(+1), Above 1019 } 1020 // dmin+1 <= r.exp <= emax+1 1021 1022 var s uint64 1023 if r.neg { 1024 s = 1 << (fbits - 1) 1025 } 1026 1027 m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1028 1029 // Rounding may have caused a denormal number to 1030 // become normal. Check again. 1031 c := 1.0 1032 if r.exp < emin+1 { 1033 // denormal number 1034 r.exp += mbits 1035 c = 1.0 / (1 << mbits) // 2**-mbits 1036 } 1037 // emin+1 <= r.exp <= emax+1 1038 e := uint64(r.exp-emin) << mbits 1039 1040 return c * math.Float64frombits(s|e|m), r.acc 1041 1042 case zero: 1043 if x.neg { 1044 var z float64 1045 return -z, Exact 1046 } 1047 return 0.0, Exact 1048 1049 case inf: 1050 if x.neg { 1051 return math.Inf(-1), Exact 1052 } 1053 return math.Inf(+1), Exact 1054 } 1055 1056 panic("unreachable") 1057 } 1058 1059 // Int returns the result of truncating x towards zero; 1060 // or nil if x is an infinity. 1061 // The result is Exact if x.IsInt(); otherwise it is Below 1062 // for x > 0, and Above for x < 0. 1063 // If a non-nil *Int argument z is provided, Int stores 1064 // the result in z instead of allocating a new Int. 1065 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1066 if debugFloat { 1067 x.validate() 1068 } 1069 1070 if z == nil && x.form <= finite { 1071 z = new(Int) 1072 } 1073 1074 switch x.form { 1075 case finite: 1076 // 0 < |x| < +Inf 1077 acc := makeAcc(x.neg) 1078 if x.exp <= 0 { 1079 // 0 < |x| < 1 1080 return z.SetInt64(0), acc 1081 } 1082 // x.exp > 0 1083 1084 // 1 <= |x| < +Inf 1085 // determine minimum required precision for x 1086 allBits := uint(len(x.mant)) * _W 1087 exp := uint(x.exp) 1088 if x.MinPrec() <= exp { 1089 acc = Exact 1090 } 1091 // shift mantissa as needed 1092 if z == nil { 1093 z = new(Int) 1094 } 1095 z.neg = x.neg 1096 switch { 1097 case exp > allBits: 1098 z.abs = z.abs.shl(x.mant, exp-allBits) 1099 default: 1100 z.abs = z.abs.set(x.mant) 1101 case exp < allBits: 1102 z.abs = z.abs.shr(x.mant, allBits-exp) 1103 } 1104 return z, acc 1105 1106 case zero: 1107 return z.SetInt64(0), Exact 1108 1109 case inf: 1110 return nil, makeAcc(x.neg) 1111 } 1112 1113 panic("unreachable") 1114 } 1115 1116 // Rat returns the rational number corresponding to x; 1117 // or nil if x is an infinity. 1118 // The result is Exact is x is not an Inf. 1119 // If a non-nil *Rat argument z is provided, Rat stores 1120 // the result in z instead of allocating a new Rat. 1121 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1122 if debugFloat { 1123 x.validate() 1124 } 1125 1126 if z == nil && x.form <= finite { 1127 z = new(Rat) 1128 } 1129 1130 switch x.form { 1131 case finite: 1132 // 0 < |x| < +Inf 1133 allBits := int32(len(x.mant)) * _W 1134 // build up numerator and denominator 1135 z.a.neg = x.neg 1136 switch { 1137 case x.exp > allBits: 1138 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1139 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1140 // z already in normal form 1141 default: 1142 z.a.abs = z.a.abs.set(x.mant) 1143 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1144 // z already in normal form 1145 case x.exp < allBits: 1146 z.a.abs = z.a.abs.set(x.mant) 1147 t := z.b.abs.setUint64(1) 1148 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1149 z.norm() 1150 } 1151 return z, Exact 1152 1153 case zero: 1154 return z.SetInt64(0), Exact 1155 1156 case inf: 1157 return nil, makeAcc(x.neg) 1158 } 1159 1160 panic("unreachable") 1161 } 1162 1163 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1164 // and returns z. 1165 func (z *Float) Abs(x *Float) *Float { 1166 z.Set(x) 1167 z.neg = false 1168 return z 1169 } 1170 1171 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1172 // and returns z. 1173 func (z *Float) Neg(x *Float) *Float { 1174 z.Set(x) 1175 z.neg = !z.neg 1176 return z 1177 } 1178 1179 func validateBinaryOperands(x, y *Float) { 1180 if !debugFloat { 1181 // avoid performance bugs 1182 panic("validateBinaryOperands called but debugFloat is not set") 1183 } 1184 if len(x.mant) == 0 { 1185 panic("empty mantissa for x") 1186 } 1187 if len(y.mant) == 0 { 1188 panic("empty mantissa for y") 1189 } 1190 } 1191 1192 // z = x + y, ignoring signs of x and y for the addition 1193 // but using the sign of z for rounding the result. 1194 // x and y must have a non-empty mantissa and valid exponent. 1195 func (z *Float) uadd(x, y *Float) { 1196 // Note: This implementation requires 2 shifts most of the 1197 // time. It is also inefficient if exponents or precisions 1198 // differ by wide margins. The following article describes 1199 // an efficient (but much more complicated) implementation 1200 // compatible with the internal representation used here: 1201 // 1202 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1203 // Point Addition With Exact Rounding (as in the MPFR Library)" 1204 // http://www.vinc17.net/research/papers/rnc6.pdf 1205 1206 if debugFloat { 1207 validateBinaryOperands(x, y) 1208 } 1209 1210 // compute exponents ex, ey for mantissa with "binary point" 1211 // on the right (mantissa.0) - use int64 to avoid overflow 1212 ex := int64(x.exp) - int64(len(x.mant))*_W 1213 ey := int64(y.exp) - int64(len(y.mant))*_W 1214 1215 // TODO(gri) having a combined add-and-shift primitive 1216 // could make this code significantly faster 1217 switch { 1218 case ex < ey: 1219 // cannot re-use z.mant w/o testing for aliasing 1220 t := nat(nil).shl(y.mant, uint(ey-ex)) 1221 z.mant = z.mant.add(x.mant, t) 1222 default: 1223 // ex == ey, no shift needed 1224 z.mant = z.mant.add(x.mant, y.mant) 1225 case ex > ey: 1226 // cannot re-use z.mant w/o testing for aliasing 1227 t := nat(nil).shl(x.mant, uint(ex-ey)) 1228 z.mant = z.mant.add(t, y.mant) 1229 ex = ey 1230 } 1231 // len(z.mant) > 0 1232 1233 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1234 } 1235 1236 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1237 // but using the sign of z for rounding the result. 1238 // x and y must have a non-empty mantissa and valid exponent. 1239 func (z *Float) usub(x, y *Float) { 1240 // This code is symmetric to uadd. 1241 // We have not factored the common code out because 1242 // eventually uadd (and usub) should be optimized 1243 // by special-casing, and the code will diverge. 1244 1245 if debugFloat { 1246 validateBinaryOperands(x, y) 1247 } 1248 1249 ex := int64(x.exp) - int64(len(x.mant))*_W 1250 ey := int64(y.exp) - int64(len(y.mant))*_W 1251 1252 switch { 1253 case ex < ey: 1254 // cannot re-use z.mant w/o testing for aliasing 1255 t := nat(nil).shl(y.mant, uint(ey-ex)) 1256 z.mant = t.sub(x.mant, t) 1257 default: 1258 // ex == ey, no shift needed 1259 z.mant = z.mant.sub(x.mant, y.mant) 1260 case ex > ey: 1261 // cannot re-use z.mant w/o testing for aliasing 1262 t := nat(nil).shl(x.mant, uint(ex-ey)) 1263 z.mant = t.sub(t, y.mant) 1264 ex = ey 1265 } 1266 1267 // operands may have cancelled each other out 1268 if len(z.mant) == 0 { 1269 z.acc = Exact 1270 z.form = zero 1271 z.neg = false 1272 return 1273 } 1274 // len(z.mant) > 0 1275 1276 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1277 } 1278 1279 // z = x * y, ignoring signs of x and y for the multiplication 1280 // but using the sign of z for rounding the result. 1281 // x and y must have a non-empty mantissa and valid exponent. 1282 func (z *Float) umul(x, y *Float) { 1283 if debugFloat { 1284 validateBinaryOperands(x, y) 1285 } 1286 1287 // Note: This is doing too much work if the precision 1288 // of z is less than the sum of the precisions of x 1289 // and y which is often the case (e.g., if all floats 1290 // have the same precision). 1291 // TODO(gri) Optimize this for the common case. 1292 1293 e := int64(x.exp) + int64(y.exp) 1294 z.mant = z.mant.mul(x.mant, y.mant) 1295 1296 z.setExpAndRound(e-fnorm(z.mant), 0) 1297 } 1298 1299 // z = x / y, ignoring signs of x and y for the division 1300 // but using the sign of z for rounding the result. 1301 // x and y must have a non-empty mantissa and valid exponent. 1302 func (z *Float) uquo(x, y *Float) { 1303 if debugFloat { 1304 validateBinaryOperands(x, y) 1305 } 1306 1307 // mantissa length in words for desired result precision + 1 1308 // (at least one extra bit so we get the rounding bit after 1309 // the division) 1310 n := int(z.prec/_W) + 1 1311 1312 // compute adjusted x.mant such that we get enough result precision 1313 xadj := x.mant 1314 if d := n - len(x.mant) + len(y.mant); d > 0 { 1315 // d extra words needed => add d "0 digits" to x 1316 xadj = make(nat, len(x.mant)+d) 1317 copy(xadj[d:], x.mant) 1318 } 1319 // TODO(gri): If we have too many digits (d < 0), we should be able 1320 // to shorten x for faster division. But we must be extra careful 1321 // with rounding in that case. 1322 1323 // Compute d before division since there may be aliasing of x.mant 1324 // (via xadj) or y.mant with z.mant. 1325 d := len(xadj) - len(y.mant) 1326 1327 // divide 1328 var r nat 1329 z.mant, r = z.mant.div(nil, xadj, y.mant) 1330 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1331 1332 // The result is long enough to include (at least) the rounding bit. 1333 // If there's a non-zero remainder, the corresponding fractional part 1334 // (if it were computed), would have a non-zero sticky bit (if it were 1335 // zero, it couldn't have a non-zero remainder). 1336 var sbit uint 1337 if len(r) > 0 { 1338 sbit = 1 1339 } 1340 1341 z.setExpAndRound(e-fnorm(z.mant), sbit) 1342 } 1343 1344 // ucmp returns -1, 0, or +1, depending on whether 1345 // |x| < |y|, |x| == |y|, or |x| > |y|. 1346 // x and y must have a non-empty mantissa and valid exponent. 1347 func (x *Float) ucmp(y *Float) int { 1348 if debugFloat { 1349 validateBinaryOperands(x, y) 1350 } 1351 1352 switch { 1353 case x.exp < y.exp: 1354 return -1 1355 case x.exp > y.exp: 1356 return +1 1357 } 1358 // x.exp == y.exp 1359 1360 // compare mantissas 1361 i := len(x.mant) 1362 j := len(y.mant) 1363 for i > 0 || j > 0 { 1364 var xm, ym Word 1365 if i > 0 { 1366 i-- 1367 xm = x.mant[i] 1368 } 1369 if j > 0 { 1370 j-- 1371 ym = y.mant[j] 1372 } 1373 switch { 1374 case xm < ym: 1375 return -1 1376 case xm > ym: 1377 return +1 1378 } 1379 } 1380 1381 return 0 1382 } 1383 1384 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1385 // 1386 // When neither the inputs nor result are NaN, the sign of a product or 1387 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1388 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1389 // one of the addends’ signs; and the sign of the result of conversions, 1390 // the quantize operation, the roundToIntegral operations, and the 1391 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1392 // These rules shall apply even when operands or results are zero or infinite. 1393 // 1394 // When the sum of two operands with opposite signs (or the difference of 1395 // two operands with like signs) is exactly zero, the sign of that sum (or 1396 // difference) shall be +0 in all rounding-direction attributes except 1397 // roundTowardNegative; under that attribute, the sign of an exact zero 1398 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1399 // sign as x even when x is zero. 1400 // 1401 // See also: http://play.golang.org/p/RtH3UCt5IH 1402 1403 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1404 // it is changed to the larger of x's or y's precision before the operation. 1405 // Rounding is performed according to z's precision and rounding mode; and 1406 // z's accuracy reports the result error relative to the exact (not rounded) 1407 // result. Add panics with ErrNaN if x and y are infinities with opposite 1408 // signs. The value of z is undefined in that case. 1409 // 1410 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1411 func (z *Float) Add(x, y *Float) *Float { 1412 if debugFloat { 1413 x.validate() 1414 y.validate() 1415 } 1416 1417 if z.prec == 0 { 1418 z.prec = umax32(x.prec, y.prec) 1419 } 1420 1421 if x.form == finite && y.form == finite { 1422 // x + y (commom case) 1423 z.neg = x.neg 1424 if x.neg == y.neg { 1425 // x + y == x + y 1426 // (-x) + (-y) == -(x + y) 1427 z.uadd(x, y) 1428 } else { 1429 // x + (-y) == x - y == -(y - x) 1430 // (-x) + y == y - x == -(x - y) 1431 if x.ucmp(y) > 0 { 1432 z.usub(x, y) 1433 } else { 1434 z.neg = !z.neg 1435 z.usub(y, x) 1436 } 1437 } 1438 return z 1439 } 1440 1441 if x.form == inf && y.form == inf && x.neg != y.neg { 1442 // +Inf + -Inf 1443 // -Inf + +Inf 1444 // value of z is undefined but make sure it's valid 1445 z.acc = Exact 1446 z.form = zero 1447 z.neg = false 1448 panic(ErrNaN{"addition of infinities with opposite signs"}) 1449 } 1450 1451 if x.form == zero && y.form == zero { 1452 // ±0 + ±0 1453 z.acc = Exact 1454 z.form = zero 1455 z.neg = x.neg && y.neg // -0 + -0 == -0 1456 return z 1457 } 1458 1459 if x.form == inf || y.form == zero { 1460 // ±Inf + y 1461 // x + ±0 1462 return z.Set(x) 1463 } 1464 1465 // ±0 + y 1466 // x + ±Inf 1467 return z.Set(y) 1468 } 1469 1470 // Sub sets z to the rounded difference x-y and returns z. 1471 // Precision, rounding, and accuracy reporting are as for Add. 1472 // Sub panics with ErrNaN if x and y are infinities with equal 1473 // signs. The value of z is undefined in that case. 1474 func (z *Float) Sub(x, y *Float) *Float { 1475 if debugFloat { 1476 x.validate() 1477 y.validate() 1478 } 1479 1480 if z.prec == 0 { 1481 z.prec = umax32(x.prec, y.prec) 1482 } 1483 1484 if x.form == finite && y.form == finite { 1485 // x - y (common case) 1486 z.neg = x.neg 1487 if x.neg != y.neg { 1488 // x - (-y) == x + y 1489 // (-x) - y == -(x + y) 1490 z.uadd(x, y) 1491 } else { 1492 // x - y == x - y == -(y - x) 1493 // (-x) - (-y) == y - x == -(x - y) 1494 if x.ucmp(y) > 0 { 1495 z.usub(x, y) 1496 } else { 1497 z.neg = !z.neg 1498 z.usub(y, x) 1499 } 1500 } 1501 return z 1502 } 1503 1504 if x.form == inf && y.form == inf && x.neg == y.neg { 1505 // +Inf - +Inf 1506 // -Inf - -Inf 1507 // value of z is undefined but make sure it's valid 1508 z.acc = Exact 1509 z.form = zero 1510 z.neg = false 1511 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1512 } 1513 1514 if x.form == zero && y.form == zero { 1515 // ±0 - ±0 1516 z.acc = Exact 1517 z.form = zero 1518 z.neg = x.neg && !y.neg // -0 - +0 == -0 1519 return z 1520 } 1521 1522 if x.form == inf || y.form == zero { 1523 // ±Inf - y 1524 // x - ±0 1525 return z.Set(x) 1526 } 1527 1528 // ±0 - y 1529 // x - ±Inf 1530 return z.Neg(y) 1531 } 1532 1533 // Mul sets z to the rounded product x*y and returns z. 1534 // Precision, rounding, and accuracy reporting are as for Add. 1535 // Mul panics with ErrNaN if one operand is zero and the other 1536 // operand an infinity. The value of z is undefined in that case. 1537 func (z *Float) Mul(x, y *Float) *Float { 1538 if debugFloat { 1539 x.validate() 1540 y.validate() 1541 } 1542 1543 if z.prec == 0 { 1544 z.prec = umax32(x.prec, y.prec) 1545 } 1546 1547 z.neg = x.neg != y.neg 1548 1549 if x.form == finite && y.form == finite { 1550 // x * y (common case) 1551 z.umul(x, y) 1552 return z 1553 } 1554 1555 z.acc = Exact 1556 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1557 // ±0 * ±Inf 1558 // ±Inf * ±0 1559 // value of z is undefined but make sure it's valid 1560 z.form = zero 1561 z.neg = false 1562 panic(ErrNaN{"multiplication of zero with infinity"}) 1563 } 1564 1565 if x.form == inf || y.form == inf { 1566 // ±Inf * y 1567 // x * ±Inf 1568 z.form = inf 1569 return z 1570 } 1571 1572 // ±0 * y 1573 // x * ±0 1574 z.form = zero 1575 return z 1576 } 1577 1578 // Quo sets z to the rounded quotient x/y and returns z. 1579 // Precision, rounding, and accuracy reporting are as for Add. 1580 // Quo panics with ErrNaN if both operands are zero or infinities. 1581 // The value of z is undefined in that case. 1582 func (z *Float) Quo(x, y *Float) *Float { 1583 if debugFloat { 1584 x.validate() 1585 y.validate() 1586 } 1587 1588 if z.prec == 0 { 1589 z.prec = umax32(x.prec, y.prec) 1590 } 1591 1592 z.neg = x.neg != y.neg 1593 1594 if x.form == finite && y.form == finite { 1595 // x / y (common case) 1596 z.uquo(x, y) 1597 return z 1598 } 1599 1600 z.acc = Exact 1601 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1602 // ±0 / ±0 1603 // ±Inf / ±Inf 1604 // value of z is undefined but make sure it's valid 1605 z.form = zero 1606 z.neg = false 1607 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1608 } 1609 1610 if x.form == zero || y.form == inf { 1611 // ±0 / y 1612 // x / ±Inf 1613 z.form = zero 1614 return z 1615 } 1616 1617 // x / ±0 1618 // ±Inf / y 1619 z.form = inf 1620 return z 1621 } 1622 1623 // Cmp compares x and y and returns: 1624 // 1625 // -1 if x < y 1626 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1627 // +1 if x > y 1628 // 1629 func (x *Float) Cmp(y *Float) int { 1630 if debugFloat { 1631 x.validate() 1632 y.validate() 1633 } 1634 1635 mx := x.ord() 1636 my := y.ord() 1637 switch { 1638 case mx < my: 1639 return -1 1640 case mx > my: 1641 return +1 1642 } 1643 // mx == my 1644 1645 // only if |mx| == 1 we have to compare the mantissae 1646 switch mx { 1647 case -1: 1648 return y.ucmp(x) 1649 case +1: 1650 return x.ucmp(y) 1651 } 1652 1653 return 0 1654 } 1655 1656 // ord classifies x and returns: 1657 // 1658 // -2 if -Inf == x 1659 // -1 if -Inf < x < 0 1660 // 0 if x == 0 (signed or unsigned) 1661 // +1 if 0 < x < +Inf 1662 // +2 if x == +Inf 1663 // 1664 func (x *Float) ord() int { 1665 var m int 1666 switch x.form { 1667 case finite: 1668 m = 1 1669 case zero: 1670 return 0 1671 case inf: 1672 m = 2 1673 } 1674 if x.neg { 1675 m = -m 1676 } 1677 return m 1678 } 1679 1680 func umax32(x, y uint32) uint32 { 1681 if x > y { 1682 return x 1683 } 1684 return y 1685 }