github.com/rsc/go@v0.0.0-20150416155037-e040fd465409/src/cmd/internal/gc/big/float.go (about) 1 // Copyright 2014 The Go Authors. All rights reserved. 2 // Use of this source code is governed by a BSD-style 3 // license that can be found in the LICENSE file. 4 5 // This file implements multi-precision floating-point numbers. 6 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 7 // can be of mixed precision. Unlike MPFR, the rounding mode is 8 // not specified with each operation, but with each operand. The 9 // rounding mode of the result operand determines the rounding 10 // mode of an operation. This is a from-scratch implementation. 11 12 package big 13 14 import ( 15 "fmt" 16 "math" 17 ) 18 19 const debugFloat = true // enable for debugging 20 21 // A nonzero finite Float represents a multi-precision floating point number 22 // 23 // sign × mantissa × 2**exponent 24 // 25 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 26 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 27 // All Floats are ordered, and the ordering of two Floats x and y 28 // is defined by x.Cmp(y). 29 // 30 // Each Float value also has a precision, rounding mode, and accuracy. 31 // The precision is the maximum number of mantissa bits available to 32 // represent the value. The rounding mode specifies how a result should 33 // be rounded to fit into the mantissa bits, and accuracy describes the 34 // rounding error with respect to the exact result. 35 // 36 // Unless specified otherwise, all operations (including setters) that 37 // specify a *Float variable for the result (usually via the receiver 38 // with the exception of MantExp), round the numeric result according 39 // to the precision and rounding mode of the result variable. 40 // 41 // If the provided result precision is 0 (see below), it is set to the 42 // precision of the argument with the largest precision value before any 43 // rounding takes place, and the rounding mode remains unchanged. Thus, 44 // uninitialized Floats provided as result arguments will have their 45 // precision set to a reasonable value determined by the operands and 46 // their mode is the zero value for RoundingMode (ToNearestEven). 47 // 48 // By setting the desired precision to 24 or 53 and using matching rounding 49 // mode (typically ToNearestEven), Float operations produce the same results 50 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 51 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 52 // Exponent underflow and overflow lead to a 0 or an Infinity for different 53 // values than IEEE-754 because Float exponents have a much larger range. 54 // 55 // The zero (uninitialized) value for a Float is ready to use and represents 56 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 57 // 58 type Float struct { 59 prec uint32 60 mode RoundingMode 61 acc Accuracy 62 form form 63 neg bool 64 mant nat 65 exp int32 66 } 67 68 // Float operations that would lead to a NaN under IEEE-754 rules cause 69 // a run-time panic of ErrNaN type. 70 type ErrNaN struct { 71 msg string 72 } 73 74 // NewFloat allocates and returns a new Float set to x, 75 // with precision 53 and rounding mode ToNearestEven. 76 // NewFloat panics with ErrNaN if x is a NaN. 77 func NewFloat(x float64) *Float { 78 if math.IsNaN(x) { 79 panic(ErrNaN{"NewFloat(NaN)"}) 80 } 81 return new(Float).SetFloat64(x) 82 } 83 84 // Exponent and precision limits. 85 const ( 86 MaxExp = math.MaxInt32 // largest supported exponent 87 MinExp = math.MinInt32 // smallest supported exponent 88 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 89 ) 90 91 // Internal representation: The mantissa bits x.mant of a nonzero finite 92 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 93 // the slice may (but doesn't have to) be shorter if the mantissa contains 94 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 95 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 96 // trailing 0 bits or x.prec is not a multiple of the the Word size _W, 97 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 98 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 99 // 100 // A zero or non-finite Float x ignores x.mant and x.exp. 101 // 102 // x form neg mant exp 103 // ---------------------------------------------------------- 104 // ±0 zero sign - - 105 // 0 < |x| < +Inf finite sign mantissa exponent 106 // ±Inf inf sign - - 107 108 // A form value describes the internal representation. 109 type form byte 110 111 // The form value order is relevant - do not change! 112 const ( 113 zero form = iota 114 finite 115 inf 116 ) 117 118 // RoundingMode determines how a Float value is rounded to the 119 // desired precision. Rounding may change the Float value; the 120 // rounding error is described by the Float's Accuracy. 121 type RoundingMode byte 122 123 // The following rounding modes are supported. 124 const ( 125 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 126 ToNearestAway // == IEEE 754-2008 roundTiesToAway 127 ToZero // == IEEE 754-2008 roundTowardZero 128 AwayFromZero // no IEEE 754-2008 equivalent 129 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 130 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 131 ) 132 133 //go:generate stringer -type=RoundingMode 134 135 // Accuracy describes the rounding error produced by the most recent 136 // operation that generated a Float value, relative to the exact value. 137 type Accuracy int8 138 139 // Constants describing the Accuracy of a Float. 140 const ( 141 Below Accuracy = -1 142 Exact Accuracy = 0 143 Above Accuracy = +1 144 ) 145 146 //go:generate stringer -type=Accuracy 147 148 // SetPrec sets z's precision to prec and returns the (possibly) rounded 149 // value of z. Rounding occurs according to z's rounding mode if the mantissa 150 // cannot be represented in prec bits without loss of precision. 151 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 152 // If prec > MaxPrec, it is set to MaxPrec. 153 func (z *Float) SetPrec(prec uint) *Float { 154 z.acc = Exact // optimistically assume no rounding is needed 155 156 // special case 157 if prec == 0 { 158 z.prec = 0 159 if z.form == finite { 160 // truncate z to 0 161 z.acc = makeAcc(z.neg) 162 z.form = zero 163 } 164 return z 165 } 166 167 // general case 168 if prec > MaxPrec { 169 prec = MaxPrec 170 } 171 old := z.prec 172 z.prec = uint32(prec) 173 if z.prec < old { 174 z.round(0) 175 } 176 return z 177 } 178 179 func makeAcc(above bool) Accuracy { 180 if above { 181 return Above 182 } 183 return Below 184 } 185 186 // SetMode sets z's rounding mode to mode and returns an exact z. 187 // z remains unchanged otherwise. 188 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 189 func (z *Float) SetMode(mode RoundingMode) *Float { 190 z.mode = mode 191 z.acc = Exact 192 return z 193 } 194 195 // Prec returns the mantissa precision of x in bits. 196 // The result may be 0 for |x| == 0 and |x| == Inf. 197 func (x *Float) Prec() uint { 198 return uint(x.prec) 199 } 200 201 // MinPrec returns the minimum precision required to represent x exactly 202 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 203 // The result is 0 for |x| == 0 and |x| == Inf. 204 func (x *Float) MinPrec() uint { 205 if x.form != finite { 206 return 0 207 } 208 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 209 } 210 211 // Mode returns the rounding mode of x. 212 func (x *Float) Mode() RoundingMode { 213 return x.mode 214 } 215 216 // Acc returns the accuracy of x produced by the most recent operation. 217 func (x *Float) Acc() Accuracy { 218 return x.acc 219 } 220 221 // Sign returns: 222 // 223 // -1 if x < 0 224 // 0 if x is ±0 225 // +1 if x > 0 226 // 227 func (x *Float) Sign() int { 228 if debugFloat { 229 x.validate() 230 } 231 if x.form == zero { 232 return 0 233 } 234 if x.neg { 235 return -1 236 } 237 return 1 238 } 239 240 // MantExp breaks x into its mantissa and exponent components 241 // and returns the exponent. If a non-nil mant argument is 242 // provided its value is set to the mantissa of x, with the 243 // same precision and rounding mode as x. The components 244 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 245 // Calling MantExp with a nil argument is an efficient way to 246 // get the exponent of the receiver. 247 // 248 // Special cases are: 249 // 250 // ( ±0).MantExp(mant) = 0, with mant set to ±0 251 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 252 // 253 // x and mant may be the same in which case x is set to its 254 // mantissa value. 255 func (x *Float) MantExp(mant *Float) (exp int) { 256 if debugFloat { 257 x.validate() 258 } 259 if x.form == finite { 260 exp = int(x.exp) 261 } 262 if mant != nil { 263 mant.Copy(x) 264 if mant.form == finite { 265 mant.exp = 0 266 } 267 } 268 return 269 } 270 271 func (z *Float) setExpAndRound(exp int64, sbit uint) { 272 if exp < MinExp { 273 // underflow 274 z.acc = makeAcc(z.neg) 275 z.form = zero 276 return 277 } 278 279 if exp > MaxExp { 280 // overflow 281 z.acc = makeAcc(!z.neg) 282 z.form = inf 283 return 284 } 285 286 z.form = finite 287 z.exp = int32(exp) 288 z.round(sbit) 289 } 290 291 // SetMantExp sets z to mant × 2**exp and and returns z. 292 // The result z has the same precision and rounding mode 293 // as mant. SetMantExp is an inverse of MantExp but does 294 // not require 0.5 <= |mant| < 1.0. Specifically: 295 // 296 // mant := new(Float) 297 // new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true 298 // 299 // Special cases are: 300 // 301 // z.SetMantExp( ±0, exp) = ±0 302 // z.SetMantExp(±Inf, exp) = ±Inf 303 // 304 // z and mant may be the same in which case z's exponent 305 // is set to exp. 306 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 307 if debugFloat { 308 z.validate() 309 mant.validate() 310 } 311 z.Copy(mant) 312 if z.form != finite { 313 return z 314 } 315 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 316 return z 317 } 318 319 // Signbit returns true if x is negative or negative zero. 320 func (x *Float) Signbit() bool { 321 return x.neg 322 } 323 324 // IsInf reports whether x is +Inf or -Inf. 325 func (x *Float) IsInf() bool { 326 return x.form == inf 327 } 328 329 // IsInt reports whether x is an integer. 330 // ±Inf values are not integers. 331 func (x *Float) IsInt() bool { 332 if debugFloat { 333 x.validate() 334 } 335 // special cases 336 if x.form != finite { 337 return x.form == zero 338 } 339 // x.form == finite 340 if x.exp <= 0 { 341 return false 342 } 343 // x.exp > 0 344 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 345 } 346 347 // debugging support 348 func (x *Float) validate() { 349 if !debugFloat { 350 // avoid performance bugs 351 panic("validate called but debugFloat is not set") 352 } 353 if x.form != finite { 354 return 355 } 356 m := len(x.mant) 357 if m == 0 { 358 panic("nonzero finite number with empty mantissa") 359 } 360 const msb = 1 << (_W - 1) 361 if x.mant[m-1]&msb == 0 { 362 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0))) 363 } 364 if x.prec == 0 { 365 panic("zero precision finite number") 366 } 367 } 368 369 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 370 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 371 // have before calling round. z's mantissa must be normalized (with the msb set) 372 // or empty. 373 // 374 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 375 // sign of z. For correct rounding, the sign of z must be set correctly before 376 // calling round. 377 func (z *Float) round(sbit uint) { 378 if debugFloat { 379 z.validate() 380 if z.form > finite { 381 panic(fmt.Sprintf("round called for non-finite value %s", z)) 382 } 383 } 384 // z.form <= finite 385 386 z.acc = Exact 387 if z.form == zero { 388 return 389 } 390 // z.form == finite && len(z.mant) > 0 391 // m > 0 implies z.prec > 0 (checked by validate) 392 393 m := uint32(len(z.mant)) // present mantissa length in words 394 bits := m * _W // present mantissa bits 395 if bits <= z.prec { 396 // mantissa fits => nothing to do 397 return 398 } 399 // bits > z.prec 400 401 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 402 403 // Rounding is based on two bits: the rounding bit (rbit) and the 404 // sticky bit (sbit). The rbit is the bit immediately before the 405 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 406 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 407 // 408 // rbit sbit => "fractional part" 409 // 410 // 0 0 == 0 411 // 0 1 > 0 , < 0.5 412 // 1 0 == 0.5 413 // 1 1 > 0.5, < 1.0 414 415 // bits > z.prec: mantissa too large => round 416 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 417 rbit := z.mant.bit(r) // rounding bit 418 if sbit == 0 { 419 sbit = z.mant.sticky(r) 420 } 421 if debugFloat && sbit&^1 != 0 { 422 panic(fmt.Sprintf("invalid sbit %#x", sbit)) 423 } 424 425 // convert ToXInf rounding modes 426 mode := z.mode 427 switch mode { 428 case ToNegativeInf: 429 mode = ToZero 430 if z.neg { 431 mode = AwayFromZero 432 } 433 case ToPositiveInf: 434 mode = AwayFromZero 435 if z.neg { 436 mode = ToZero 437 } 438 } 439 440 // cut off extra words 441 if m > n { 442 copy(z.mant, z.mant[m-n:]) // move n last words to front 443 z.mant = z.mant[:n] 444 } 445 446 // determine number of trailing zero bits t 447 t := n*_W - z.prec // 0 <= t < _W 448 lsb := Word(1) << t 449 450 // make rounding decision 451 // TODO(gri) This can be simplified (see Bits.round in bits_test.go). 452 switch mode { 453 case ToZero: 454 // nothing to do 455 case ToNearestEven, ToNearestAway: 456 if rbit == 0 { 457 // rounding bits == 0b0x 458 mode = ToZero 459 } else if sbit == 1 { 460 // rounding bits == 0b11 461 mode = AwayFromZero 462 } 463 case AwayFromZero: 464 if rbit|sbit == 0 { 465 mode = ToZero 466 } 467 default: 468 // ToXInf modes have been converted to ToZero or AwayFromZero 469 panic("unreachable") 470 } 471 472 // round and determine accuracy 473 switch mode { 474 case ToZero: 475 if rbit|sbit != 0 { 476 z.acc = Below 477 } 478 479 case ToNearestEven, ToNearestAway: 480 if debugFloat && rbit != 1 { 481 panic("internal error in rounding") 482 } 483 if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 { 484 z.acc = Below 485 break 486 } 487 // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0 488 fallthrough 489 490 case AwayFromZero: 491 // add 1 to mantissa 492 if addVW(z.mant, z.mant, lsb) != 0 { 493 // overflow => shift mantissa right by 1 and add msb 494 shrVU(z.mant, z.mant, 1) 495 z.mant[n-1] |= 1 << (_W - 1) 496 // adjust exponent 497 if z.exp < MaxExp { 498 z.exp++ 499 } else { 500 // exponent overflow 501 z.acc = makeAcc(!z.neg) 502 z.form = inf 503 return 504 } 505 } 506 z.acc = Above 507 } 508 509 // zero out trailing bits in least-significant word 510 z.mant[0] &^= lsb - 1 511 512 // update accuracy 513 if z.acc != Exact && z.neg { 514 z.acc = -z.acc 515 } 516 517 if debugFloat { 518 z.validate() 519 } 520 521 return 522 } 523 524 // nlz returns the number of leading zero bits in x. 525 func nlz(x Word) uint { 526 return _W - uint(bitLen(x)) 527 } 528 529 func nlz64(x uint64) uint { 530 // TODO(gri) this can be done more nicely 531 if _W == 32 { 532 if x>>32 == 0 { 533 return 32 + nlz(Word(x)) 534 } 535 return nlz(Word(x >> 32)) 536 } 537 if _W == 64 { 538 return nlz(Word(x)) 539 } 540 panic("unreachable") 541 } 542 543 func (z *Float) setBits64(neg bool, x uint64) *Float { 544 if z.prec == 0 { 545 z.prec = 64 546 } 547 z.acc = Exact 548 z.neg = neg 549 if x == 0 { 550 z.form = zero 551 return z 552 } 553 // x != 0 554 z.form = finite 555 s := nlz64(x) 556 z.mant = z.mant.setUint64(x << s) 557 z.exp = int32(64 - s) // always fits 558 if z.prec < 64 { 559 z.round(0) 560 } 561 return z 562 } 563 564 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 565 // If z's precision is 0, it is changed to 64 (and rounding will have 566 // no effect). 567 func (z *Float) SetUint64(x uint64) *Float { 568 return z.setBits64(false, x) 569 } 570 571 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 572 // If z's precision is 0, it is changed to 64 (and rounding will have 573 // no effect). 574 func (z *Float) SetInt64(x int64) *Float { 575 u := x 576 if u < 0 { 577 u = -u 578 } 579 // We cannot simply call z.SetUint64(uint64(u)) and change 580 // the sign afterwards because the sign affects rounding. 581 return z.setBits64(x < 0, uint64(u)) 582 } 583 584 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 585 // If z's precision is 0, it is changed to 53 (and rounding will have 586 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 587 func (z *Float) SetFloat64(x float64) *Float { 588 if z.prec == 0 { 589 z.prec = 53 590 } 591 if math.IsNaN(x) { 592 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 593 } 594 z.acc = Exact 595 z.neg = math.Signbit(x) // handle -0, -Inf correctly 596 if x == 0 { 597 z.form = zero 598 return z 599 } 600 if math.IsInf(x, 0) { 601 z.form = inf 602 return z 603 } 604 // normalized x != 0 605 z.form = finite 606 fmant, exp := math.Frexp(x) // get normalized mantissa 607 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 608 z.exp = int32(exp) // always fits 609 if z.prec < 53 { 610 z.round(0) 611 } 612 return z 613 } 614 615 // fnorm normalizes mantissa m by shifting it to the left 616 // such that the msb of the most-significant word (msw) is 1. 617 // It returns the shift amount. It assumes that len(m) != 0. 618 func fnorm(m nat) int64 { 619 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 620 panic("msw of mantissa is 0") 621 } 622 s := nlz(m[len(m)-1]) 623 if s > 0 { 624 c := shlVU(m, m, s) 625 if debugFloat && c != 0 { 626 panic("nlz or shlVU incorrect") 627 } 628 } 629 return int64(s) 630 } 631 632 // SetInt sets z to the (possibly rounded) value of x and returns z. 633 // If z's precision is 0, it is changed to the larger of x.BitLen() 634 // or 64 (and rounding will have no effect). 635 func (z *Float) SetInt(x *Int) *Float { 636 // TODO(gri) can be more efficient if z.prec > 0 637 // but small compared to the size of x, or if there 638 // are many trailing 0's. 639 bits := uint32(x.BitLen()) 640 if z.prec == 0 { 641 z.prec = umax32(bits, 64) 642 } 643 z.acc = Exact 644 z.neg = x.neg 645 if len(x.abs) == 0 { 646 z.form = zero 647 return z 648 } 649 // x != 0 650 z.mant = z.mant.set(x.abs) 651 fnorm(z.mant) 652 z.setExpAndRound(int64(bits), 0) 653 return z 654 } 655 656 // SetRat sets z to the (possibly rounded) value of x and returns z. 657 // If z's precision is 0, it is changed to the largest of a.BitLen(), 658 // b.BitLen(), or 64; with x = a/b. 659 func (z *Float) SetRat(x *Rat) *Float { 660 if x.IsInt() { 661 return z.SetInt(x.Num()) 662 } 663 var a, b Float 664 a.SetInt(x.Num()) 665 b.SetInt(x.Denom()) 666 if z.prec == 0 { 667 z.prec = umax32(a.prec, b.prec) 668 } 669 return z.Quo(&a, &b) 670 } 671 672 // SetInf sets z to the infinite Float -Inf if signbit is 673 // set, or +Inf if signbit is not set, and returns z. The 674 // precision of z is unchanged and the result is always 675 // Exact. 676 func (z *Float) SetInf(signbit bool) *Float { 677 z.acc = Exact 678 z.form = inf 679 z.neg = signbit 680 return z 681 } 682 683 // Set sets z to the (possibly rounded) value of x and returns z. 684 // If z's precision is 0, it is changed to the precision of x 685 // before setting z (and rounding will have no effect). 686 // Rounding is performed according to z's precision and rounding 687 // mode; and z's accuracy reports the result error relative to the 688 // exact (not rounded) result. 689 func (z *Float) Set(x *Float) *Float { 690 if debugFloat { 691 x.validate() 692 } 693 z.acc = Exact 694 if z != x { 695 z.form = x.form 696 z.neg = x.neg 697 if x.form == finite { 698 z.exp = x.exp 699 z.mant = z.mant.set(x.mant) 700 } 701 if z.prec == 0 { 702 z.prec = x.prec 703 } else if z.prec < x.prec { 704 z.round(0) 705 } 706 } 707 return z 708 } 709 710 // Copy sets z to x, with the same precision, rounding mode, and 711 // accuracy as x, and returns z. x is not changed even if z and 712 // x are the same. 713 func (z *Float) Copy(x *Float) *Float { 714 if debugFloat { 715 x.validate() 716 } 717 if z != x { 718 z.prec = x.prec 719 z.mode = x.mode 720 z.acc = x.acc 721 z.form = x.form 722 z.neg = x.neg 723 if z.form == finite { 724 z.mant = z.mant.set(x.mant) 725 z.exp = x.exp 726 } 727 } 728 return z 729 } 730 731 func high32(x nat) uint32 { 732 // TODO(gri) This can be done more efficiently on 32bit platforms. 733 return uint32(high64(x) >> 32) 734 } 735 736 func high64(x nat) uint64 { 737 i := len(x) 738 if i == 0 { 739 return 0 740 } 741 // i > 0 742 v := uint64(x[i-1]) 743 if _W == 32 { 744 v <<= 32 745 if i > 1 { 746 v |= uint64(x[i-2]) 747 } 748 } 749 return v 750 } 751 752 // Uint64 returns the unsigned integer resulting from truncating x 753 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 754 // if x is an integer and Below otherwise. 755 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 756 // for x > math.MaxUint64. 757 func (x *Float) Uint64() (uint64, Accuracy) { 758 if debugFloat { 759 x.validate() 760 } 761 762 switch x.form { 763 case finite: 764 if x.neg { 765 return 0, Above 766 } 767 // 0 < x < +Inf 768 if x.exp <= 0 { 769 // 0 < x < 1 770 return 0, Below 771 } 772 // 1 <= x < Inf 773 if x.exp <= 64 { 774 // u = trunc(x) fits into a uint64 775 u := high64(x.mant) >> (64 - uint32(x.exp)) 776 if x.MinPrec() <= 64 { 777 return u, Exact 778 } 779 return u, Below // x truncated 780 } 781 // x too large 782 return math.MaxUint64, Below 783 784 case zero: 785 return 0, Exact 786 787 case inf: 788 if x.neg { 789 return 0, Above 790 } 791 return math.MaxUint64, Below 792 } 793 794 panic("unreachable") 795 } 796 797 // Int64 returns the integer resulting from truncating x towards zero. 798 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 799 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 800 // The result is (math.MinInt64, Above) for x < math.MinInt64, 801 // and (math.MaxInt64, Below) for x > math.MaxInt64. 802 func (x *Float) Int64() (int64, Accuracy) { 803 if debugFloat { 804 x.validate() 805 } 806 807 switch x.form { 808 case finite: 809 // 0 < |x| < +Inf 810 acc := makeAcc(x.neg) 811 if x.exp <= 0 { 812 // 0 < |x| < 1 813 return 0, acc 814 } 815 // x.exp > 0 816 817 // 1 <= |x| < +Inf 818 if x.exp <= 63 { 819 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 820 i := int64(high64(x.mant) >> (64 - uint32(x.exp))) 821 if x.neg { 822 i = -i 823 } 824 if x.MinPrec() <= uint(x.exp) { 825 return i, Exact 826 } 827 return i, acc // x truncated 828 } 829 if x.neg { 830 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 831 if x.exp == 64 && x.MinPrec() == 1 { 832 acc = Exact 833 } 834 return math.MinInt64, acc 835 } 836 // x too large 837 return math.MaxInt64, Below 838 839 case zero: 840 return 0, Exact 841 842 case inf: 843 if x.neg { 844 return math.MinInt64, Above 845 } 846 return math.MaxInt64, Below 847 } 848 849 panic("unreachable") 850 } 851 852 // TODO(gri) Float32 and Float64 are very similar internally but for the 853 // floatxx parameters and some conversions. Should factor out shared code. 854 855 // Float32 returns the float32 value nearest to x. If x is too small to be 856 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 857 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 858 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 859 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 860 func (x *Float) Float32() (float32, Accuracy) { 861 if debugFloat { 862 x.validate() 863 } 864 865 switch x.form { 866 case finite: 867 // 0 < |x| < +Inf 868 869 const ( 870 fbits = 32 // float size 871 mbits = 23 // mantissa size (excluding implicit msb) 872 ebits = fbits - mbits - 1 // 8 exponent size 873 bias = 1<<(ebits-1) - 1 // 127 exponent bias 874 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 875 emin = 1 - bias // -126 smallest unbiased exponent (normal) 876 emax = bias // 127 largest unbiased exponent (normal) 877 ) 878 879 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 880 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 881 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 882 // corresponding Float exponent. 883 // (see also implementation of math.Ldexp for similar code) 884 885 if x.exp < dmin+1 { 886 // underflow 887 if x.neg { 888 var z float32 889 return -z, Above 890 } 891 return 0.0, Below 892 } 893 // x.exp >= dmin+1 894 895 var r Float 896 r.prec = mbits + 1 // +1 for implicit msb 897 if x.exp < emin+1 { 898 // denormal number - round to fewer bits 899 r.prec = uint32(x.exp - dmin) 900 } 901 r.Set(x) 902 903 // Rounding may have caused r to overflow to ±Inf 904 // (rounding never causes underflows to 0). 905 if r.form == inf { 906 r.exp = emax + 2 // cause overflow below 907 } 908 909 if r.exp > emax+1 { 910 // overflow 911 if x.neg { 912 return float32(math.Inf(-1)), Below 913 } 914 return float32(math.Inf(+1)), Above 915 } 916 // dmin+1 <= r.exp <= emax+1 917 918 var s uint32 919 if r.neg { 920 s = 1 << (fbits - 1) 921 } 922 923 m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 924 925 // Rounding may have caused a denormal number to 926 // become normal. Check again. 927 c := float32(1.0) 928 if r.exp < emin+1 { 929 // denormal number 930 r.exp += mbits 931 c = 1.0 / (1 << mbits) // 2**-mbits 932 } 933 // emin+1 <= r.exp <= emax+1 934 e := uint32(r.exp-emin) << mbits 935 936 return c * math.Float32frombits(s|e|m), r.acc 937 938 case zero: 939 if x.neg { 940 var z float32 941 return -z, Exact 942 } 943 return 0.0, Exact 944 945 case inf: 946 if x.neg { 947 return float32(math.Inf(-1)), Exact 948 } 949 return float32(math.Inf(+1)), Exact 950 } 951 952 panic("unreachable") 953 } 954 955 // Float64 returns the float64 value nearest to x. If x is too small to be 956 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 957 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 958 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 959 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 960 func (x *Float) Float64() (float64, Accuracy) { 961 if debugFloat { 962 x.validate() 963 } 964 965 switch x.form { 966 case finite: 967 // 0 < |x| < +Inf 968 969 const ( 970 fbits = 64 // float size 971 mbits = 52 // mantissa size (excluding implicit msb) 972 ebits = fbits - mbits - 1 // 11 exponent size 973 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 974 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 975 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 976 emax = bias // 1023 largest unbiased exponent (normal) 977 ) 978 979 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 980 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 981 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 982 // corresponding Float exponent. 983 // (see also implementation of math.Ldexp for similar code) 984 985 if x.exp < dmin+1 { 986 // underflow 987 if x.neg { 988 var z float64 989 return -z, Above 990 } 991 return 0.0, Below 992 } 993 // x.exp >= dmin+1 994 995 var r Float 996 r.prec = mbits + 1 // +1 for implicit msb 997 if x.exp < emin+1 { 998 // denormal number - round to fewer bits 999 r.prec = uint32(x.exp - dmin) 1000 } 1001 r.Set(x) 1002 1003 // Rounding may have caused r to overflow to ±Inf 1004 // (rounding never causes underflows to 0). 1005 if r.form == inf { 1006 r.exp = emax + 2 // cause overflow below 1007 } 1008 1009 if r.exp > emax+1 { 1010 // overflow 1011 if x.neg { 1012 return math.Inf(-1), Below 1013 } 1014 return math.Inf(+1), Above 1015 } 1016 // dmin+1 <= r.exp <= emax+1 1017 1018 var s uint64 1019 if r.neg { 1020 s = 1 << (fbits - 1) 1021 } 1022 1023 m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1024 1025 // Rounding may have caused a denormal number to 1026 // become normal. Check again. 1027 c := 1.0 1028 if r.exp < emin+1 { 1029 // denormal number 1030 r.exp += mbits 1031 c = 1.0 / (1 << mbits) // 2**-mbits 1032 } 1033 // emin+1 <= r.exp <= emax+1 1034 e := uint64(r.exp-emin) << mbits 1035 1036 return c * math.Float64frombits(s|e|m), r.acc 1037 1038 case zero: 1039 if x.neg { 1040 var z float64 1041 return -z, Exact 1042 } 1043 return 0.0, Exact 1044 1045 case inf: 1046 if x.neg { 1047 return math.Inf(-1), Exact 1048 } 1049 return math.Inf(+1), Exact 1050 } 1051 1052 panic("unreachable") 1053 } 1054 1055 // Int returns the result of truncating x towards zero; 1056 // or nil if x is an infinity. 1057 // The result is Exact if x.IsInt(); otherwise it is Below 1058 // for x > 0, and Above for x < 0. 1059 // If a non-nil *Int argument z is provided, Int stores 1060 // the result in z instead of allocating a new Int. 1061 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1062 if debugFloat { 1063 x.validate() 1064 } 1065 1066 if z == nil && x.form <= finite { 1067 z = new(Int) 1068 } 1069 1070 switch x.form { 1071 case finite: 1072 // 0 < |x| < +Inf 1073 acc := makeAcc(x.neg) 1074 if x.exp <= 0 { 1075 // 0 < |x| < 1 1076 return z.SetInt64(0), acc 1077 } 1078 // x.exp > 0 1079 1080 // 1 <= |x| < +Inf 1081 // determine minimum required precision for x 1082 allBits := uint(len(x.mant)) * _W 1083 exp := uint(x.exp) 1084 if x.MinPrec() <= exp { 1085 acc = Exact 1086 } 1087 // shift mantissa as needed 1088 if z == nil { 1089 z = new(Int) 1090 } 1091 z.neg = x.neg 1092 switch { 1093 case exp > allBits: 1094 z.abs = z.abs.shl(x.mant, exp-allBits) 1095 default: 1096 z.abs = z.abs.set(x.mant) 1097 case exp < allBits: 1098 z.abs = z.abs.shr(x.mant, allBits-exp) 1099 } 1100 return z, acc 1101 1102 case zero: 1103 return z.SetInt64(0), Exact 1104 1105 case inf: 1106 return nil, makeAcc(x.neg) 1107 } 1108 1109 panic("unreachable") 1110 } 1111 1112 // Rat returns the rational number corresponding to x; 1113 // or nil if x is an infinity. 1114 // The result is Exact is x is not an Inf. 1115 // If a non-nil *Rat argument z is provided, Rat stores 1116 // the result in z instead of allocating a new Rat. 1117 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1118 if debugFloat { 1119 x.validate() 1120 } 1121 1122 if z == nil && x.form <= finite { 1123 z = new(Rat) 1124 } 1125 1126 switch x.form { 1127 case finite: 1128 // 0 < |x| < +Inf 1129 allBits := int32(len(x.mant)) * _W 1130 // build up numerator and denominator 1131 z.a.neg = x.neg 1132 switch { 1133 case x.exp > allBits: 1134 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1135 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1136 // z already in normal form 1137 default: 1138 z.a.abs = z.a.abs.set(x.mant) 1139 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1140 // z already in normal form 1141 case x.exp < allBits: 1142 z.a.abs = z.a.abs.set(x.mant) 1143 t := z.b.abs.setUint64(1) 1144 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1145 z.norm() 1146 } 1147 return z, Exact 1148 1149 case zero: 1150 return z.SetInt64(0), Exact 1151 1152 case inf: 1153 return nil, makeAcc(x.neg) 1154 } 1155 1156 panic("unreachable") 1157 } 1158 1159 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1160 // and returns z. 1161 func (z *Float) Abs(x *Float) *Float { 1162 z.Set(x) 1163 z.neg = false 1164 return z 1165 } 1166 1167 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1168 // and returns z. 1169 func (z *Float) Neg(x *Float) *Float { 1170 z.Set(x) 1171 z.neg = !z.neg 1172 return z 1173 } 1174 1175 func validateBinaryOperands(x, y *Float) { 1176 if !debugFloat { 1177 // avoid performance bugs 1178 panic("validateBinaryOperands called but debugFloat is not set") 1179 } 1180 if len(x.mant) == 0 { 1181 panic("empty mantissa for x") 1182 } 1183 if len(y.mant) == 0 { 1184 panic("empty mantissa for y") 1185 } 1186 } 1187 1188 // z = x + y, ignoring signs of x and y for the addition 1189 // but using the sign of z for rounding the result. 1190 // x and y must have a non-empty mantissa and valid exponent. 1191 func (z *Float) uadd(x, y *Float) { 1192 // Note: This implementation requires 2 shifts most of the 1193 // time. It is also inefficient if exponents or precisions 1194 // differ by wide margins. The following article describes 1195 // an efficient (but much more complicated) implementation 1196 // compatible with the internal representation used here: 1197 // 1198 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1199 // Point Addition With Exact Rounding (as in the MPFR Library)" 1200 // http://www.vinc17.net/research/papers/rnc6.pdf 1201 1202 if debugFloat { 1203 validateBinaryOperands(x, y) 1204 } 1205 1206 // compute exponents ex, ey for mantissa with "binary point" 1207 // on the right (mantissa.0) - use int64 to avoid overflow 1208 ex := int64(x.exp) - int64(len(x.mant))*_W 1209 ey := int64(y.exp) - int64(len(y.mant))*_W 1210 1211 // TODO(gri) having a combined add-and-shift primitive 1212 // could make this code significantly faster 1213 switch { 1214 case ex < ey: 1215 // cannot re-use z.mant w/o testing for aliasing 1216 t := nat(nil).shl(y.mant, uint(ey-ex)) 1217 z.mant = z.mant.add(x.mant, t) 1218 default: 1219 // ex == ey, no shift needed 1220 z.mant = z.mant.add(x.mant, y.mant) 1221 case ex > ey: 1222 // cannot re-use z.mant w/o testing for aliasing 1223 t := nat(nil).shl(x.mant, uint(ex-ey)) 1224 z.mant = z.mant.add(t, y.mant) 1225 ex = ey 1226 } 1227 // len(z.mant) > 0 1228 1229 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1230 } 1231 1232 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1233 // but using the sign of z for rounding the result. 1234 // x and y must have a non-empty mantissa and valid exponent. 1235 func (z *Float) usub(x, y *Float) { 1236 // This code is symmetric to uadd. 1237 // We have not factored the common code out because 1238 // eventually uadd (and usub) should be optimized 1239 // by special-casing, and the code will diverge. 1240 1241 if debugFloat { 1242 validateBinaryOperands(x, y) 1243 } 1244 1245 ex := int64(x.exp) - int64(len(x.mant))*_W 1246 ey := int64(y.exp) - int64(len(y.mant))*_W 1247 1248 switch { 1249 case ex < ey: 1250 // cannot re-use z.mant w/o testing for aliasing 1251 t := nat(nil).shl(y.mant, uint(ey-ex)) 1252 z.mant = t.sub(x.mant, t) 1253 default: 1254 // ex == ey, no shift needed 1255 z.mant = z.mant.sub(x.mant, y.mant) 1256 case ex > ey: 1257 // cannot re-use z.mant w/o testing for aliasing 1258 t := nat(nil).shl(x.mant, uint(ex-ey)) 1259 z.mant = t.sub(t, y.mant) 1260 ex = ey 1261 } 1262 1263 // operands may have cancelled each other out 1264 if len(z.mant) == 0 { 1265 z.acc = Exact 1266 z.form = zero 1267 z.neg = false 1268 return 1269 } 1270 // len(z.mant) > 0 1271 1272 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1273 } 1274 1275 // z = x * y, ignoring signs of x and y for the multiplication 1276 // but using the sign of z for rounding the result. 1277 // x and y must have a non-empty mantissa and valid exponent. 1278 func (z *Float) umul(x, y *Float) { 1279 if debugFloat { 1280 validateBinaryOperands(x, y) 1281 } 1282 1283 // Note: This is doing too much work if the precision 1284 // of z is less than the sum of the precisions of x 1285 // and y which is often the case (e.g., if all floats 1286 // have the same precision). 1287 // TODO(gri) Optimize this for the common case. 1288 1289 e := int64(x.exp) + int64(y.exp) 1290 z.mant = z.mant.mul(x.mant, y.mant) 1291 1292 z.setExpAndRound(e-fnorm(z.mant), 0) 1293 } 1294 1295 // z = x / y, ignoring signs of x and y for the division 1296 // but using the sign of z for rounding the result. 1297 // x and y must have a non-empty mantissa and valid exponent. 1298 func (z *Float) uquo(x, y *Float) { 1299 if debugFloat { 1300 validateBinaryOperands(x, y) 1301 } 1302 1303 // mantissa length in words for desired result precision + 1 1304 // (at least one extra bit so we get the rounding bit after 1305 // the division) 1306 n := int(z.prec/_W) + 1 1307 1308 // compute adjusted x.mant such that we get enough result precision 1309 xadj := x.mant 1310 if d := n - len(x.mant) + len(y.mant); d > 0 { 1311 // d extra words needed => add d "0 digits" to x 1312 xadj = make(nat, len(x.mant)+d) 1313 copy(xadj[d:], x.mant) 1314 } 1315 // TODO(gri): If we have too many digits (d < 0), we should be able 1316 // to shorten x for faster division. But we must be extra careful 1317 // with rounding in that case. 1318 1319 // Compute d before division since there may be aliasing of x.mant 1320 // (via xadj) or y.mant with z.mant. 1321 d := len(xadj) - len(y.mant) 1322 1323 // divide 1324 var r nat 1325 z.mant, r = z.mant.div(nil, xadj, y.mant) 1326 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1327 1328 // The result is long enough to include (at least) the rounding bit. 1329 // If there's a non-zero remainder, the corresponding fractional part 1330 // (if it were computed), would have a non-zero sticky bit (if it were 1331 // zero, it couldn't have a non-zero remainder). 1332 var sbit uint 1333 if len(r) > 0 { 1334 sbit = 1 1335 } 1336 1337 z.setExpAndRound(e-fnorm(z.mant), sbit) 1338 } 1339 1340 // ucmp returns -1, 0, or +1, depending on whether 1341 // |x| < |y|, |x| == |y|, or |x| > |y|. 1342 // x and y must have a non-empty mantissa and valid exponent. 1343 func (x *Float) ucmp(y *Float) int { 1344 if debugFloat { 1345 validateBinaryOperands(x, y) 1346 } 1347 1348 switch { 1349 case x.exp < y.exp: 1350 return -1 1351 case x.exp > y.exp: 1352 return +1 1353 } 1354 // x.exp == y.exp 1355 1356 // compare mantissas 1357 i := len(x.mant) 1358 j := len(y.mant) 1359 for i > 0 || j > 0 { 1360 var xm, ym Word 1361 if i > 0 { 1362 i-- 1363 xm = x.mant[i] 1364 } 1365 if j > 0 { 1366 j-- 1367 ym = y.mant[j] 1368 } 1369 switch { 1370 case xm < ym: 1371 return -1 1372 case xm > ym: 1373 return +1 1374 } 1375 } 1376 1377 return 0 1378 } 1379 1380 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1381 // 1382 // When neither the inputs nor result are NaN, the sign of a product or 1383 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1384 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1385 // one of the addends’ signs; and the sign of the result of conversions, 1386 // the quantize operation, the roundToIntegral operations, and the 1387 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1388 // These rules shall apply even when operands or results are zero or infinite. 1389 // 1390 // When the sum of two operands with opposite signs (or the difference of 1391 // two operands with like signs) is exactly zero, the sign of that sum (or 1392 // difference) shall be +0 in all rounding-direction attributes except 1393 // roundTowardNegative; under that attribute, the sign of an exact zero 1394 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1395 // sign as x even when x is zero. 1396 // 1397 // See also: http://play.golang.org/p/RtH3UCt5IH 1398 1399 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1400 // it is changed to the larger of x's or y's precision before the operation. 1401 // Rounding is performed according to z's precision and rounding mode; and 1402 // z's accuracy reports the result error relative to the exact (not rounded) 1403 // result. Add panics with ErrNaN if x and y are infinities with opposite 1404 // signs. The value of z is undefined in that case. 1405 // 1406 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1407 func (z *Float) Add(x, y *Float) *Float { 1408 if debugFloat { 1409 x.validate() 1410 y.validate() 1411 } 1412 1413 if z.prec == 0 { 1414 z.prec = umax32(x.prec, y.prec) 1415 } 1416 1417 if x.form == finite && y.form == finite { 1418 // x + y (commom case) 1419 z.neg = x.neg 1420 if x.neg == y.neg { 1421 // x + y == x + y 1422 // (-x) + (-y) == -(x + y) 1423 z.uadd(x, y) 1424 } else { 1425 // x + (-y) == x - y == -(y - x) 1426 // (-x) + y == y - x == -(x - y) 1427 if x.ucmp(y) > 0 { 1428 z.usub(x, y) 1429 } else { 1430 z.neg = !z.neg 1431 z.usub(y, x) 1432 } 1433 } 1434 return z 1435 } 1436 1437 if x.form == inf && y.form == inf && x.neg != y.neg { 1438 // +Inf + -Inf 1439 // -Inf + +Inf 1440 // value of z is undefined but make sure it's valid 1441 z.acc = Exact 1442 z.form = zero 1443 z.neg = false 1444 panic(ErrNaN{"addition of infinities with opposite signs"}) 1445 } 1446 1447 if x.form == zero && y.form == zero { 1448 // ±0 + ±0 1449 z.acc = Exact 1450 z.form = zero 1451 z.neg = x.neg && y.neg // -0 + -0 == -0 1452 return z 1453 } 1454 1455 if x.form == inf || y.form == zero { 1456 // ±Inf + y 1457 // x + ±0 1458 return z.Set(x) 1459 } 1460 1461 // ±0 + y 1462 // x + ±Inf 1463 return z.Set(y) 1464 } 1465 1466 // Sub sets z to the rounded difference x-y and returns z. 1467 // Precision, rounding, and accuracy reporting are as for Add. 1468 // Sub panics with ErrNaN if x and y are infinities with equal 1469 // signs. The value of z is undefined in that case. 1470 func (z *Float) Sub(x, y *Float) *Float { 1471 if debugFloat { 1472 x.validate() 1473 y.validate() 1474 } 1475 1476 if z.prec == 0 { 1477 z.prec = umax32(x.prec, y.prec) 1478 } 1479 1480 if x.form == finite && y.form == finite { 1481 // x - y (common case) 1482 z.neg = x.neg 1483 if x.neg != y.neg { 1484 // x - (-y) == x + y 1485 // (-x) - y == -(x + y) 1486 z.uadd(x, y) 1487 } else { 1488 // x - y == x - y == -(y - x) 1489 // (-x) - (-y) == y - x == -(x - y) 1490 if x.ucmp(y) > 0 { 1491 z.usub(x, y) 1492 } else { 1493 z.neg = !z.neg 1494 z.usub(y, x) 1495 } 1496 } 1497 return z 1498 } 1499 1500 if x.form == inf && y.form == inf && x.neg == y.neg { 1501 // +Inf - +Inf 1502 // -Inf - -Inf 1503 // value of z is undefined but make sure it's valid 1504 z.acc = Exact 1505 z.form = zero 1506 z.neg = false 1507 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1508 } 1509 1510 if x.form == zero && y.form == zero { 1511 // ±0 - ±0 1512 z.acc = Exact 1513 z.form = zero 1514 z.neg = x.neg && !y.neg // -0 - +0 == -0 1515 return z 1516 } 1517 1518 if x.form == inf || y.form == zero { 1519 // ±Inf - y 1520 // x - ±0 1521 return z.Set(x) 1522 } 1523 1524 // ±0 - y 1525 // x - ±Inf 1526 return z.Neg(y) 1527 } 1528 1529 // Mul sets z to the rounded product x*y and returns z. 1530 // Precision, rounding, and accuracy reporting are as for Add. 1531 // Mul panics with ErrNaN if one operand is zero and the other 1532 // operand an infinity. The value of z is undefined in that case. 1533 func (z *Float) Mul(x, y *Float) *Float { 1534 if debugFloat { 1535 x.validate() 1536 y.validate() 1537 } 1538 1539 if z.prec == 0 { 1540 z.prec = umax32(x.prec, y.prec) 1541 } 1542 1543 z.neg = x.neg != y.neg 1544 1545 if x.form == finite && y.form == finite { 1546 // x * y (common case) 1547 z.umul(x, y) 1548 return z 1549 } 1550 1551 z.acc = Exact 1552 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1553 // ±0 * ±Inf 1554 // ±Inf * ±0 1555 // value of z is undefined but make sure it's valid 1556 z.form = zero 1557 z.neg = false 1558 panic(ErrNaN{"multiplication of zero with infinity"}) 1559 } 1560 1561 if x.form == inf || y.form == inf { 1562 // ±Inf * y 1563 // x * ±Inf 1564 z.form = inf 1565 return z 1566 } 1567 1568 // ±0 * y 1569 // x * ±0 1570 z.form = zero 1571 return z 1572 } 1573 1574 // Quo sets z to the rounded quotient x/y and returns z. 1575 // Precision, rounding, and accuracy reporting are as for Add. 1576 // Quo panics with ErrNaN if both operands are zero or infinities. 1577 // The value of z is undefined in that case. 1578 func (z *Float) Quo(x, y *Float) *Float { 1579 if debugFloat { 1580 x.validate() 1581 y.validate() 1582 } 1583 1584 if z.prec == 0 { 1585 z.prec = umax32(x.prec, y.prec) 1586 } 1587 1588 z.neg = x.neg != y.neg 1589 1590 if x.form == finite && y.form == finite { 1591 // x / y (common case) 1592 z.uquo(x, y) 1593 return z 1594 } 1595 1596 z.acc = Exact 1597 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1598 // ±0 / ±0 1599 // ±Inf / ±Inf 1600 // value of z is undefined but make sure it's valid 1601 z.form = zero 1602 z.neg = false 1603 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1604 } 1605 1606 if x.form == zero || y.form == inf { 1607 // ±0 / y 1608 // x / ±Inf 1609 z.form = zero 1610 return z 1611 } 1612 1613 // x / ±0 1614 // ±Inf / y 1615 z.form = inf 1616 return z 1617 } 1618 1619 // Cmp compares x and y and returns: 1620 // 1621 // -1 if x < y 1622 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1623 // +1 if x > y 1624 // 1625 func (x *Float) Cmp(y *Float) int { 1626 if debugFloat { 1627 x.validate() 1628 y.validate() 1629 } 1630 1631 mx := x.ord() 1632 my := y.ord() 1633 switch { 1634 case mx < my: 1635 return -1 1636 case mx > my: 1637 return +1 1638 } 1639 // mx == my 1640 1641 // only if |mx| == 1 we have to compare the mantissae 1642 switch mx { 1643 case -1: 1644 return y.ucmp(x) 1645 case +1: 1646 return x.ucmp(y) 1647 } 1648 1649 return 0 1650 } 1651 1652 // ord classifies x and returns: 1653 // 1654 // -2 if -Inf == x 1655 // -1 if -Inf < x < 0 1656 // 0 if x == 0 (signed or unsigned) 1657 // +1 if 0 < x < +Inf 1658 // +2 if x == +Inf 1659 // 1660 func (x *Float) ord() int { 1661 var m int 1662 switch x.form { 1663 case finite: 1664 m = 1 1665 case zero: 1666 return 0 1667 case inf: 1668 m = 2 1669 } 1670 if x.neg { 1671 m = -m 1672 } 1673 return m 1674 } 1675 1676 func umax32(x, y uint32) uint32 { 1677 if x > y { 1678 return x 1679 } 1680 return y 1681 }