github.com/rsc/tmp@v0.0.0-20240517235954-6deaab19748b/bootstrap/internal/gc/big/float.go (about) 1 // Do not edit. Bootstrap copy of /Users/rsc/g/go/src/cmd/internal/gc/big/float.go 2 3 // Copyright 2014 The Go Authors. All rights reserved. 4 // Use of this source code is governed by a BSD-style 5 // license that can be found in the LICENSE file. 6 7 // This file implements multi-precision floating-point numbers. 8 // Like in the GNU MPFR library (http://www.mpfr.org/), operands 9 // can be of mixed precision. Unlike MPFR, the rounding mode is 10 // not specified with each operation, but with each operand. The 11 // rounding mode of the result operand determines the rounding 12 // mode of an operation. This is a from-scratch implementation. 13 14 package big 15 16 import ( 17 "fmt" 18 "math" 19 ) 20 21 const debugFloat = true // enable for debugging 22 23 // A nonzero finite Float represents a multi-precision floating point number 24 // 25 // sign × mantissa × 2**exponent 26 // 27 // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp. 28 // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf). 29 // All Floats are ordered, and the ordering of two Floats x and y 30 // is defined by x.Cmp(y). 31 // 32 // Each Float value also has a precision, rounding mode, and accuracy. 33 // The precision is the maximum number of mantissa bits available to 34 // represent the value. The rounding mode specifies how a result should 35 // be rounded to fit into the mantissa bits, and accuracy describes the 36 // rounding error with respect to the exact result. 37 // 38 // Unless specified otherwise, all operations (including setters) that 39 // specify a *Float variable for the result (usually via the receiver 40 // with the exception of MantExp), round the numeric result according 41 // to the precision and rounding mode of the result variable. 42 // 43 // If the provided result precision is 0 (see below), it is set to the 44 // precision of the argument with the largest precision value before any 45 // rounding takes place, and the rounding mode remains unchanged. Thus, 46 // uninitialized Floats provided as result arguments will have their 47 // precision set to a reasonable value determined by the operands and 48 // their mode is the zero value for RoundingMode (ToNearestEven). 49 // 50 // By setting the desired precision to 24 or 53 and using matching rounding 51 // mode (typically ToNearestEven), Float operations produce the same results 52 // as the corresponding float32 or float64 IEEE-754 arithmetic for operands 53 // that correspond to normal (i.e., not denormal) float32 or float64 numbers. 54 // Exponent underflow and overflow lead to a 0 or an Infinity for different 55 // values than IEEE-754 because Float exponents have a much larger range. 56 // 57 // The zero (uninitialized) value for a Float is ready to use and represents 58 // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven. 59 // 60 type Float struct { 61 prec uint32 62 mode RoundingMode 63 acc Accuracy 64 form form 65 neg bool 66 mant nat 67 exp int32 68 } 69 70 // Float operations that would lead to a NaN under IEEE-754 rules cause 71 // a run-time panic of ErrNaN type. 72 type ErrNaN struct { 73 msg string 74 } 75 76 // NewFloat allocates and returns a new Float set to x, 77 // with precision 53 and rounding mode ToNearestEven. 78 // NewFloat panics with ErrNaN if x is a NaN. 79 func NewFloat(x float64) *Float { 80 if math.IsNaN(x) { 81 panic(ErrNaN{"NewFloat(NaN)"}) 82 } 83 return new(Float).SetFloat64(x) 84 } 85 86 // Exponent and precision limits. 87 const ( 88 MaxExp = math.MaxInt32 // largest supported exponent 89 MinExp = math.MinInt32 // smallest supported exponent 90 MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited 91 ) 92 93 // Internal representation: The mantissa bits x.mant of a nonzero finite 94 // Float x are stored in a nat slice long enough to hold up to x.prec bits; 95 // the slice may (but doesn't have to) be shorter if the mantissa contains 96 // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e., 97 // the msb is shifted all the way "to the left"). Thus, if the mantissa has 98 // trailing 0 bits or x.prec is not a multiple of the the Word size _W, 99 // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds 100 // to the value 0.5; the exponent x.exp shifts the binary point as needed. 101 // 102 // A zero or non-finite Float x ignores x.mant and x.exp. 103 // 104 // x form neg mant exp 105 // ---------------------------------------------------------- 106 // ±0 zero sign - - 107 // 0 < |x| < +Inf finite sign mantissa exponent 108 // ±Inf inf sign - - 109 110 // A form value describes the internal representation. 111 type form byte 112 113 // The form value order is relevant - do not change! 114 const ( 115 zero form = iota 116 finite 117 inf 118 ) 119 120 // RoundingMode determines how a Float value is rounded to the 121 // desired precision. Rounding may change the Float value; the 122 // rounding error is described by the Float's Accuracy. 123 type RoundingMode byte 124 125 // The following rounding modes are supported. 126 const ( 127 ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven 128 ToNearestAway // == IEEE 754-2008 roundTiesToAway 129 ToZero // == IEEE 754-2008 roundTowardZero 130 AwayFromZero // no IEEE 754-2008 equivalent 131 ToNegativeInf // == IEEE 754-2008 roundTowardNegative 132 ToPositiveInf // == IEEE 754-2008 roundTowardPositive 133 ) 134 135 //go:generate stringer -type=RoundingMode 136 137 // Accuracy describes the rounding error produced by the most recent 138 // operation that generated a Float value, relative to the exact value. 139 type Accuracy int8 140 141 // Constants describing the Accuracy of a Float. 142 const ( 143 Below Accuracy = -1 144 Exact Accuracy = 0 145 Above Accuracy = +1 146 ) 147 148 //go:generate stringer -type=Accuracy 149 150 // SetPrec sets z's precision to prec and returns the (possibly) rounded 151 // value of z. Rounding occurs according to z's rounding mode if the mantissa 152 // cannot be represented in prec bits without loss of precision. 153 // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged. 154 // If prec > MaxPrec, it is set to MaxPrec. 155 func (z *Float) SetPrec(prec uint) *Float { 156 z.acc = Exact // optimistically assume no rounding is needed 157 158 // special case 159 if prec == 0 { 160 z.prec = 0 161 if z.form == finite { 162 // truncate z to 0 163 z.acc = makeAcc(z.neg) 164 z.form = zero 165 } 166 return z 167 } 168 169 // general case 170 if prec > MaxPrec { 171 prec = MaxPrec 172 } 173 old := z.prec 174 z.prec = uint32(prec) 175 if z.prec < old { 176 z.round(0) 177 } 178 return z 179 } 180 181 func makeAcc(above bool) Accuracy { 182 if above { 183 return Above 184 } 185 return Below 186 } 187 188 // SetMode sets z's rounding mode to mode and returns an exact z. 189 // z remains unchanged otherwise. 190 // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact. 191 func (z *Float) SetMode(mode RoundingMode) *Float { 192 z.mode = mode 193 z.acc = Exact 194 return z 195 } 196 197 // Prec returns the mantissa precision of x in bits. 198 // The result may be 0 for |x| == 0 and |x| == Inf. 199 func (x *Float) Prec() uint { 200 return uint(x.prec) 201 } 202 203 // MinPrec returns the minimum precision required to represent x exactly 204 // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x). 205 // The result is 0 for |x| == 0 and |x| == Inf. 206 func (x *Float) MinPrec() uint { 207 if x.form != finite { 208 return 0 209 } 210 return uint(len(x.mant))*_W - x.mant.trailingZeroBits() 211 } 212 213 // Mode returns the rounding mode of x. 214 func (x *Float) Mode() RoundingMode { 215 return x.mode 216 } 217 218 // Acc returns the accuracy of x produced by the most recent operation. 219 func (x *Float) Acc() Accuracy { 220 return x.acc 221 } 222 223 // Sign returns: 224 // 225 // -1 if x < 0 226 // 0 if x is ±0 227 // +1 if x > 0 228 // 229 func (x *Float) Sign() int { 230 if debugFloat { 231 x.validate() 232 } 233 if x.form == zero { 234 return 0 235 } 236 if x.neg { 237 return -1 238 } 239 return 1 240 } 241 242 // MantExp breaks x into its mantissa and exponent components 243 // and returns the exponent. If a non-nil mant argument is 244 // provided its value is set to the mantissa of x, with the 245 // same precision and rounding mode as x. The components 246 // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0. 247 // Calling MantExp with a nil argument is an efficient way to 248 // get the exponent of the receiver. 249 // 250 // Special cases are: 251 // 252 // ( ±0).MantExp(mant) = 0, with mant set to ±0 253 // (±Inf).MantExp(mant) = 0, with mant set to ±Inf 254 // 255 // x and mant may be the same in which case x is set to its 256 // mantissa value. 257 func (x *Float) MantExp(mant *Float) (exp int) { 258 if debugFloat { 259 x.validate() 260 } 261 if x.form == finite { 262 exp = int(x.exp) 263 } 264 if mant != nil { 265 mant.Copy(x) 266 if mant.form == finite { 267 mant.exp = 0 268 } 269 } 270 return 271 } 272 273 func (z *Float) setExpAndRound(exp int64, sbit uint) { 274 if exp < MinExp { 275 // underflow 276 z.acc = makeAcc(z.neg) 277 z.form = zero 278 return 279 } 280 281 if exp > MaxExp { 282 // overflow 283 z.acc = makeAcc(!z.neg) 284 z.form = inf 285 return 286 } 287 288 z.form = finite 289 z.exp = int32(exp) 290 z.round(sbit) 291 } 292 293 // SetMantExp sets z to mant × 2**exp and and returns z. 294 // The result z has the same precision and rounding mode 295 // as mant. SetMantExp is an inverse of MantExp but does 296 // not require 0.5 <= |mant| < 1.0. Specifically: 297 // 298 // mant := new(Float) 299 // new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true 300 // 301 // Special cases are: 302 // 303 // z.SetMantExp( ±0, exp) = ±0 304 // z.SetMantExp(±Inf, exp) = ±Inf 305 // 306 // z and mant may be the same in which case z's exponent 307 // is set to exp. 308 func (z *Float) SetMantExp(mant *Float, exp int) *Float { 309 if debugFloat { 310 z.validate() 311 mant.validate() 312 } 313 z.Copy(mant) 314 if z.form != finite { 315 return z 316 } 317 z.setExpAndRound(int64(z.exp)+int64(exp), 0) 318 return z 319 } 320 321 // Signbit returns true if x is negative or negative zero. 322 func (x *Float) Signbit() bool { 323 return x.neg 324 } 325 326 // IsInf reports whether x is +Inf or -Inf. 327 func (x *Float) IsInf() bool { 328 return x.form == inf 329 } 330 331 // IsInt reports whether x is an integer. 332 // ±Inf values are not integers. 333 func (x *Float) IsInt() bool { 334 if debugFloat { 335 x.validate() 336 } 337 // special cases 338 if x.form != finite { 339 return x.form == zero 340 } 341 // x.form == finite 342 if x.exp <= 0 { 343 return false 344 } 345 // x.exp > 0 346 return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa 347 } 348 349 // debugging support 350 func (x *Float) validate() { 351 if !debugFloat { 352 // avoid performance bugs 353 panic("validate called but debugFloat is not set") 354 } 355 if x.form != finite { 356 return 357 } 358 m := len(x.mant) 359 if m == 0 { 360 panic("nonzero finite number with empty mantissa") 361 } 362 const msb = 1 << (_W - 1) 363 if x.mant[m-1]&msb == 0 { 364 panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Format('p', 0))) 365 } 366 if x.prec == 0 { 367 panic("zero precision finite number") 368 } 369 } 370 371 // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly. 372 // sbit must be 0 or 1 and summarizes any "sticky bit" information one might 373 // have before calling round. z's mantissa must be normalized (with the msb set) 374 // or empty. 375 // 376 // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the 377 // sign of z. For correct rounding, the sign of z must be set correctly before 378 // calling round. 379 func (z *Float) round(sbit uint) { 380 if debugFloat { 381 z.validate() 382 if z.form > finite { 383 panic(fmt.Sprintf("round called for non-finite value %s", z)) 384 } 385 } 386 // z.form <= finite 387 388 z.acc = Exact 389 if z.form == zero { 390 return 391 } 392 // z.form == finite && len(z.mant) > 0 393 // m > 0 implies z.prec > 0 (checked by validate) 394 395 m := uint32(len(z.mant)) // present mantissa length in words 396 bits := m * _W // present mantissa bits 397 if bits <= z.prec { 398 // mantissa fits => nothing to do 399 return 400 } 401 // bits > z.prec 402 403 n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision 404 405 // Rounding is based on two bits: the rounding bit (rbit) and the 406 // sticky bit (sbit). The rbit is the bit immediately before the 407 // z.prec leading mantissa bits (the "0.5"). The sbit is set if any 408 // of the bits before the rbit are set (the "0.25", "0.125", etc.): 409 // 410 // rbit sbit => "fractional part" 411 // 412 // 0 0 == 0 413 // 0 1 > 0 , < 0.5 414 // 1 0 == 0.5 415 // 1 1 > 0.5, < 1.0 416 417 // bits > z.prec: mantissa too large => round 418 r := uint(bits - z.prec - 1) // rounding bit position; r >= 0 419 rbit := z.mant.bit(r) // rounding bit 420 if sbit == 0 { 421 sbit = z.mant.sticky(r) 422 } 423 if debugFloat && sbit&^1 != 0 { 424 panic(fmt.Sprintf("invalid sbit %#x", sbit)) 425 } 426 427 // convert ToXInf rounding modes 428 mode := z.mode 429 switch mode { 430 case ToNegativeInf: 431 mode = ToZero 432 if z.neg { 433 mode = AwayFromZero 434 } 435 case ToPositiveInf: 436 mode = AwayFromZero 437 if z.neg { 438 mode = ToZero 439 } 440 } 441 442 // cut off extra words 443 if m > n { 444 copy(z.mant, z.mant[m-n:]) // move n last words to front 445 z.mant = z.mant[:n] 446 } 447 448 // determine number of trailing zero bits t 449 t := n*_W - z.prec // 0 <= t < _W 450 lsb := Word(1) << t 451 452 // make rounding decision 453 // TODO(gri) This can be simplified (see Bits.round in bits_test.go). 454 switch mode { 455 case ToZero: 456 // nothing to do 457 case ToNearestEven, ToNearestAway: 458 if rbit == 0 { 459 // rounding bits == 0b0x 460 mode = ToZero 461 } else if sbit == 1 { 462 // rounding bits == 0b11 463 mode = AwayFromZero 464 } 465 case AwayFromZero: 466 if rbit|sbit == 0 { 467 mode = ToZero 468 } 469 default: 470 // ToXInf modes have been converted to ToZero or AwayFromZero 471 panic("unreachable") 472 } 473 474 // round and determine accuracy 475 switch mode { 476 case ToZero: 477 if rbit|sbit != 0 { 478 z.acc = Below 479 } 480 481 case ToNearestEven, ToNearestAway: 482 if debugFloat && rbit != 1 { 483 panic("internal error in rounding") 484 } 485 if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 { 486 z.acc = Below 487 break 488 } 489 // mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0 490 fallthrough 491 492 case AwayFromZero: 493 // add 1 to mantissa 494 if addVW(z.mant, z.mant, lsb) != 0 { 495 // overflow => shift mantissa right by 1 and add msb 496 shrVU(z.mant, z.mant, 1) 497 z.mant[n-1] |= 1 << (_W - 1) 498 // adjust exponent 499 if z.exp < MaxExp { 500 z.exp++ 501 } else { 502 // exponent overflow 503 z.acc = makeAcc(!z.neg) 504 z.form = inf 505 return 506 } 507 } 508 z.acc = Above 509 } 510 511 // zero out trailing bits in least-significant word 512 z.mant[0] &^= lsb - 1 513 514 // update accuracy 515 if z.acc != Exact && z.neg { 516 z.acc = -z.acc 517 } 518 519 if debugFloat { 520 z.validate() 521 } 522 523 return 524 } 525 526 // nlz returns the number of leading zero bits in x. 527 func nlz(x Word) uint { 528 return _W - uint(bitLen(x)) 529 } 530 531 func nlz64(x uint64) uint { 532 // TODO(gri) this can be done more nicely 533 if _W == 32 { 534 if x>>32 == 0 { 535 return 32 + nlz(Word(x)) 536 } 537 return nlz(Word(x >> 32)) 538 } 539 if _W == 64 { 540 return nlz(Word(x)) 541 } 542 panic("unreachable") 543 } 544 545 func (z *Float) setBits64(neg bool, x uint64) *Float { 546 if z.prec == 0 { 547 z.prec = 64 548 } 549 z.acc = Exact 550 z.neg = neg 551 if x == 0 { 552 z.form = zero 553 return z 554 } 555 // x != 0 556 z.form = finite 557 s := nlz64(x) 558 z.mant = z.mant.setUint64(x << s) 559 z.exp = int32(64 - s) // always fits 560 if z.prec < 64 { 561 z.round(0) 562 } 563 return z 564 } 565 566 // SetUint64 sets z to the (possibly rounded) value of x and returns z. 567 // If z's precision is 0, it is changed to 64 (and rounding will have 568 // no effect). 569 func (z *Float) SetUint64(x uint64) *Float { 570 return z.setBits64(false, x) 571 } 572 573 // SetInt64 sets z to the (possibly rounded) value of x and returns z. 574 // If z's precision is 0, it is changed to 64 (and rounding will have 575 // no effect). 576 func (z *Float) SetInt64(x int64) *Float { 577 u := x 578 if u < 0 { 579 u = -u 580 } 581 // We cannot simply call z.SetUint64(uint64(u)) and change 582 // the sign afterwards because the sign affects rounding. 583 return z.setBits64(x < 0, uint64(u)) 584 } 585 586 // SetFloat64 sets z to the (possibly rounded) value of x and returns z. 587 // If z's precision is 0, it is changed to 53 (and rounding will have 588 // no effect). SetFloat64 panics with ErrNaN if x is a NaN. 589 func (z *Float) SetFloat64(x float64) *Float { 590 if z.prec == 0 { 591 z.prec = 53 592 } 593 if math.IsNaN(x) { 594 panic(ErrNaN{"Float.SetFloat64(NaN)"}) 595 } 596 z.acc = Exact 597 z.neg = math.Signbit(x) // handle -0, -Inf correctly 598 if x == 0 { 599 z.form = zero 600 return z 601 } 602 if math.IsInf(x, 0) { 603 z.form = inf 604 return z 605 } 606 // normalized x != 0 607 z.form = finite 608 fmant, exp := math.Frexp(x) // get normalized mantissa 609 z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11) 610 z.exp = int32(exp) // always fits 611 if z.prec < 53 { 612 z.round(0) 613 } 614 return z 615 } 616 617 // fnorm normalizes mantissa m by shifting it to the left 618 // such that the msb of the most-significant word (msw) is 1. 619 // It returns the shift amount. It assumes that len(m) != 0. 620 func fnorm(m nat) int64 { 621 if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) { 622 panic("msw of mantissa is 0") 623 } 624 s := nlz(m[len(m)-1]) 625 if s > 0 { 626 c := shlVU(m, m, s) 627 if debugFloat && c != 0 { 628 panic("nlz or shlVU incorrect") 629 } 630 } 631 return int64(s) 632 } 633 634 // SetInt sets z to the (possibly rounded) value of x and returns z. 635 // If z's precision is 0, it is changed to the larger of x.BitLen() 636 // or 64 (and rounding will have no effect). 637 func (z *Float) SetInt(x *Int) *Float { 638 // TODO(gri) can be more efficient if z.prec > 0 639 // but small compared to the size of x, or if there 640 // are many trailing 0's. 641 bits := uint32(x.BitLen()) 642 if z.prec == 0 { 643 z.prec = umax32(bits, 64) 644 } 645 z.acc = Exact 646 z.neg = x.neg 647 if len(x.abs) == 0 { 648 z.form = zero 649 return z 650 } 651 // x != 0 652 z.mant = z.mant.set(x.abs) 653 fnorm(z.mant) 654 z.setExpAndRound(int64(bits), 0) 655 return z 656 } 657 658 // SetRat sets z to the (possibly rounded) value of x and returns z. 659 // If z's precision is 0, it is changed to the largest of a.BitLen(), 660 // b.BitLen(), or 64; with x = a/b. 661 func (z *Float) SetRat(x *Rat) *Float { 662 if x.IsInt() { 663 return z.SetInt(x.Num()) 664 } 665 var a, b Float 666 a.SetInt(x.Num()) 667 b.SetInt(x.Denom()) 668 if z.prec == 0 { 669 z.prec = umax32(a.prec, b.prec) 670 } 671 return z.Quo(&a, &b) 672 } 673 674 // SetInf sets z to the infinite Float -Inf if signbit is 675 // set, or +Inf if signbit is not set, and returns z. The 676 // precision of z is unchanged and the result is always 677 // Exact. 678 func (z *Float) SetInf(signbit bool) *Float { 679 z.acc = Exact 680 z.form = inf 681 z.neg = signbit 682 return z 683 } 684 685 // Set sets z to the (possibly rounded) value of x and returns z. 686 // If z's precision is 0, it is changed to the precision of x 687 // before setting z (and rounding will have no effect). 688 // Rounding is performed according to z's precision and rounding 689 // mode; and z's accuracy reports the result error relative to the 690 // exact (not rounded) result. 691 func (z *Float) Set(x *Float) *Float { 692 if debugFloat { 693 x.validate() 694 } 695 z.acc = Exact 696 if z != x { 697 z.form = x.form 698 z.neg = x.neg 699 if x.form == finite { 700 z.exp = x.exp 701 z.mant = z.mant.set(x.mant) 702 } 703 if z.prec == 0 { 704 z.prec = x.prec 705 } else if z.prec < x.prec { 706 z.round(0) 707 } 708 } 709 return z 710 } 711 712 // Copy sets z to x, with the same precision, rounding mode, and 713 // accuracy as x, and returns z. x is not changed even if z and 714 // x are the same. 715 func (z *Float) Copy(x *Float) *Float { 716 if debugFloat { 717 x.validate() 718 } 719 if z != x { 720 z.prec = x.prec 721 z.mode = x.mode 722 z.acc = x.acc 723 z.form = x.form 724 z.neg = x.neg 725 if z.form == finite { 726 z.mant = z.mant.set(x.mant) 727 z.exp = x.exp 728 } 729 } 730 return z 731 } 732 733 func high32(x nat) uint32 { 734 // TODO(gri) This can be done more efficiently on 32bit platforms. 735 return uint32(high64(x) >> 32) 736 } 737 738 func high64(x nat) uint64 { 739 i := len(x) 740 if i == 0 { 741 return 0 742 } 743 // i > 0 744 v := uint64(x[i-1]) 745 if _W == 32 { 746 v <<= 32 747 if i > 1 { 748 v |= uint64(x[i-2]) 749 } 750 } 751 return v 752 } 753 754 // Uint64 returns the unsigned integer resulting from truncating x 755 // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact 756 // if x is an integer and Below otherwise. 757 // The result is (0, Above) for x < 0, and (math.MaxUint64, Below) 758 // for x > math.MaxUint64. 759 func (x *Float) Uint64() (uint64, Accuracy) { 760 if debugFloat { 761 x.validate() 762 } 763 764 switch x.form { 765 case finite: 766 if x.neg { 767 return 0, Above 768 } 769 // 0 < x < +Inf 770 if x.exp <= 0 { 771 // 0 < x < 1 772 return 0, Below 773 } 774 // 1 <= x < Inf 775 if x.exp <= 64 { 776 // u = trunc(x) fits into a uint64 777 u := high64(x.mant) >> (64 - uint32(x.exp)) 778 if x.MinPrec() <= 64 { 779 return u, Exact 780 } 781 return u, Below // x truncated 782 } 783 // x too large 784 return math.MaxUint64, Below 785 786 case zero: 787 return 0, Exact 788 789 case inf: 790 if x.neg { 791 return 0, Above 792 } 793 return math.MaxUint64, Below 794 } 795 796 panic("unreachable") 797 } 798 799 // Int64 returns the integer resulting from truncating x towards zero. 800 // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is 801 // an integer, and Above (x < 0) or Below (x > 0) otherwise. 802 // The result is (math.MinInt64, Above) for x < math.MinInt64, 803 // and (math.MaxInt64, Below) for x > math.MaxInt64. 804 func (x *Float) Int64() (int64, Accuracy) { 805 if debugFloat { 806 x.validate() 807 } 808 809 switch x.form { 810 case finite: 811 // 0 < |x| < +Inf 812 acc := makeAcc(x.neg) 813 if x.exp <= 0 { 814 // 0 < |x| < 1 815 return 0, acc 816 } 817 // x.exp > 0 818 819 // 1 <= |x| < +Inf 820 if x.exp <= 63 { 821 // i = trunc(x) fits into an int64 (excluding math.MinInt64) 822 i := int64(high64(x.mant) >> (64 - uint32(x.exp))) 823 if x.neg { 824 i = -i 825 } 826 if x.MinPrec() <= uint(x.exp) { 827 return i, Exact 828 } 829 return i, acc // x truncated 830 } 831 if x.neg { 832 // check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64)) 833 if x.exp == 64 && x.MinPrec() == 1 { 834 acc = Exact 835 } 836 return math.MinInt64, acc 837 } 838 // x too large 839 return math.MaxInt64, Below 840 841 case zero: 842 return 0, Exact 843 844 case inf: 845 if x.neg { 846 return math.MinInt64, Above 847 } 848 return math.MaxInt64, Below 849 } 850 851 panic("unreachable") 852 } 853 854 // TODO(gri) Float32 and Float64 are very similar internally but for the 855 // floatxx parameters and some conversions. Should factor out shared code. 856 857 // Float32 returns the float32 value nearest to x. If x is too small to be 858 // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result 859 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 860 // If x is too large to be represented by a float32 (|x| > math.MaxFloat32), 861 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 862 func (x *Float) Float32() (float32, Accuracy) { 863 if debugFloat { 864 x.validate() 865 } 866 867 switch x.form { 868 case finite: 869 // 0 < |x| < +Inf 870 871 const ( 872 fbits = 32 // float size 873 mbits = 23 // mantissa size (excluding implicit msb) 874 ebits = fbits - mbits - 1 // 8 exponent size 875 bias = 1<<(ebits-1) - 1 // 127 exponent bias 876 dmin = 1 - bias - mbits // -149 smallest unbiased exponent (denormal) 877 emin = 1 - bias // -126 smallest unbiased exponent (normal) 878 emax = bias // 127 largest unbiased exponent (normal) 879 ) 880 881 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 882 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 883 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 884 // corresponding Float exponent. 885 // (see also implementation of math.Ldexp for similar code) 886 887 if x.exp < dmin+1 { 888 // underflow 889 if x.neg { 890 var z float32 891 return -z, Above 892 } 893 return 0.0, Below 894 } 895 // x.exp >= dmin+1 896 897 var r Float 898 r.prec = mbits + 1 // +1 for implicit msb 899 if x.exp < emin+1 { 900 // denormal number - round to fewer bits 901 r.prec = uint32(x.exp - dmin) 902 } 903 r.Set(x) 904 905 // Rounding may have caused r to overflow to ±Inf 906 // (rounding never causes underflows to 0). 907 if r.form == inf { 908 r.exp = emax + 2 // cause overflow below 909 } 910 911 if r.exp > emax+1 { 912 // overflow 913 if x.neg { 914 return float32(math.Inf(-1)), Below 915 } 916 return float32(math.Inf(+1)), Above 917 } 918 // dmin+1 <= r.exp <= emax+1 919 920 var s uint32 921 if r.neg { 922 s = 1 << (fbits - 1) 923 } 924 925 m := high32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 926 927 // Rounding may have caused a denormal number to 928 // become normal. Check again. 929 c := float32(1.0) 930 if r.exp < emin+1 { 931 // denormal number 932 r.exp += mbits 933 c = 1.0 / (1 << mbits) // 2**-mbits 934 } 935 // emin+1 <= r.exp <= emax+1 936 e := uint32(r.exp-emin) << mbits 937 938 return c * math.Float32frombits(s|e|m), r.acc 939 940 case zero: 941 if x.neg { 942 var z float32 943 return -z, Exact 944 } 945 return 0.0, Exact 946 947 case inf: 948 if x.neg { 949 return float32(math.Inf(-1)), Exact 950 } 951 return float32(math.Inf(+1)), Exact 952 } 953 954 panic("unreachable") 955 } 956 957 // Float64 returns the float64 value nearest to x. If x is too small to be 958 // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result 959 // is (0, Below) or (-0, Above), respectively, depending on the sign of x. 960 // If x is too large to be represented by a float64 (|x| > math.MaxFloat64), 961 // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x. 962 func (x *Float) Float64() (float64, Accuracy) { 963 if debugFloat { 964 x.validate() 965 } 966 967 switch x.form { 968 case finite: 969 // 0 < |x| < +Inf 970 971 const ( 972 fbits = 64 // float size 973 mbits = 52 // mantissa size (excluding implicit msb) 974 ebits = fbits - mbits - 1 // 11 exponent size 975 bias = 1<<(ebits-1) - 1 // 1023 exponent bias 976 dmin = 1 - bias - mbits // -1074 smallest unbiased exponent (denormal) 977 emin = 1 - bias // -1022 smallest unbiased exponent (normal) 978 emax = bias // 1023 largest unbiased exponent (normal) 979 ) 980 981 // Float mantissae m have an explicit msb and are in the range 0.5 <= m < 1.0. 982 // floatxx mantissae have an implicit msb and are in the range 1.0 <= m < 2.0. 983 // For a given mantissa m, we need to add 1 to a floatxx exponent to get the 984 // corresponding Float exponent. 985 // (see also implementation of math.Ldexp for similar code) 986 987 if x.exp < dmin+1 { 988 // underflow 989 if x.neg { 990 var z float64 991 return -z, Above 992 } 993 return 0.0, Below 994 } 995 // x.exp >= dmin+1 996 997 var r Float 998 r.prec = mbits + 1 // +1 for implicit msb 999 if x.exp < emin+1 { 1000 // denormal number - round to fewer bits 1001 r.prec = uint32(x.exp - dmin) 1002 } 1003 r.Set(x) 1004 1005 // Rounding may have caused r to overflow to ±Inf 1006 // (rounding never causes underflows to 0). 1007 if r.form == inf { 1008 r.exp = emax + 2 // cause overflow below 1009 } 1010 1011 if r.exp > emax+1 { 1012 // overflow 1013 if x.neg { 1014 return math.Inf(-1), Below 1015 } 1016 return math.Inf(+1), Above 1017 } 1018 // dmin+1 <= r.exp <= emax+1 1019 1020 var s uint64 1021 if r.neg { 1022 s = 1 << (fbits - 1) 1023 } 1024 1025 m := high64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit) 1026 1027 // Rounding may have caused a denormal number to 1028 // become normal. Check again. 1029 c := 1.0 1030 if r.exp < emin+1 { 1031 // denormal number 1032 r.exp += mbits 1033 c = 1.0 / (1 << mbits) // 2**-mbits 1034 } 1035 // emin+1 <= r.exp <= emax+1 1036 e := uint64(r.exp-emin) << mbits 1037 1038 return c * math.Float64frombits(s|e|m), r.acc 1039 1040 case zero: 1041 if x.neg { 1042 var z float64 1043 return -z, Exact 1044 } 1045 return 0.0, Exact 1046 1047 case inf: 1048 if x.neg { 1049 return math.Inf(-1), Exact 1050 } 1051 return math.Inf(+1), Exact 1052 } 1053 1054 panic("unreachable") 1055 } 1056 1057 // Int returns the result of truncating x towards zero; 1058 // or nil if x is an infinity. 1059 // The result is Exact if x.IsInt(); otherwise it is Below 1060 // for x > 0, and Above for x < 0. 1061 // If a non-nil *Int argument z is provided, Int stores 1062 // the result in z instead of allocating a new Int. 1063 func (x *Float) Int(z *Int) (*Int, Accuracy) { 1064 if debugFloat { 1065 x.validate() 1066 } 1067 1068 if z == nil && x.form <= finite { 1069 z = new(Int) 1070 } 1071 1072 switch x.form { 1073 case finite: 1074 // 0 < |x| < +Inf 1075 acc := makeAcc(x.neg) 1076 if x.exp <= 0 { 1077 // 0 < |x| < 1 1078 return z.SetInt64(0), acc 1079 } 1080 // x.exp > 0 1081 1082 // 1 <= |x| < +Inf 1083 // determine minimum required precision for x 1084 allBits := uint(len(x.mant)) * _W 1085 exp := uint(x.exp) 1086 if x.MinPrec() <= exp { 1087 acc = Exact 1088 } 1089 // shift mantissa as needed 1090 if z == nil { 1091 z = new(Int) 1092 } 1093 z.neg = x.neg 1094 switch { 1095 case exp > allBits: 1096 z.abs = z.abs.shl(x.mant, exp-allBits) 1097 default: 1098 z.abs = z.abs.set(x.mant) 1099 case exp < allBits: 1100 z.abs = z.abs.shr(x.mant, allBits-exp) 1101 } 1102 return z, acc 1103 1104 case zero: 1105 return z.SetInt64(0), Exact 1106 1107 case inf: 1108 return nil, makeAcc(x.neg) 1109 } 1110 1111 panic("unreachable") 1112 } 1113 1114 // Rat returns the rational number corresponding to x; 1115 // or nil if x is an infinity. 1116 // The result is Exact is x is not an Inf. 1117 // If a non-nil *Rat argument z is provided, Rat stores 1118 // the result in z instead of allocating a new Rat. 1119 func (x *Float) Rat(z *Rat) (*Rat, Accuracy) { 1120 if debugFloat { 1121 x.validate() 1122 } 1123 1124 if z == nil && x.form <= finite { 1125 z = new(Rat) 1126 } 1127 1128 switch x.form { 1129 case finite: 1130 // 0 < |x| < +Inf 1131 allBits := int32(len(x.mant)) * _W 1132 // build up numerator and denominator 1133 z.a.neg = x.neg 1134 switch { 1135 case x.exp > allBits: 1136 z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits)) 1137 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1138 // z already in normal form 1139 default: 1140 z.a.abs = z.a.abs.set(x.mant) 1141 z.b.abs = z.b.abs[:0] // == 1 (see Rat) 1142 // z already in normal form 1143 case x.exp < allBits: 1144 z.a.abs = z.a.abs.set(x.mant) 1145 t := z.b.abs.setUint64(1) 1146 z.b.abs = t.shl(t, uint(allBits-x.exp)) 1147 z.norm() 1148 } 1149 return z, Exact 1150 1151 case zero: 1152 return z.SetInt64(0), Exact 1153 1154 case inf: 1155 return nil, makeAcc(x.neg) 1156 } 1157 1158 panic("unreachable") 1159 } 1160 1161 // Abs sets z to the (possibly rounded) value |x| (the absolute value of x) 1162 // and returns z. 1163 func (z *Float) Abs(x *Float) *Float { 1164 z.Set(x) 1165 z.neg = false 1166 return z 1167 } 1168 1169 // Neg sets z to the (possibly rounded) value of x with its sign negated, 1170 // and returns z. 1171 func (z *Float) Neg(x *Float) *Float { 1172 z.Set(x) 1173 z.neg = !z.neg 1174 return z 1175 } 1176 1177 func validateBinaryOperands(x, y *Float) { 1178 if !debugFloat { 1179 // avoid performance bugs 1180 panic("validateBinaryOperands called but debugFloat is not set") 1181 } 1182 if len(x.mant) == 0 { 1183 panic("empty mantissa for x") 1184 } 1185 if len(y.mant) == 0 { 1186 panic("empty mantissa for y") 1187 } 1188 } 1189 1190 // z = x + y, ignoring signs of x and y for the addition 1191 // but using the sign of z for rounding the result. 1192 // x and y must have a non-empty mantissa and valid exponent. 1193 func (z *Float) uadd(x, y *Float) { 1194 // Note: This implementation requires 2 shifts most of the 1195 // time. It is also inefficient if exponents or precisions 1196 // differ by wide margins. The following article describes 1197 // an efficient (but much more complicated) implementation 1198 // compatible with the internal representation used here: 1199 // 1200 // Vincent Lefèvre: "The Generic Multiple-Precision Floating- 1201 // Point Addition With Exact Rounding (as in the MPFR Library)" 1202 // http://www.vinc17.net/research/papers/rnc6.pdf 1203 1204 if debugFloat { 1205 validateBinaryOperands(x, y) 1206 } 1207 1208 // compute exponents ex, ey for mantissa with "binary point" 1209 // on the right (mantissa.0) - use int64 to avoid overflow 1210 ex := int64(x.exp) - int64(len(x.mant))*_W 1211 ey := int64(y.exp) - int64(len(y.mant))*_W 1212 1213 // TODO(gri) having a combined add-and-shift primitive 1214 // could make this code significantly faster 1215 switch { 1216 case ex < ey: 1217 // cannot re-use z.mant w/o testing for aliasing 1218 t := nat(nil).shl(y.mant, uint(ey-ex)) 1219 z.mant = z.mant.add(x.mant, t) 1220 default: 1221 // ex == ey, no shift needed 1222 z.mant = z.mant.add(x.mant, y.mant) 1223 case ex > ey: 1224 // cannot re-use z.mant w/o testing for aliasing 1225 t := nat(nil).shl(x.mant, uint(ex-ey)) 1226 z.mant = z.mant.add(t, y.mant) 1227 ex = ey 1228 } 1229 // len(z.mant) > 0 1230 1231 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1232 } 1233 1234 // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction 1235 // but using the sign of z for rounding the result. 1236 // x and y must have a non-empty mantissa and valid exponent. 1237 func (z *Float) usub(x, y *Float) { 1238 // This code is symmetric to uadd. 1239 // We have not factored the common code out because 1240 // eventually uadd (and usub) should be optimized 1241 // by special-casing, and the code will diverge. 1242 1243 if debugFloat { 1244 validateBinaryOperands(x, y) 1245 } 1246 1247 ex := int64(x.exp) - int64(len(x.mant))*_W 1248 ey := int64(y.exp) - int64(len(y.mant))*_W 1249 1250 switch { 1251 case ex < ey: 1252 // cannot re-use z.mant w/o testing for aliasing 1253 t := nat(nil).shl(y.mant, uint(ey-ex)) 1254 z.mant = t.sub(x.mant, t) 1255 default: 1256 // ex == ey, no shift needed 1257 z.mant = z.mant.sub(x.mant, y.mant) 1258 case ex > ey: 1259 // cannot re-use z.mant w/o testing for aliasing 1260 t := nat(nil).shl(x.mant, uint(ex-ey)) 1261 z.mant = t.sub(t, y.mant) 1262 ex = ey 1263 } 1264 1265 // operands may have cancelled each other out 1266 if len(z.mant) == 0 { 1267 z.acc = Exact 1268 z.form = zero 1269 z.neg = false 1270 return 1271 } 1272 // len(z.mant) > 0 1273 1274 z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0) 1275 } 1276 1277 // z = x * y, ignoring signs of x and y for the multiplication 1278 // but using the sign of z for rounding the result. 1279 // x and y must have a non-empty mantissa and valid exponent. 1280 func (z *Float) umul(x, y *Float) { 1281 if debugFloat { 1282 validateBinaryOperands(x, y) 1283 } 1284 1285 // Note: This is doing too much work if the precision 1286 // of z is less than the sum of the precisions of x 1287 // and y which is often the case (e.g., if all floats 1288 // have the same precision). 1289 // TODO(gri) Optimize this for the common case. 1290 1291 e := int64(x.exp) + int64(y.exp) 1292 z.mant = z.mant.mul(x.mant, y.mant) 1293 1294 z.setExpAndRound(e-fnorm(z.mant), 0) 1295 } 1296 1297 // z = x / y, ignoring signs of x and y for the division 1298 // but using the sign of z for rounding the result. 1299 // x and y must have a non-empty mantissa and valid exponent. 1300 func (z *Float) uquo(x, y *Float) { 1301 if debugFloat { 1302 validateBinaryOperands(x, y) 1303 } 1304 1305 // mantissa length in words for desired result precision + 1 1306 // (at least one extra bit so we get the rounding bit after 1307 // the division) 1308 n := int(z.prec/_W) + 1 1309 1310 // compute adjusted x.mant such that we get enough result precision 1311 xadj := x.mant 1312 if d := n - len(x.mant) + len(y.mant); d > 0 { 1313 // d extra words needed => add d "0 digits" to x 1314 xadj = make(nat, len(x.mant)+d) 1315 copy(xadj[d:], x.mant) 1316 } 1317 // TODO(gri): If we have too many digits (d < 0), we should be able 1318 // to shorten x for faster division. But we must be extra careful 1319 // with rounding in that case. 1320 1321 // Compute d before division since there may be aliasing of x.mant 1322 // (via xadj) or y.mant with z.mant. 1323 d := len(xadj) - len(y.mant) 1324 1325 // divide 1326 var r nat 1327 z.mant, r = z.mant.div(nil, xadj, y.mant) 1328 e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W 1329 1330 // The result is long enough to include (at least) the rounding bit. 1331 // If there's a non-zero remainder, the corresponding fractional part 1332 // (if it were computed), would have a non-zero sticky bit (if it were 1333 // zero, it couldn't have a non-zero remainder). 1334 var sbit uint 1335 if len(r) > 0 { 1336 sbit = 1 1337 } 1338 1339 z.setExpAndRound(e-fnorm(z.mant), sbit) 1340 } 1341 1342 // ucmp returns -1, 0, or +1, depending on whether 1343 // |x| < |y|, |x| == |y|, or |x| > |y|. 1344 // x and y must have a non-empty mantissa and valid exponent. 1345 func (x *Float) ucmp(y *Float) int { 1346 if debugFloat { 1347 validateBinaryOperands(x, y) 1348 } 1349 1350 switch { 1351 case x.exp < y.exp: 1352 return -1 1353 case x.exp > y.exp: 1354 return +1 1355 } 1356 // x.exp == y.exp 1357 1358 // compare mantissas 1359 i := len(x.mant) 1360 j := len(y.mant) 1361 for i > 0 || j > 0 { 1362 var xm, ym Word 1363 if i > 0 { 1364 i-- 1365 xm = x.mant[i] 1366 } 1367 if j > 0 { 1368 j-- 1369 ym = y.mant[j] 1370 } 1371 switch { 1372 case xm < ym: 1373 return -1 1374 case xm > ym: 1375 return +1 1376 } 1377 } 1378 1379 return 0 1380 } 1381 1382 // Handling of sign bit as defined by IEEE 754-2008, section 6.3: 1383 // 1384 // When neither the inputs nor result are NaN, the sign of a product or 1385 // quotient is the exclusive OR of the operands’ signs; the sign of a sum, 1386 // or of a difference x−y regarded as a sum x+(−y), differs from at most 1387 // one of the addends’ signs; and the sign of the result of conversions, 1388 // the quantize operation, the roundToIntegral operations, and the 1389 // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand. 1390 // These rules shall apply even when operands or results are zero or infinite. 1391 // 1392 // When the sum of two operands with opposite signs (or the difference of 1393 // two operands with like signs) is exactly zero, the sign of that sum (or 1394 // difference) shall be +0 in all rounding-direction attributes except 1395 // roundTowardNegative; under that attribute, the sign of an exact zero 1396 // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same 1397 // sign as x even when x is zero. 1398 // 1399 // See also: http://play.golang.org/p/RtH3UCt5IH 1400 1401 // Add sets z to the rounded sum x+y and returns z. If z's precision is 0, 1402 // it is changed to the larger of x's or y's precision before the operation. 1403 // Rounding is performed according to z's precision and rounding mode; and 1404 // z's accuracy reports the result error relative to the exact (not rounded) 1405 // result. Add panics with ErrNaN if x and y are infinities with opposite 1406 // signs. The value of z is undefined in that case. 1407 // 1408 // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect. 1409 func (z *Float) Add(x, y *Float) *Float { 1410 if debugFloat { 1411 x.validate() 1412 y.validate() 1413 } 1414 1415 if z.prec == 0 { 1416 z.prec = umax32(x.prec, y.prec) 1417 } 1418 1419 if x.form == finite && y.form == finite { 1420 // x + y (commom case) 1421 z.neg = x.neg 1422 if x.neg == y.neg { 1423 // x + y == x + y 1424 // (-x) + (-y) == -(x + y) 1425 z.uadd(x, y) 1426 } else { 1427 // x + (-y) == x - y == -(y - x) 1428 // (-x) + y == y - x == -(x - y) 1429 if x.ucmp(y) > 0 { 1430 z.usub(x, y) 1431 } else { 1432 z.neg = !z.neg 1433 z.usub(y, x) 1434 } 1435 } 1436 return z 1437 } 1438 1439 if x.form == inf && y.form == inf && x.neg != y.neg { 1440 // +Inf + -Inf 1441 // -Inf + +Inf 1442 // value of z is undefined but make sure it's valid 1443 z.acc = Exact 1444 z.form = zero 1445 z.neg = false 1446 panic(ErrNaN{"addition of infinities with opposite signs"}) 1447 } 1448 1449 if x.form == zero && y.form == zero { 1450 // ±0 + ±0 1451 z.acc = Exact 1452 z.form = zero 1453 z.neg = x.neg && y.neg // -0 + -0 == -0 1454 return z 1455 } 1456 1457 if x.form == inf || y.form == zero { 1458 // ±Inf + y 1459 // x + ±0 1460 return z.Set(x) 1461 } 1462 1463 // ±0 + y 1464 // x + ±Inf 1465 return z.Set(y) 1466 } 1467 1468 // Sub sets z to the rounded difference x-y and returns z. 1469 // Precision, rounding, and accuracy reporting are as for Add. 1470 // Sub panics with ErrNaN if x and y are infinities with equal 1471 // signs. The value of z is undefined in that case. 1472 func (z *Float) Sub(x, y *Float) *Float { 1473 if debugFloat { 1474 x.validate() 1475 y.validate() 1476 } 1477 1478 if z.prec == 0 { 1479 z.prec = umax32(x.prec, y.prec) 1480 } 1481 1482 if x.form == finite && y.form == finite { 1483 // x - y (common case) 1484 z.neg = x.neg 1485 if x.neg != y.neg { 1486 // x - (-y) == x + y 1487 // (-x) - y == -(x + y) 1488 z.uadd(x, y) 1489 } else { 1490 // x - y == x - y == -(y - x) 1491 // (-x) - (-y) == y - x == -(x - y) 1492 if x.ucmp(y) > 0 { 1493 z.usub(x, y) 1494 } else { 1495 z.neg = !z.neg 1496 z.usub(y, x) 1497 } 1498 } 1499 return z 1500 } 1501 1502 if x.form == inf && y.form == inf && x.neg == y.neg { 1503 // +Inf - +Inf 1504 // -Inf - -Inf 1505 // value of z is undefined but make sure it's valid 1506 z.acc = Exact 1507 z.form = zero 1508 z.neg = false 1509 panic(ErrNaN{"subtraction of infinities with equal signs"}) 1510 } 1511 1512 if x.form == zero && y.form == zero { 1513 // ±0 - ±0 1514 z.acc = Exact 1515 z.form = zero 1516 z.neg = x.neg && !y.neg // -0 - +0 == -0 1517 return z 1518 } 1519 1520 if x.form == inf || y.form == zero { 1521 // ±Inf - y 1522 // x - ±0 1523 return z.Set(x) 1524 } 1525 1526 // ±0 - y 1527 // x - ±Inf 1528 return z.Neg(y) 1529 } 1530 1531 // Mul sets z to the rounded product x*y and returns z. 1532 // Precision, rounding, and accuracy reporting are as for Add. 1533 // Mul panics with ErrNaN if one operand is zero and the other 1534 // operand an infinity. The value of z is undefined in that case. 1535 func (z *Float) Mul(x, y *Float) *Float { 1536 if debugFloat { 1537 x.validate() 1538 y.validate() 1539 } 1540 1541 if z.prec == 0 { 1542 z.prec = umax32(x.prec, y.prec) 1543 } 1544 1545 z.neg = x.neg != y.neg 1546 1547 if x.form == finite && y.form == finite { 1548 // x * y (common case) 1549 z.umul(x, y) 1550 return z 1551 } 1552 1553 z.acc = Exact 1554 if x.form == zero && y.form == inf || x.form == inf && y.form == zero { 1555 // ±0 * ±Inf 1556 // ±Inf * ±0 1557 // value of z is undefined but make sure it's valid 1558 z.form = zero 1559 z.neg = false 1560 panic(ErrNaN{"multiplication of zero with infinity"}) 1561 } 1562 1563 if x.form == inf || y.form == inf { 1564 // ±Inf * y 1565 // x * ±Inf 1566 z.form = inf 1567 return z 1568 } 1569 1570 // ±0 * y 1571 // x * ±0 1572 z.form = zero 1573 return z 1574 } 1575 1576 // Quo sets z to the rounded quotient x/y and returns z. 1577 // Precision, rounding, and accuracy reporting are as for Add. 1578 // Quo panics with ErrNaN if both operands are zero or infinities. 1579 // The value of z is undefined in that case. 1580 func (z *Float) Quo(x, y *Float) *Float { 1581 if debugFloat { 1582 x.validate() 1583 y.validate() 1584 } 1585 1586 if z.prec == 0 { 1587 z.prec = umax32(x.prec, y.prec) 1588 } 1589 1590 z.neg = x.neg != y.neg 1591 1592 if x.form == finite && y.form == finite { 1593 // x / y (common case) 1594 z.uquo(x, y) 1595 return z 1596 } 1597 1598 z.acc = Exact 1599 if x.form == zero && y.form == zero || x.form == inf && y.form == inf { 1600 // ±0 / ±0 1601 // ±Inf / ±Inf 1602 // value of z is undefined but make sure it's valid 1603 z.form = zero 1604 z.neg = false 1605 panic(ErrNaN{"division of zero by zero or infinity by infinity"}) 1606 } 1607 1608 if x.form == zero || y.form == inf { 1609 // ±0 / y 1610 // x / ±Inf 1611 z.form = zero 1612 return z 1613 } 1614 1615 // x / ±0 1616 // ±Inf / y 1617 z.form = inf 1618 return z 1619 } 1620 1621 // Cmp compares x and y and returns: 1622 // 1623 // -1 if x < y 1624 // 0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf) 1625 // +1 if x > y 1626 // 1627 func (x *Float) Cmp(y *Float) int { 1628 if debugFloat { 1629 x.validate() 1630 y.validate() 1631 } 1632 1633 mx := x.ord() 1634 my := y.ord() 1635 switch { 1636 case mx < my: 1637 return -1 1638 case mx > my: 1639 return +1 1640 } 1641 // mx == my 1642 1643 // only if |mx| == 1 we have to compare the mantissae 1644 switch mx { 1645 case -1: 1646 return y.ucmp(x) 1647 case +1: 1648 return x.ucmp(y) 1649 } 1650 1651 return 0 1652 } 1653 1654 // ord classifies x and returns: 1655 // 1656 // -2 if -Inf == x 1657 // -1 if -Inf < x < 0 1658 // 0 if x == 0 (signed or unsigned) 1659 // +1 if 0 < x < +Inf 1660 // +2 if x == +Inf 1661 // 1662 func (x *Float) ord() int { 1663 var m int 1664 switch x.form { 1665 case finite: 1666 m = 1 1667 case zero: 1668 return 0 1669 case inf: 1670 m = 2 1671 } 1672 if x.neg { 1673 m = -m 1674 } 1675 return m 1676 } 1677 1678 func umax32(x, y uint32) uint32 { 1679 if x > y { 1680 return x 1681 } 1682 return y 1683 }