github.com/q45/go@v0.0.0-20151101211701-a4fb8c13db3f/src/cmd/compile/internal/big/float.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements multi-precision floating-point numbers.
     6  // Like in the GNU MPFR library (http://www.mpfr.org/), operands
     7  // can be of mixed precision. Unlike MPFR, the rounding mode is
     8  // not specified with each operation, but with each operand. The
     9  // rounding mode of the result operand determines the rounding
    10  // mode of an operation. This is a from-scratch implementation.
    11  
    12  package big
    13  
    14  import (
    15  	"fmt"
    16  	"math"
    17  )
    18  
    19  const debugFloat = false // enable for debugging
    20  
    21  // A nonzero finite Float represents a multi-precision floating point number
    22  //
    23  //   sign × mantissa × 2**exponent
    24  //
    25  // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
    26  // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
    27  // All Floats are ordered, and the ordering of two Floats x and y
    28  // is defined by x.Cmp(y).
    29  //
    30  // Each Float value also has a precision, rounding mode, and accuracy.
    31  // The precision is the maximum number of mantissa bits available to
    32  // represent the value. The rounding mode specifies how a result should
    33  // be rounded to fit into the mantissa bits, and accuracy describes the
    34  // rounding error with respect to the exact result.
    35  //
    36  // Unless specified otherwise, all operations (including setters) that
    37  // specify a *Float variable for the result (usually via the receiver
    38  // with the exception of MantExp), round the numeric result according
    39  // to the precision and rounding mode of the result variable.
    40  //
    41  // If the provided result precision is 0 (see below), it is set to the
    42  // precision of the argument with the largest precision value before any
    43  // rounding takes place, and the rounding mode remains unchanged. Thus,
    44  // uninitialized Floats provided as result arguments will have their
    45  // precision set to a reasonable value determined by the operands and
    46  // their mode is the zero value for RoundingMode (ToNearestEven).
    47  //
    48  // By setting the desired precision to 24 or 53 and using matching rounding
    49  // mode (typically ToNearestEven), Float operations produce the same results
    50  // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
    51  // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
    52  // Exponent underflow and overflow lead to a 0 or an Infinity for different
    53  // values than IEEE-754 because Float exponents have a much larger range.
    54  //
    55  // The zero (uninitialized) value for a Float is ready to use and represents
    56  // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
    57  //
    58  type Float struct {
    59  	prec uint32
    60  	mode RoundingMode
    61  	acc  Accuracy
    62  	form form
    63  	neg  bool
    64  	mant nat
    65  	exp  int32
    66  }
    67  
    68  // An ErrNaN panic is raised by a Float operation that would lead to
    69  // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
    70  type ErrNaN struct {
    71  	msg string
    72  }
    73  
    74  func (err ErrNaN) Error() string {
    75  	return err.msg
    76  }
    77  
    78  // NewFloat allocates and returns a new Float set to x,
    79  // with precision 53 and rounding mode ToNearestEven.
    80  // NewFloat panics with ErrNaN if x is a NaN.
    81  func NewFloat(x float64) *Float {
    82  	if math.IsNaN(x) {
    83  		panic(ErrNaN{"NewFloat(NaN)"})
    84  	}
    85  	return new(Float).SetFloat64(x)
    86  }
    87  
    88  // Exponent and precision limits.
    89  const (
    90  	MaxExp  = math.MaxInt32  // largest supported exponent
    91  	MinExp  = math.MinInt32  // smallest supported exponent
    92  	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
    93  )
    94  
    95  // Internal representation: The mantissa bits x.mant of a nonzero finite
    96  // Float x are stored in a nat slice long enough to hold up to x.prec bits;
    97  // the slice may (but doesn't have to) be shorter if the mantissa contains
    98  // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
    99  // the msb is shifted all the way "to the left"). Thus, if the mantissa has
   100  // trailing 0 bits or x.prec is not a multiple of the the Word size _W,
   101  // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
   102  // to the value 0.5; the exponent x.exp shifts the binary point as needed.
   103  //
   104  // A zero or non-finite Float x ignores x.mant and x.exp.
   105  //
   106  // x                 form      neg      mant         exp
   107  // ----------------------------------------------------------
   108  // ±0                zero      sign     -            -
   109  // 0 < |x| < +Inf    finite    sign     mantissa     exponent
   110  // ±Inf              inf       sign     -            -
   111  
   112  // A form value describes the internal representation.
   113  type form byte
   114  
   115  // The form value order is relevant - do not change!
   116  const (
   117  	zero form = iota
   118  	finite
   119  	inf
   120  )
   121  
   122  // RoundingMode determines how a Float value is rounded to the
   123  // desired precision. Rounding may change the Float value; the
   124  // rounding error is described by the Float's Accuracy.
   125  type RoundingMode byte
   126  
   127  // The following rounding modes are supported.
   128  const (
   129  	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
   130  	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
   131  	ToZero                            // == IEEE 754-2008 roundTowardZero
   132  	AwayFromZero                      // no IEEE 754-2008 equivalent
   133  	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
   134  	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
   135  )
   136  
   137  //go:generate stringer -type=RoundingMode
   138  
   139  // Accuracy describes the rounding error produced by the most recent
   140  // operation that generated a Float value, relative to the exact value.
   141  type Accuracy int8
   142  
   143  // Constants describing the Accuracy of a Float.
   144  const (
   145  	Below Accuracy = -1
   146  	Exact Accuracy = 0
   147  	Above Accuracy = +1
   148  )
   149  
   150  //go:generate stringer -type=Accuracy
   151  
   152  // SetPrec sets z's precision to prec and returns the (possibly) rounded
   153  // value of z. Rounding occurs according to z's rounding mode if the mantissa
   154  // cannot be represented in prec bits without loss of precision.
   155  // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
   156  // If prec > MaxPrec, it is set to MaxPrec.
   157  func (z *Float) SetPrec(prec uint) *Float {
   158  	z.acc = Exact // optimistically assume no rounding is needed
   159  
   160  	// special case
   161  	if prec == 0 {
   162  		z.prec = 0
   163  		if z.form == finite {
   164  			// truncate z to 0
   165  			z.acc = makeAcc(z.neg)
   166  			z.form = zero
   167  		}
   168  		return z
   169  	}
   170  
   171  	// general case
   172  	if prec > MaxPrec {
   173  		prec = MaxPrec
   174  	}
   175  	old := z.prec
   176  	z.prec = uint32(prec)
   177  	if z.prec < old {
   178  		z.round(0)
   179  	}
   180  	return z
   181  }
   182  
   183  func makeAcc(above bool) Accuracy {
   184  	if above {
   185  		return Above
   186  	}
   187  	return Below
   188  }
   189  
   190  // SetMode sets z's rounding mode to mode and returns an exact z.
   191  // z remains unchanged otherwise.
   192  // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
   193  func (z *Float) SetMode(mode RoundingMode) *Float {
   194  	z.mode = mode
   195  	z.acc = Exact
   196  	return z
   197  }
   198  
   199  // Prec returns the mantissa precision of x in bits.
   200  // The result may be 0 for |x| == 0 and |x| == Inf.
   201  func (x *Float) Prec() uint {
   202  	return uint(x.prec)
   203  }
   204  
   205  // MinPrec returns the minimum precision required to represent x exactly
   206  // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
   207  // The result is 0 for |x| == 0 and |x| == Inf.
   208  func (x *Float) MinPrec() uint {
   209  	if x.form != finite {
   210  		return 0
   211  	}
   212  	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
   213  }
   214  
   215  // Mode returns the rounding mode of x.
   216  func (x *Float) Mode() RoundingMode {
   217  	return x.mode
   218  }
   219  
   220  // Acc returns the accuracy of x produced by the most recent operation.
   221  func (x *Float) Acc() Accuracy {
   222  	return x.acc
   223  }
   224  
   225  // Sign returns:
   226  //
   227  //	-1 if x <   0
   228  //	 0 if x is ±0
   229  //	+1 if x >   0
   230  //
   231  func (x *Float) Sign() int {
   232  	if debugFloat {
   233  		x.validate()
   234  	}
   235  	if x.form == zero {
   236  		return 0
   237  	}
   238  	if x.neg {
   239  		return -1
   240  	}
   241  	return 1
   242  }
   243  
   244  // MantExp breaks x into its mantissa and exponent components
   245  // and returns the exponent. If a non-nil mant argument is
   246  // provided its value is set to the mantissa of x, with the
   247  // same precision and rounding mode as x. The components
   248  // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
   249  // Calling MantExp with a nil argument is an efficient way to
   250  // get the exponent of the receiver.
   251  //
   252  // Special cases are:
   253  //
   254  //	(  ±0).MantExp(mant) = 0, with mant set to   ±0
   255  //	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
   256  //
   257  // x and mant may be the same in which case x is set to its
   258  // mantissa value.
   259  func (x *Float) MantExp(mant *Float) (exp int) {
   260  	if debugFloat {
   261  		x.validate()
   262  	}
   263  	if x.form == finite {
   264  		exp = int(x.exp)
   265  	}
   266  	if mant != nil {
   267  		mant.Copy(x)
   268  		if mant.form == finite {
   269  			mant.exp = 0
   270  		}
   271  	}
   272  	return
   273  }
   274  
   275  func (z *Float) setExpAndRound(exp int64, sbit uint) {
   276  	if exp < MinExp {
   277  		// underflow
   278  		z.acc = makeAcc(z.neg)
   279  		z.form = zero
   280  		return
   281  	}
   282  
   283  	if exp > MaxExp {
   284  		// overflow
   285  		z.acc = makeAcc(!z.neg)
   286  		z.form = inf
   287  		return
   288  	}
   289  
   290  	z.form = finite
   291  	z.exp = int32(exp)
   292  	z.round(sbit)
   293  }
   294  
   295  // SetMantExp sets z to mant × 2**exp and and returns z.
   296  // The result z has the same precision and rounding mode
   297  // as mant. SetMantExp is an inverse of MantExp but does
   298  // not require 0.5 <= |mant| < 1.0. Specifically:
   299  //
   300  //	mant := new(Float)
   301  //	new(Float).SetMantExp(mant, x.SetMantExp(mant)).Cmp(x).Eql() is true
   302  //
   303  // Special cases are:
   304  //
   305  //	z.SetMantExp(  ±0, exp) =   ±0
   306  //	z.SetMantExp(±Inf, exp) = ±Inf
   307  //
   308  // z and mant may be the same in which case z's exponent
   309  // is set to exp.
   310  func (z *Float) SetMantExp(mant *Float, exp int) *Float {
   311  	if debugFloat {
   312  		z.validate()
   313  		mant.validate()
   314  	}
   315  	z.Copy(mant)
   316  	if z.form != finite {
   317  		return z
   318  	}
   319  	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
   320  	return z
   321  }
   322  
   323  // Signbit returns true if x is negative or negative zero.
   324  func (x *Float) Signbit() bool {
   325  	return x.neg
   326  }
   327  
   328  // IsInf reports whether x is +Inf or -Inf.
   329  func (x *Float) IsInf() bool {
   330  	return x.form == inf
   331  }
   332  
   333  // IsInt reports whether x is an integer.
   334  // ±Inf values are not integers.
   335  func (x *Float) IsInt() bool {
   336  	if debugFloat {
   337  		x.validate()
   338  	}
   339  	// special cases
   340  	if x.form != finite {
   341  		return x.form == zero
   342  	}
   343  	// x.form == finite
   344  	if x.exp <= 0 {
   345  		return false
   346  	}
   347  	// x.exp > 0
   348  	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
   349  }
   350  
   351  // debugging support
   352  func (x *Float) validate() {
   353  	if !debugFloat {
   354  		// avoid performance bugs
   355  		panic("validate called but debugFloat is not set")
   356  	}
   357  	if x.form != finite {
   358  		return
   359  	}
   360  	m := len(x.mant)
   361  	if m == 0 {
   362  		panic("nonzero finite number with empty mantissa")
   363  	}
   364  	const msb = 1 << (_W - 1)
   365  	if x.mant[m-1]&msb == 0 {
   366  		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
   367  	}
   368  	if x.prec == 0 {
   369  		panic("zero precision finite number")
   370  	}
   371  }
   372  
   373  // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
   374  // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
   375  // have before calling round. z's mantissa must be normalized (with the msb set)
   376  // or empty.
   377  //
   378  // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
   379  // sign of z. For correct rounding, the sign of z must be set correctly before
   380  // calling round.
   381  func (z *Float) round(sbit uint) {
   382  	if debugFloat {
   383  		z.validate()
   384  	}
   385  
   386  	z.acc = Exact
   387  	if z.form != finite {
   388  		// ±0 or ±Inf => nothing left to do
   389  		return
   390  	}
   391  	// z.form == finite && len(z.mant) > 0
   392  	// m > 0 implies z.prec > 0 (checked by validate)
   393  
   394  	m := uint32(len(z.mant)) // present mantissa length in words
   395  	bits := m * _W           // present mantissa bits
   396  	if bits <= z.prec {
   397  		// mantissa fits => nothing to do
   398  		return
   399  	}
   400  	// bits > z.prec
   401  
   402  	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
   403  
   404  	// Rounding is based on two bits: the rounding bit (rbit) and the
   405  	// sticky bit (sbit). The rbit is the bit immediately before the
   406  	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
   407  	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
   408  	//
   409  	//   rbit  sbit  => "fractional part"
   410  	//
   411  	//   0     0        == 0
   412  	//   0     1        >  0  , < 0.5
   413  	//   1     0        == 0.5
   414  	//   1     1        >  0.5, < 1.0
   415  
   416  	// bits > z.prec: mantissa too large => round
   417  	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
   418  	rbit := z.mant.bit(r)        // rounding bit
   419  	if sbit == 0 {
   420  		sbit = z.mant.sticky(r)
   421  	}
   422  	if debugFloat && sbit&^1 != 0 {
   423  		panic(fmt.Sprintf("invalid sbit %#x", sbit))
   424  	}
   425  
   426  	// convert ToXInf rounding modes
   427  	mode := z.mode
   428  	switch mode {
   429  	case ToNegativeInf:
   430  		mode = ToZero
   431  		if z.neg {
   432  			mode = AwayFromZero
   433  		}
   434  	case ToPositiveInf:
   435  		mode = AwayFromZero
   436  		if z.neg {
   437  			mode = ToZero
   438  		}
   439  	}
   440  
   441  	// cut off extra words
   442  	if m > n {
   443  		copy(z.mant, z.mant[m-n:]) // move n last words to front
   444  		z.mant = z.mant[:n]
   445  	}
   446  
   447  	// determine number of trailing zero bits t
   448  	t := n*_W - z.prec // 0 <= t < _W
   449  	lsb := Word(1) << t
   450  
   451  	// make rounding decision
   452  	// TODO(gri) This can be simplified (see Bits.round in bits_test.go).
   453  	switch mode {
   454  	case ToZero:
   455  		// nothing to do
   456  	case ToNearestEven, ToNearestAway:
   457  		if rbit == 0 {
   458  			// rounding bits == 0b0x
   459  			mode = ToZero
   460  		} else if sbit == 1 {
   461  			// rounding bits == 0b11
   462  			mode = AwayFromZero
   463  		}
   464  	case AwayFromZero:
   465  		if rbit|sbit == 0 {
   466  			mode = ToZero
   467  		}
   468  	default:
   469  		// ToXInf modes have been converted to ToZero or AwayFromZero
   470  		panic("unreachable")
   471  	}
   472  
   473  	// round and determine accuracy
   474  	switch mode {
   475  	case ToZero:
   476  		if rbit|sbit != 0 {
   477  			z.acc = Below
   478  		}
   479  
   480  	case ToNearestEven, ToNearestAway:
   481  		if debugFloat && rbit != 1 {
   482  			panic("internal error in rounding")
   483  		}
   484  		if mode == ToNearestEven && sbit == 0 && z.mant[0]&lsb == 0 {
   485  			z.acc = Below
   486  			break
   487  		}
   488  		// mode == ToNearestAway || sbit == 1 || z.mant[0]&lsb != 0
   489  		fallthrough
   490  
   491  	case AwayFromZero:
   492  		// add 1 to mantissa
   493  		if addVW(z.mant, z.mant, lsb) != 0 {
   494  			// overflow => shift mantissa right by 1 and add msb
   495  			shrVU(z.mant, z.mant, 1)
   496  			z.mant[n-1] |= 1 << (_W - 1)
   497  			// adjust exponent
   498  			if z.exp < MaxExp {
   499  				z.exp++
   500  			} else {
   501  				// exponent overflow
   502  				z.acc = makeAcc(!z.neg)
   503  				z.form = inf
   504  				return
   505  			}
   506  		}
   507  		z.acc = Above
   508  	}
   509  
   510  	// zero out trailing bits in least-significant word
   511  	z.mant[0] &^= lsb - 1
   512  
   513  	// update accuracy
   514  	if z.acc != Exact && z.neg {
   515  		z.acc = -z.acc
   516  	}
   517  
   518  	if debugFloat {
   519  		z.validate()
   520  	}
   521  
   522  	return
   523  }
   524  
   525  func (z *Float) setBits64(neg bool, x uint64) *Float {
   526  	if z.prec == 0 {
   527  		z.prec = 64
   528  	}
   529  	z.acc = Exact
   530  	z.neg = neg
   531  	if x == 0 {
   532  		z.form = zero
   533  		return z
   534  	}
   535  	// x != 0
   536  	z.form = finite
   537  	s := nlz64(x)
   538  	z.mant = z.mant.setUint64(x << s)
   539  	z.exp = int32(64 - s) // always fits
   540  	if z.prec < 64 {
   541  		z.round(0)
   542  	}
   543  	return z
   544  }
   545  
   546  // SetUint64 sets z to the (possibly rounded) value of x and returns z.
   547  // If z's precision is 0, it is changed to 64 (and rounding will have
   548  // no effect).
   549  func (z *Float) SetUint64(x uint64) *Float {
   550  	return z.setBits64(false, x)
   551  }
   552  
   553  // SetInt64 sets z to the (possibly rounded) value of x and returns z.
   554  // If z's precision is 0, it is changed to 64 (and rounding will have
   555  // no effect).
   556  func (z *Float) SetInt64(x int64) *Float {
   557  	u := x
   558  	if u < 0 {
   559  		u = -u
   560  	}
   561  	// We cannot simply call z.SetUint64(uint64(u)) and change
   562  	// the sign afterwards because the sign affects rounding.
   563  	return z.setBits64(x < 0, uint64(u))
   564  }
   565  
   566  // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
   567  // If z's precision is 0, it is changed to 53 (and rounding will have
   568  // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
   569  func (z *Float) SetFloat64(x float64) *Float {
   570  	if z.prec == 0 {
   571  		z.prec = 53
   572  	}
   573  	if math.IsNaN(x) {
   574  		panic(ErrNaN{"Float.SetFloat64(NaN)"})
   575  	}
   576  	z.acc = Exact
   577  	z.neg = math.Signbit(x) // handle -0, -Inf correctly
   578  	if x == 0 {
   579  		z.form = zero
   580  		return z
   581  	}
   582  	if math.IsInf(x, 0) {
   583  		z.form = inf
   584  		return z
   585  	}
   586  	// normalized x != 0
   587  	z.form = finite
   588  	fmant, exp := math.Frexp(x) // get normalized mantissa
   589  	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
   590  	z.exp = int32(exp) // always fits
   591  	if z.prec < 53 {
   592  		z.round(0)
   593  	}
   594  	return z
   595  }
   596  
   597  // fnorm normalizes mantissa m by shifting it to the left
   598  // such that the msb of the most-significant word (msw) is 1.
   599  // It returns the shift amount. It assumes that len(m) != 0.
   600  func fnorm(m nat) int64 {
   601  	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
   602  		panic("msw of mantissa is 0")
   603  	}
   604  	s := nlz(m[len(m)-1])
   605  	if s > 0 {
   606  		c := shlVU(m, m, s)
   607  		if debugFloat && c != 0 {
   608  			panic("nlz or shlVU incorrect")
   609  		}
   610  	}
   611  	return int64(s)
   612  }
   613  
   614  // SetInt sets z to the (possibly rounded) value of x and returns z.
   615  // If z's precision is 0, it is changed to the larger of x.BitLen()
   616  // or 64 (and rounding will have no effect).
   617  func (z *Float) SetInt(x *Int) *Float {
   618  	// TODO(gri) can be more efficient if z.prec > 0
   619  	// but small compared to the size of x, or if there
   620  	// are many trailing 0's.
   621  	bits := uint32(x.BitLen())
   622  	if z.prec == 0 {
   623  		z.prec = umax32(bits, 64)
   624  	}
   625  	z.acc = Exact
   626  	z.neg = x.neg
   627  	if len(x.abs) == 0 {
   628  		z.form = zero
   629  		return z
   630  	}
   631  	// x != 0
   632  	z.mant = z.mant.set(x.abs)
   633  	fnorm(z.mant)
   634  	z.setExpAndRound(int64(bits), 0)
   635  	return z
   636  }
   637  
   638  // SetRat sets z to the (possibly rounded) value of x and returns z.
   639  // If z's precision is 0, it is changed to the largest of a.BitLen(),
   640  // b.BitLen(), or 64; with x = a/b.
   641  func (z *Float) SetRat(x *Rat) *Float {
   642  	if x.IsInt() {
   643  		return z.SetInt(x.Num())
   644  	}
   645  	var a, b Float
   646  	a.SetInt(x.Num())
   647  	b.SetInt(x.Denom())
   648  	if z.prec == 0 {
   649  		z.prec = umax32(a.prec, b.prec)
   650  	}
   651  	return z.Quo(&a, &b)
   652  }
   653  
   654  // SetInf sets z to the infinite Float -Inf if signbit is
   655  // set, or +Inf if signbit is not set, and returns z. The
   656  // precision of z is unchanged and the result is always
   657  // Exact.
   658  func (z *Float) SetInf(signbit bool) *Float {
   659  	z.acc = Exact
   660  	z.form = inf
   661  	z.neg = signbit
   662  	return z
   663  }
   664  
   665  // Set sets z to the (possibly rounded) value of x and returns z.
   666  // If z's precision is 0, it is changed to the precision of x
   667  // before setting z (and rounding will have no effect).
   668  // Rounding is performed according to z's precision and rounding
   669  // mode; and z's accuracy reports the result error relative to the
   670  // exact (not rounded) result.
   671  func (z *Float) Set(x *Float) *Float {
   672  	if debugFloat {
   673  		x.validate()
   674  	}
   675  	z.acc = Exact
   676  	if z != x {
   677  		z.form = x.form
   678  		z.neg = x.neg
   679  		if x.form == finite {
   680  			z.exp = x.exp
   681  			z.mant = z.mant.set(x.mant)
   682  		}
   683  		if z.prec == 0 {
   684  			z.prec = x.prec
   685  		} else if z.prec < x.prec {
   686  			z.round(0)
   687  		}
   688  	}
   689  	return z
   690  }
   691  
   692  // Copy sets z to x, with the same precision, rounding mode, and
   693  // accuracy as x, and returns z. x is not changed even if z and
   694  // x are the same.
   695  func (z *Float) Copy(x *Float) *Float {
   696  	if debugFloat {
   697  		x.validate()
   698  	}
   699  	if z != x {
   700  		z.prec = x.prec
   701  		z.mode = x.mode
   702  		z.acc = x.acc
   703  		z.form = x.form
   704  		z.neg = x.neg
   705  		if z.form == finite {
   706  			z.mant = z.mant.set(x.mant)
   707  			z.exp = x.exp
   708  		}
   709  	}
   710  	return z
   711  }
   712  
   713  // msb32 returns the 32 most significant bits of x.
   714  func msb32(x nat) uint32 {
   715  	i := len(x) - 1
   716  	if i < 0 {
   717  		return 0
   718  	}
   719  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   720  		panic("x not normalized")
   721  	}
   722  	switch _W {
   723  	case 32:
   724  		return uint32(x[i])
   725  	case 64:
   726  		return uint32(x[i] >> 32)
   727  	}
   728  	panic("unreachable")
   729  }
   730  
   731  // msb64 returns the 64 most significant bits of x.
   732  func msb64(x nat) uint64 {
   733  	i := len(x) - 1
   734  	if i < 0 {
   735  		return 0
   736  	}
   737  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   738  		panic("x not normalized")
   739  	}
   740  	switch _W {
   741  	case 32:
   742  		v := uint64(x[i]) << 32
   743  		if i > 0 {
   744  			v |= uint64(x[i-1])
   745  		}
   746  		return v
   747  	case 64:
   748  		return uint64(x[i])
   749  	}
   750  	panic("unreachable")
   751  }
   752  
   753  // Uint64 returns the unsigned integer resulting from truncating x
   754  // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
   755  // if x is an integer and Below otherwise.
   756  // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
   757  // for x > math.MaxUint64.
   758  func (x *Float) Uint64() (uint64, Accuracy) {
   759  	if debugFloat {
   760  		x.validate()
   761  	}
   762  
   763  	switch x.form {
   764  	case finite:
   765  		if x.neg {
   766  			return 0, Above
   767  		}
   768  		// 0 < x < +Inf
   769  		if x.exp <= 0 {
   770  			// 0 < x < 1
   771  			return 0, Below
   772  		}
   773  		// 1 <= x < Inf
   774  		if x.exp <= 64 {
   775  			// u = trunc(x) fits into a uint64
   776  			u := msb64(x.mant) >> (64 - uint32(x.exp))
   777  			if x.MinPrec() <= 64 {
   778  				return u, Exact
   779  			}
   780  			return u, Below // x truncated
   781  		}
   782  		// x too large
   783  		return math.MaxUint64, Below
   784  
   785  	case zero:
   786  		return 0, Exact
   787  
   788  	case inf:
   789  		if x.neg {
   790  			return 0, Above
   791  		}
   792  		return math.MaxUint64, Below
   793  	}
   794  
   795  	panic("unreachable")
   796  }
   797  
   798  // Int64 returns the integer resulting from truncating x towards zero.
   799  // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
   800  // an integer, and Above (x < 0) or Below (x > 0) otherwise.
   801  // The result is (math.MinInt64, Above) for x < math.MinInt64,
   802  // and (math.MaxInt64, Below) for x > math.MaxInt64.
   803  func (x *Float) Int64() (int64, Accuracy) {
   804  	if debugFloat {
   805  		x.validate()
   806  	}
   807  
   808  	switch x.form {
   809  	case finite:
   810  		// 0 < |x| < +Inf
   811  		acc := makeAcc(x.neg)
   812  		if x.exp <= 0 {
   813  			// 0 < |x| < 1
   814  			return 0, acc
   815  		}
   816  		// x.exp > 0
   817  
   818  		// 1 <= |x| < +Inf
   819  		if x.exp <= 63 {
   820  			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
   821  			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
   822  			if x.neg {
   823  				i = -i
   824  			}
   825  			if x.MinPrec() <= uint(x.exp) {
   826  				return i, Exact
   827  			}
   828  			return i, acc // x truncated
   829  		}
   830  		if x.neg {
   831  			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
   832  			if x.exp == 64 && x.MinPrec() == 1 {
   833  				acc = Exact
   834  			}
   835  			return math.MinInt64, acc
   836  		}
   837  		// x too large
   838  		return math.MaxInt64, Below
   839  
   840  	case zero:
   841  		return 0, Exact
   842  
   843  	case inf:
   844  		if x.neg {
   845  			return math.MinInt64, Above
   846  		}
   847  		return math.MaxInt64, Below
   848  	}
   849  
   850  	panic("unreachable")
   851  }
   852  
   853  // Float32 returns the float32 value nearest to x. If x is too small to be
   854  // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
   855  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   856  // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
   857  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   858  func (x *Float) Float32() (float32, Accuracy) {
   859  	if debugFloat {
   860  		x.validate()
   861  	}
   862  
   863  	switch x.form {
   864  	case finite:
   865  		// 0 < |x| < +Inf
   866  
   867  		const (
   868  			fbits = 32                //        float size
   869  			mbits = 23                //        mantissa size (excluding implicit msb)
   870  			ebits = fbits - mbits - 1 //     8  exponent size
   871  			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
   872  			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
   873  			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
   874  			emax  = bias              //   127  largest unbiased exponent (normal)
   875  		)
   876  
   877  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
   878  		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
   879  		p := mbits + 1 // precision of normal float
   880  
   881  		// If the exponent is too small, we may have a denormal number
   882  		// in which case we have fewer mantissa bits available: reduce
   883  		// precision accordingly.
   884  		if e < emin {
   885  			p -= emin - int(e)
   886  			// Make sure we have at least 1 bit so that we don't
   887  			// lose numbers rounded up to the smallest denormal.
   888  			if p < 1 {
   889  				p = 1
   890  			}
   891  		}
   892  
   893  		// round
   894  		var r Float
   895  		r.prec = uint32(p)
   896  		r.Set(x)
   897  		e = r.exp - 1
   898  
   899  		// Rounding may have caused r to overflow to ±Inf
   900  		// (rounding never causes underflows to 0).
   901  		if r.form == inf {
   902  			e = emax + 1 // cause overflow below
   903  		}
   904  
   905  		// If the exponent is too large, overflow to ±Inf.
   906  		if e > emax {
   907  			// overflow
   908  			if x.neg {
   909  				return float32(math.Inf(-1)), Below
   910  			}
   911  			return float32(math.Inf(+1)), Above
   912  		}
   913  		// e <= emax
   914  
   915  		// Determine sign, biased exponent, and mantissa.
   916  		var sign, bexp, mant uint32
   917  		if x.neg {
   918  			sign = 1 << (fbits - 1)
   919  		}
   920  
   921  		// Rounding may have caused a denormal number to
   922  		// become normal. Check again.
   923  		if e < emin {
   924  			// denormal number
   925  			if e < dmin {
   926  				// underflow to ±0
   927  				if x.neg {
   928  					var z float32
   929  					return -z, Above
   930  				}
   931  				return 0.0, Below
   932  			}
   933  			// bexp = 0
   934  			mant = msb32(r.mant) >> (fbits - r.prec)
   935  		} else {
   936  			// normal number: emin <= e <= emax
   937  			bexp = uint32(e+bias) << mbits
   938  			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   939  		}
   940  
   941  		return math.Float32frombits(sign | bexp | mant), r.acc
   942  
   943  	case zero:
   944  		if x.neg {
   945  			var z float32
   946  			return -z, Exact
   947  		}
   948  		return 0.0, Exact
   949  
   950  	case inf:
   951  		if x.neg {
   952  			return float32(math.Inf(-1)), Exact
   953  		}
   954  		return float32(math.Inf(+1)), Exact
   955  	}
   956  
   957  	panic("unreachable")
   958  }
   959  
   960  // Float64 returns the float64 value nearest to x. If x is too small to be
   961  // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
   962  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   963  // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
   964  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   965  func (x *Float) Float64() (float64, Accuracy) {
   966  	if debugFloat {
   967  		x.validate()
   968  	}
   969  
   970  	switch x.form {
   971  	case finite:
   972  		// 0 < |x| < +Inf
   973  
   974  		const (
   975  			fbits = 64                //        float size
   976  			mbits = 52                //        mantissa size (excluding implicit msb)
   977  			ebits = fbits - mbits - 1 //    11  exponent size
   978  			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
   979  			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
   980  			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
   981  			emax  = bias              //  1023  largest unbiased exponent (normal)
   982  		)
   983  
   984  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent for floatxx mantissa.
   985  		e := x.exp - 1 // exponent for mantissa m with 1.0 <= m < 2.0
   986  		p := mbits + 1 // precision of normal float
   987  
   988  		// If the exponent is too small, we may have a denormal number
   989  		// in which case we have fewer mantissa bits available: reduce
   990  		// precision accordingly.
   991  		if e < emin {
   992  			p -= emin - int(e)
   993  			// Make sure we have at least 1 bit so that we don't
   994  			// lose numbers rounded up to the smallest denormal.
   995  			if p < 1 {
   996  				p = 1
   997  			}
   998  		}
   999  
  1000  		// round
  1001  		var r Float
  1002  		r.prec = uint32(p)
  1003  		r.Set(x)
  1004  		e = r.exp - 1
  1005  
  1006  		// Rounding may have caused r to overflow to ±Inf
  1007  		// (rounding never causes underflows to 0).
  1008  		if r.form == inf {
  1009  			e = emax + 1 // cause overflow below
  1010  		}
  1011  
  1012  		// If the exponent is too large, overflow to ±Inf.
  1013  		if e > emax {
  1014  			// overflow
  1015  			if x.neg {
  1016  				return math.Inf(-1), Below
  1017  			}
  1018  			return math.Inf(+1), Above
  1019  		}
  1020  		// e <= emax
  1021  
  1022  		// Determine sign, biased exponent, and mantissa.
  1023  		var sign, bexp, mant uint64
  1024  		if x.neg {
  1025  			sign = 1 << (fbits - 1)
  1026  		}
  1027  
  1028  		// Rounding may have caused a denormal number to
  1029  		// become normal. Check again.
  1030  		if e < emin {
  1031  			// denormal number
  1032  			if e < dmin {
  1033  				// underflow to ±0
  1034  				if x.neg {
  1035  					var z float64
  1036  					return -z, Above
  1037  				}
  1038  				return 0.0, Below
  1039  			}
  1040  			// bexp = 0
  1041  			mant = msb64(r.mant) >> (fbits - r.prec)
  1042  		} else {
  1043  			// normal number: emin <= e <= emax
  1044  			bexp = uint64(e+bias) << mbits
  1045  			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  1046  		}
  1047  
  1048  		return math.Float64frombits(sign | bexp | mant), r.acc
  1049  
  1050  	case zero:
  1051  		if x.neg {
  1052  			var z float64
  1053  			return -z, Exact
  1054  		}
  1055  		return 0.0, Exact
  1056  
  1057  	case inf:
  1058  		if x.neg {
  1059  			return math.Inf(-1), Exact
  1060  		}
  1061  		return math.Inf(+1), Exact
  1062  	}
  1063  
  1064  	panic("unreachable")
  1065  }
  1066  
  1067  // Int returns the result of truncating x towards zero;
  1068  // or nil if x is an infinity.
  1069  // The result is Exact if x.IsInt(); otherwise it is Below
  1070  // for x > 0, and Above for x < 0.
  1071  // If a non-nil *Int argument z is provided, Int stores
  1072  // the result in z instead of allocating a new Int.
  1073  func (x *Float) Int(z *Int) (*Int, Accuracy) {
  1074  	if debugFloat {
  1075  		x.validate()
  1076  	}
  1077  
  1078  	if z == nil && x.form <= finite {
  1079  		z = new(Int)
  1080  	}
  1081  
  1082  	switch x.form {
  1083  	case finite:
  1084  		// 0 < |x| < +Inf
  1085  		acc := makeAcc(x.neg)
  1086  		if x.exp <= 0 {
  1087  			// 0 < |x| < 1
  1088  			return z.SetInt64(0), acc
  1089  		}
  1090  		// x.exp > 0
  1091  
  1092  		// 1 <= |x| < +Inf
  1093  		// determine minimum required precision for x
  1094  		allBits := uint(len(x.mant)) * _W
  1095  		exp := uint(x.exp)
  1096  		if x.MinPrec() <= exp {
  1097  			acc = Exact
  1098  		}
  1099  		// shift mantissa as needed
  1100  		if z == nil {
  1101  			z = new(Int)
  1102  		}
  1103  		z.neg = x.neg
  1104  		switch {
  1105  		case exp > allBits:
  1106  			z.abs = z.abs.shl(x.mant, exp-allBits)
  1107  		default:
  1108  			z.abs = z.abs.set(x.mant)
  1109  		case exp < allBits:
  1110  			z.abs = z.abs.shr(x.mant, allBits-exp)
  1111  		}
  1112  		return z, acc
  1113  
  1114  	case zero:
  1115  		return z.SetInt64(0), Exact
  1116  
  1117  	case inf:
  1118  		return nil, makeAcc(x.neg)
  1119  	}
  1120  
  1121  	panic("unreachable")
  1122  }
  1123  
  1124  // Rat returns the rational number corresponding to x;
  1125  // or nil if x is an infinity.
  1126  // The result is Exact is x is not an Inf.
  1127  // If a non-nil *Rat argument z is provided, Rat stores
  1128  // the result in z instead of allocating a new Rat.
  1129  func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1130  	if debugFloat {
  1131  		x.validate()
  1132  	}
  1133  
  1134  	if z == nil && x.form <= finite {
  1135  		z = new(Rat)
  1136  	}
  1137  
  1138  	switch x.form {
  1139  	case finite:
  1140  		// 0 < |x| < +Inf
  1141  		allBits := int32(len(x.mant)) * _W
  1142  		// build up numerator and denominator
  1143  		z.a.neg = x.neg
  1144  		switch {
  1145  		case x.exp > allBits:
  1146  			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1147  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1148  			// z already in normal form
  1149  		default:
  1150  			z.a.abs = z.a.abs.set(x.mant)
  1151  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1152  			// z already in normal form
  1153  		case x.exp < allBits:
  1154  			z.a.abs = z.a.abs.set(x.mant)
  1155  			t := z.b.abs.setUint64(1)
  1156  			z.b.abs = t.shl(t, uint(allBits-x.exp))
  1157  			z.norm()
  1158  		}
  1159  		return z, Exact
  1160  
  1161  	case zero:
  1162  		return z.SetInt64(0), Exact
  1163  
  1164  	case inf:
  1165  		return nil, makeAcc(x.neg)
  1166  	}
  1167  
  1168  	panic("unreachable")
  1169  }
  1170  
  1171  // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1172  // and returns z.
  1173  func (z *Float) Abs(x *Float) *Float {
  1174  	z.Set(x)
  1175  	z.neg = false
  1176  	return z
  1177  }
  1178  
  1179  // Neg sets z to the (possibly rounded) value of x with its sign negated,
  1180  // and returns z.
  1181  func (z *Float) Neg(x *Float) *Float {
  1182  	z.Set(x)
  1183  	z.neg = !z.neg
  1184  	return z
  1185  }
  1186  
  1187  func validateBinaryOperands(x, y *Float) {
  1188  	if !debugFloat {
  1189  		// avoid performance bugs
  1190  		panic("validateBinaryOperands called but debugFloat is not set")
  1191  	}
  1192  	if len(x.mant) == 0 {
  1193  		panic("empty mantissa for x")
  1194  	}
  1195  	if len(y.mant) == 0 {
  1196  		panic("empty mantissa for y")
  1197  	}
  1198  }
  1199  
  1200  // z = x + y, ignoring signs of x and y for the addition
  1201  // but using the sign of z for rounding the result.
  1202  // x and y must have a non-empty mantissa and valid exponent.
  1203  func (z *Float) uadd(x, y *Float) {
  1204  	// Note: This implementation requires 2 shifts most of the
  1205  	// time. It is also inefficient if exponents or precisions
  1206  	// differ by wide margins. The following article describes
  1207  	// an efficient (but much more complicated) implementation
  1208  	// compatible with the internal representation used here:
  1209  	//
  1210  	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1211  	// Point Addition With Exact Rounding (as in the MPFR Library)"
  1212  	// http://www.vinc17.net/research/papers/rnc6.pdf
  1213  
  1214  	if debugFloat {
  1215  		validateBinaryOperands(x, y)
  1216  	}
  1217  
  1218  	// compute exponents ex, ey for mantissa with "binary point"
  1219  	// on the right (mantissa.0) - use int64 to avoid overflow
  1220  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1221  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1222  
  1223  	// TODO(gri) having a combined add-and-shift primitive
  1224  	//           could make this code significantly faster
  1225  	switch {
  1226  	case ex < ey:
  1227  		// cannot re-use z.mant w/o testing for aliasing
  1228  		t := nat(nil).shl(y.mant, uint(ey-ex))
  1229  		z.mant = z.mant.add(x.mant, t)
  1230  	default:
  1231  		// ex == ey, no shift needed
  1232  		z.mant = z.mant.add(x.mant, y.mant)
  1233  	case ex > ey:
  1234  		// cannot re-use z.mant w/o testing for aliasing
  1235  		t := nat(nil).shl(x.mant, uint(ex-ey))
  1236  		z.mant = z.mant.add(t, y.mant)
  1237  		ex = ey
  1238  	}
  1239  	// len(z.mant) > 0
  1240  
  1241  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1242  }
  1243  
  1244  // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1245  // but using the sign of z for rounding the result.
  1246  // x and y must have a non-empty mantissa and valid exponent.
  1247  func (z *Float) usub(x, y *Float) {
  1248  	// This code is symmetric to uadd.
  1249  	// We have not factored the common code out because
  1250  	// eventually uadd (and usub) should be optimized
  1251  	// by special-casing, and the code will diverge.
  1252  
  1253  	if debugFloat {
  1254  		validateBinaryOperands(x, y)
  1255  	}
  1256  
  1257  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1258  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1259  
  1260  	switch {
  1261  	case ex < ey:
  1262  		// cannot re-use z.mant w/o testing for aliasing
  1263  		t := nat(nil).shl(y.mant, uint(ey-ex))
  1264  		z.mant = t.sub(x.mant, t)
  1265  	default:
  1266  		// ex == ey, no shift needed
  1267  		z.mant = z.mant.sub(x.mant, y.mant)
  1268  	case ex > ey:
  1269  		// cannot re-use z.mant w/o testing for aliasing
  1270  		t := nat(nil).shl(x.mant, uint(ex-ey))
  1271  		z.mant = t.sub(t, y.mant)
  1272  		ex = ey
  1273  	}
  1274  
  1275  	// operands may have canceled each other out
  1276  	if len(z.mant) == 0 {
  1277  		z.acc = Exact
  1278  		z.form = zero
  1279  		z.neg = false
  1280  		return
  1281  	}
  1282  	// len(z.mant) > 0
  1283  
  1284  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1285  }
  1286  
  1287  // z = x * y, ignoring signs of x and y for the multiplication
  1288  // but using the sign of z for rounding the result.
  1289  // x and y must have a non-empty mantissa and valid exponent.
  1290  func (z *Float) umul(x, y *Float) {
  1291  	if debugFloat {
  1292  		validateBinaryOperands(x, y)
  1293  	}
  1294  
  1295  	// Note: This is doing too much work if the precision
  1296  	// of z is less than the sum of the precisions of x
  1297  	// and y which is often the case (e.g., if all floats
  1298  	// have the same precision).
  1299  	// TODO(gri) Optimize this for the common case.
  1300  
  1301  	e := int64(x.exp) + int64(y.exp)
  1302  	z.mant = z.mant.mul(x.mant, y.mant)
  1303  
  1304  	z.setExpAndRound(e-fnorm(z.mant), 0)
  1305  }
  1306  
  1307  // z = x / y, ignoring signs of x and y for the division
  1308  // but using the sign of z for rounding the result.
  1309  // x and y must have a non-empty mantissa and valid exponent.
  1310  func (z *Float) uquo(x, y *Float) {
  1311  	if debugFloat {
  1312  		validateBinaryOperands(x, y)
  1313  	}
  1314  
  1315  	// mantissa length in words for desired result precision + 1
  1316  	// (at least one extra bit so we get the rounding bit after
  1317  	// the division)
  1318  	n := int(z.prec/_W) + 1
  1319  
  1320  	// compute adjusted x.mant such that we get enough result precision
  1321  	xadj := x.mant
  1322  	if d := n - len(x.mant) + len(y.mant); d > 0 {
  1323  		// d extra words needed => add d "0 digits" to x
  1324  		xadj = make(nat, len(x.mant)+d)
  1325  		copy(xadj[d:], x.mant)
  1326  	}
  1327  	// TODO(gri): If we have too many digits (d < 0), we should be able
  1328  	// to shorten x for faster division. But we must be extra careful
  1329  	// with rounding in that case.
  1330  
  1331  	// Compute d before division since there may be aliasing of x.mant
  1332  	// (via xadj) or y.mant with z.mant.
  1333  	d := len(xadj) - len(y.mant)
  1334  
  1335  	// divide
  1336  	var r nat
  1337  	z.mant, r = z.mant.div(nil, xadj, y.mant)
  1338  	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1339  
  1340  	// The result is long enough to include (at least) the rounding bit.
  1341  	// If there's a non-zero remainder, the corresponding fractional part
  1342  	// (if it were computed), would have a non-zero sticky bit (if it were
  1343  	// zero, it couldn't have a non-zero remainder).
  1344  	var sbit uint
  1345  	if len(r) > 0 {
  1346  		sbit = 1
  1347  	}
  1348  
  1349  	z.setExpAndRound(e-fnorm(z.mant), sbit)
  1350  }
  1351  
  1352  // ucmp returns -1, 0, or +1, depending on whether
  1353  // |x| < |y|, |x| == |y|, or |x| > |y|.
  1354  // x and y must have a non-empty mantissa and valid exponent.
  1355  func (x *Float) ucmp(y *Float) int {
  1356  	if debugFloat {
  1357  		validateBinaryOperands(x, y)
  1358  	}
  1359  
  1360  	switch {
  1361  	case x.exp < y.exp:
  1362  		return -1
  1363  	case x.exp > y.exp:
  1364  		return +1
  1365  	}
  1366  	// x.exp == y.exp
  1367  
  1368  	// compare mantissas
  1369  	i := len(x.mant)
  1370  	j := len(y.mant)
  1371  	for i > 0 || j > 0 {
  1372  		var xm, ym Word
  1373  		if i > 0 {
  1374  			i--
  1375  			xm = x.mant[i]
  1376  		}
  1377  		if j > 0 {
  1378  			j--
  1379  			ym = y.mant[j]
  1380  		}
  1381  		switch {
  1382  		case xm < ym:
  1383  			return -1
  1384  		case xm > ym:
  1385  			return +1
  1386  		}
  1387  	}
  1388  
  1389  	return 0
  1390  }
  1391  
  1392  // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1393  //
  1394  // When neither the inputs nor result are NaN, the sign of a product or
  1395  // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1396  // or of a difference x−y regarded as a sum x+(−y), differs from at most
  1397  // one of the addends’ signs; and the sign of the result of conversions,
  1398  // the quantize operation, the roundToIntegral operations, and the
  1399  // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1400  // These rules shall apply even when operands or results are zero or infinite.
  1401  //
  1402  // When the sum of two operands with opposite signs (or the difference of
  1403  // two operands with like signs) is exactly zero, the sign of that sum (or
  1404  // difference) shall be +0 in all rounding-direction attributes except
  1405  // roundTowardNegative; under that attribute, the sign of an exact zero
  1406  // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1407  // sign as x even when x is zero.
  1408  //
  1409  // See also: https://play.golang.org/p/RtH3UCt5IH
  1410  
  1411  // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1412  // it is changed to the larger of x's or y's precision before the operation.
  1413  // Rounding is performed according to z's precision and rounding mode; and
  1414  // z's accuracy reports the result error relative to the exact (not rounded)
  1415  // result. Add panics with ErrNaN if x and y are infinities with opposite
  1416  // signs. The value of z is undefined in that case.
  1417  //
  1418  // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
  1419  func (z *Float) Add(x, y *Float) *Float {
  1420  	if debugFloat {
  1421  		x.validate()
  1422  		y.validate()
  1423  	}
  1424  
  1425  	if z.prec == 0 {
  1426  		z.prec = umax32(x.prec, y.prec)
  1427  	}
  1428  
  1429  	if x.form == finite && y.form == finite {
  1430  		// x + y (commom case)
  1431  		z.neg = x.neg
  1432  		if x.neg == y.neg {
  1433  			// x + y == x + y
  1434  			// (-x) + (-y) == -(x + y)
  1435  			z.uadd(x, y)
  1436  		} else {
  1437  			// x + (-y) == x - y == -(y - x)
  1438  			// (-x) + y == y - x == -(x - y)
  1439  			if x.ucmp(y) > 0 {
  1440  				z.usub(x, y)
  1441  			} else {
  1442  				z.neg = !z.neg
  1443  				z.usub(y, x)
  1444  			}
  1445  		}
  1446  		return z
  1447  	}
  1448  
  1449  	if x.form == inf && y.form == inf && x.neg != y.neg {
  1450  		// +Inf + -Inf
  1451  		// -Inf + +Inf
  1452  		// value of z is undefined but make sure it's valid
  1453  		z.acc = Exact
  1454  		z.form = zero
  1455  		z.neg = false
  1456  		panic(ErrNaN{"addition of infinities with opposite signs"})
  1457  	}
  1458  
  1459  	if x.form == zero && y.form == zero {
  1460  		// ±0 + ±0
  1461  		z.acc = Exact
  1462  		z.form = zero
  1463  		z.neg = x.neg && y.neg // -0 + -0 == -0
  1464  		return z
  1465  	}
  1466  
  1467  	if x.form == inf || y.form == zero {
  1468  		// ±Inf + y
  1469  		// x + ±0
  1470  		return z.Set(x)
  1471  	}
  1472  
  1473  	// ±0 + y
  1474  	// x + ±Inf
  1475  	return z.Set(y)
  1476  }
  1477  
  1478  // Sub sets z to the rounded difference x-y and returns z.
  1479  // Precision, rounding, and accuracy reporting are as for Add.
  1480  // Sub panics with ErrNaN if x and y are infinities with equal
  1481  // signs. The value of z is undefined in that case.
  1482  func (z *Float) Sub(x, y *Float) *Float {
  1483  	if debugFloat {
  1484  		x.validate()
  1485  		y.validate()
  1486  	}
  1487  
  1488  	if z.prec == 0 {
  1489  		z.prec = umax32(x.prec, y.prec)
  1490  	}
  1491  
  1492  	if x.form == finite && y.form == finite {
  1493  		// x - y (common case)
  1494  		z.neg = x.neg
  1495  		if x.neg != y.neg {
  1496  			// x - (-y) == x + y
  1497  			// (-x) - y == -(x + y)
  1498  			z.uadd(x, y)
  1499  		} else {
  1500  			// x - y == x - y == -(y - x)
  1501  			// (-x) - (-y) == y - x == -(x - y)
  1502  			if x.ucmp(y) > 0 {
  1503  				z.usub(x, y)
  1504  			} else {
  1505  				z.neg = !z.neg
  1506  				z.usub(y, x)
  1507  			}
  1508  		}
  1509  		return z
  1510  	}
  1511  
  1512  	if x.form == inf && y.form == inf && x.neg == y.neg {
  1513  		// +Inf - +Inf
  1514  		// -Inf - -Inf
  1515  		// value of z is undefined but make sure it's valid
  1516  		z.acc = Exact
  1517  		z.form = zero
  1518  		z.neg = false
  1519  		panic(ErrNaN{"subtraction of infinities with equal signs"})
  1520  	}
  1521  
  1522  	if x.form == zero && y.form == zero {
  1523  		// ±0 - ±0
  1524  		z.acc = Exact
  1525  		z.form = zero
  1526  		z.neg = x.neg && !y.neg // -0 - +0 == -0
  1527  		return z
  1528  	}
  1529  
  1530  	if x.form == inf || y.form == zero {
  1531  		// ±Inf - y
  1532  		// x - ±0
  1533  		return z.Set(x)
  1534  	}
  1535  
  1536  	// ±0 - y
  1537  	// x - ±Inf
  1538  	return z.Neg(y)
  1539  }
  1540  
  1541  // Mul sets z to the rounded product x*y and returns z.
  1542  // Precision, rounding, and accuracy reporting are as for Add.
  1543  // Mul panics with ErrNaN if one operand is zero and the other
  1544  // operand an infinity. The value of z is undefined in that case.
  1545  func (z *Float) Mul(x, y *Float) *Float {
  1546  	if debugFloat {
  1547  		x.validate()
  1548  		y.validate()
  1549  	}
  1550  
  1551  	if z.prec == 0 {
  1552  		z.prec = umax32(x.prec, y.prec)
  1553  	}
  1554  
  1555  	z.neg = x.neg != y.neg
  1556  
  1557  	if x.form == finite && y.form == finite {
  1558  		// x * y (common case)
  1559  		z.umul(x, y)
  1560  		return z
  1561  	}
  1562  
  1563  	z.acc = Exact
  1564  	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1565  		// ±0 * ±Inf
  1566  		// ±Inf * ±0
  1567  		// value of z is undefined but make sure it's valid
  1568  		z.form = zero
  1569  		z.neg = false
  1570  		panic(ErrNaN{"multiplication of zero with infinity"})
  1571  	}
  1572  
  1573  	if x.form == inf || y.form == inf {
  1574  		// ±Inf * y
  1575  		// x * ±Inf
  1576  		z.form = inf
  1577  		return z
  1578  	}
  1579  
  1580  	// ±0 * y
  1581  	// x * ±0
  1582  	z.form = zero
  1583  	return z
  1584  }
  1585  
  1586  // Quo sets z to the rounded quotient x/y and returns z.
  1587  // Precision, rounding, and accuracy reporting are as for Add.
  1588  // Quo panics with ErrNaN if both operands are zero or infinities.
  1589  // The value of z is undefined in that case.
  1590  func (z *Float) Quo(x, y *Float) *Float {
  1591  	if debugFloat {
  1592  		x.validate()
  1593  		y.validate()
  1594  	}
  1595  
  1596  	if z.prec == 0 {
  1597  		z.prec = umax32(x.prec, y.prec)
  1598  	}
  1599  
  1600  	z.neg = x.neg != y.neg
  1601  
  1602  	if x.form == finite && y.form == finite {
  1603  		// x / y (common case)
  1604  		z.uquo(x, y)
  1605  		return z
  1606  	}
  1607  
  1608  	z.acc = Exact
  1609  	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1610  		// ±0 / ±0
  1611  		// ±Inf / ±Inf
  1612  		// value of z is undefined but make sure it's valid
  1613  		z.form = zero
  1614  		z.neg = false
  1615  		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1616  	}
  1617  
  1618  	if x.form == zero || y.form == inf {
  1619  		// ±0 / y
  1620  		// x / ±Inf
  1621  		z.form = zero
  1622  		return z
  1623  	}
  1624  
  1625  	// x / ±0
  1626  	// ±Inf / y
  1627  	z.form = inf
  1628  	return z
  1629  }
  1630  
  1631  // Cmp compares x and y and returns:
  1632  //
  1633  //   -1 if x <  y
  1634  //    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1635  //   +1 if x >  y
  1636  //
  1637  func (x *Float) Cmp(y *Float) int {
  1638  	if debugFloat {
  1639  		x.validate()
  1640  		y.validate()
  1641  	}
  1642  
  1643  	mx := x.ord()
  1644  	my := y.ord()
  1645  	switch {
  1646  	case mx < my:
  1647  		return -1
  1648  	case mx > my:
  1649  		return +1
  1650  	}
  1651  	// mx == my
  1652  
  1653  	// only if |mx| == 1 we have to compare the mantissae
  1654  	switch mx {
  1655  	case -1:
  1656  		return y.ucmp(x)
  1657  	case +1:
  1658  		return x.ucmp(y)
  1659  	}
  1660  
  1661  	return 0
  1662  }
  1663  
  1664  // ord classifies x and returns:
  1665  //
  1666  //	-2 if -Inf == x
  1667  //	-1 if -Inf < x < 0
  1668  //	 0 if x == 0 (signed or unsigned)
  1669  //	+1 if 0 < x < +Inf
  1670  //	+2 if x == +Inf
  1671  //
  1672  func (x *Float) ord() int {
  1673  	var m int
  1674  	switch x.form {
  1675  	case finite:
  1676  		m = 1
  1677  	case zero:
  1678  		return 0
  1679  	case inf:
  1680  		m = 2
  1681  	}
  1682  	if x.neg {
  1683  		m = -m
  1684  	}
  1685  	return m
  1686  }
  1687  
  1688  func umax32(x, y uint32) uint32 {
  1689  	if x > y {
  1690  		return x
  1691  	}
  1692  	return y
  1693  }