github.com/flyinox/gosm@v0.0.0-20171117061539-16768cb62077/src/math/big/float.go (about)

     1  // Copyright 2014 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  // This file implements multi-precision floating-point numbers.
     6  // Like in the GNU MPFR library (http://www.mpfr.org/), operands
     7  // can be of mixed precision. Unlike MPFR, the rounding mode is
     8  // not specified with each operation, but with each operand. The
     9  // rounding mode of the result operand determines the rounding
    10  // mode of an operation. This is a from-scratch implementation.
    11  
    12  package big
    13  
    14  import (
    15  	"fmt"
    16  	"math"
    17  	"math/bits"
    18  )
    19  
    20  const debugFloat = false // enable for debugging
    21  
    22  // A nonzero finite Float represents a multi-precision floating point number
    23  //
    24  //   sign × mantissa × 2**exponent
    25  //
    26  // with 0.5 <= mantissa < 1.0, and MinExp <= exponent <= MaxExp.
    27  // A Float may also be zero (+0, -0) or infinite (+Inf, -Inf).
    28  // All Floats are ordered, and the ordering of two Floats x and y
    29  // is defined by x.Cmp(y).
    30  //
    31  // Each Float value also has a precision, rounding mode, and accuracy.
    32  // The precision is the maximum number of mantissa bits available to
    33  // represent the value. The rounding mode specifies how a result should
    34  // be rounded to fit into the mantissa bits, and accuracy describes the
    35  // rounding error with respect to the exact result.
    36  //
    37  // Unless specified otherwise, all operations (including setters) that
    38  // specify a *Float variable for the result (usually via the receiver
    39  // with the exception of MantExp), round the numeric result according
    40  // to the precision and rounding mode of the result variable.
    41  //
    42  // If the provided result precision is 0 (see below), it is set to the
    43  // precision of the argument with the largest precision value before any
    44  // rounding takes place, and the rounding mode remains unchanged. Thus,
    45  // uninitialized Floats provided as result arguments will have their
    46  // precision set to a reasonable value determined by the operands and
    47  // their mode is the zero value for RoundingMode (ToNearestEven).
    48  //
    49  // By setting the desired precision to 24 or 53 and using matching rounding
    50  // mode (typically ToNearestEven), Float operations produce the same results
    51  // as the corresponding float32 or float64 IEEE-754 arithmetic for operands
    52  // that correspond to normal (i.e., not denormal) float32 or float64 numbers.
    53  // Exponent underflow and overflow lead to a 0 or an Infinity for different
    54  // values than IEEE-754 because Float exponents have a much larger range.
    55  //
    56  // The zero (uninitialized) value for a Float is ready to use and represents
    57  // the number +0.0 exactly, with precision 0 and rounding mode ToNearestEven.
    58  //
    59  type Float struct {
    60  	prec uint32
    61  	mode RoundingMode
    62  	acc  Accuracy
    63  	form form
    64  	neg  bool
    65  	mant nat
    66  	exp  int32
    67  }
    68  
    69  // An ErrNaN panic is raised by a Float operation that would lead to
    70  // a NaN under IEEE-754 rules. An ErrNaN implements the error interface.
    71  type ErrNaN struct {
    72  	msg string
    73  }
    74  
    75  func (err ErrNaN) Error() string {
    76  	return err.msg
    77  }
    78  
    79  // NewFloat allocates and returns a new Float set to x,
    80  // with precision 53 and rounding mode ToNearestEven.
    81  // NewFloat panics with ErrNaN if x is a NaN.
    82  func NewFloat(x float64) *Float {
    83  	if math.IsNaN(x) {
    84  		panic(ErrNaN{"NewFloat(NaN)"})
    85  	}
    86  	return new(Float).SetFloat64(x)
    87  }
    88  
    89  // Exponent and precision limits.
    90  const (
    91  	MaxExp  = math.MaxInt32  // largest supported exponent
    92  	MinExp  = math.MinInt32  // smallest supported exponent
    93  	MaxPrec = math.MaxUint32 // largest (theoretically) supported precision; likely memory-limited
    94  )
    95  
    96  // Internal representation: The mantissa bits x.mant of a nonzero finite
    97  // Float x are stored in a nat slice long enough to hold up to x.prec bits;
    98  // the slice may (but doesn't have to) be shorter if the mantissa contains
    99  // trailing 0 bits. x.mant is normalized if the msb of x.mant == 1 (i.e.,
   100  // the msb is shifted all the way "to the left"). Thus, if the mantissa has
   101  // trailing 0 bits or x.prec is not a multiple of the Word size _W,
   102  // x.mant[0] has trailing zero bits. The msb of the mantissa corresponds
   103  // to the value 0.5; the exponent x.exp shifts the binary point as needed.
   104  //
   105  // A zero or non-finite Float x ignores x.mant and x.exp.
   106  //
   107  // x                 form      neg      mant         exp
   108  // ----------------------------------------------------------
   109  // ±0                zero      sign     -            -
   110  // 0 < |x| < +Inf    finite    sign     mantissa     exponent
   111  // ±Inf              inf       sign     -            -
   112  
   113  // A form value describes the internal representation.
   114  type form byte
   115  
   116  // The form value order is relevant - do not change!
   117  const (
   118  	zero form = iota
   119  	finite
   120  	inf
   121  )
   122  
   123  // RoundingMode determines how a Float value is rounded to the
   124  // desired precision. Rounding may change the Float value; the
   125  // rounding error is described by the Float's Accuracy.
   126  type RoundingMode byte
   127  
   128  // These constants define supported rounding modes.
   129  const (
   130  	ToNearestEven RoundingMode = iota // == IEEE 754-2008 roundTiesToEven
   131  	ToNearestAway                     // == IEEE 754-2008 roundTiesToAway
   132  	ToZero                            // == IEEE 754-2008 roundTowardZero
   133  	AwayFromZero                      // no IEEE 754-2008 equivalent
   134  	ToNegativeInf                     // == IEEE 754-2008 roundTowardNegative
   135  	ToPositiveInf                     // == IEEE 754-2008 roundTowardPositive
   136  )
   137  
   138  //go:generate stringer -type=RoundingMode
   139  
   140  // Accuracy describes the rounding error produced by the most recent
   141  // operation that generated a Float value, relative to the exact value.
   142  type Accuracy int8
   143  
   144  // Constants describing the Accuracy of a Float.
   145  const (
   146  	Below Accuracy = -1
   147  	Exact Accuracy = 0
   148  	Above Accuracy = +1
   149  )
   150  
   151  //go:generate stringer -type=Accuracy
   152  
   153  // SetPrec sets z's precision to prec and returns the (possibly) rounded
   154  // value of z. Rounding occurs according to z's rounding mode if the mantissa
   155  // cannot be represented in prec bits without loss of precision.
   156  // SetPrec(0) maps all finite values to ±0; infinite values remain unchanged.
   157  // If prec > MaxPrec, it is set to MaxPrec.
   158  func (z *Float) SetPrec(prec uint) *Float {
   159  	z.acc = Exact // optimistically assume no rounding is needed
   160  
   161  	// special case
   162  	if prec == 0 {
   163  		z.prec = 0
   164  		if z.form == finite {
   165  			// truncate z to 0
   166  			z.acc = makeAcc(z.neg)
   167  			z.form = zero
   168  		}
   169  		return z
   170  	}
   171  
   172  	// general case
   173  	if prec > MaxPrec {
   174  		prec = MaxPrec
   175  	}
   176  	old := z.prec
   177  	z.prec = uint32(prec)
   178  	if z.prec < old {
   179  		z.round(0)
   180  	}
   181  	return z
   182  }
   183  
   184  func makeAcc(above bool) Accuracy {
   185  	if above {
   186  		return Above
   187  	}
   188  	return Below
   189  }
   190  
   191  // SetMode sets z's rounding mode to mode and returns an exact z.
   192  // z remains unchanged otherwise.
   193  // z.SetMode(z.Mode()) is a cheap way to set z's accuracy to Exact.
   194  func (z *Float) SetMode(mode RoundingMode) *Float {
   195  	z.mode = mode
   196  	z.acc = Exact
   197  	return z
   198  }
   199  
   200  // Prec returns the mantissa precision of x in bits.
   201  // The result may be 0 for |x| == 0 and |x| == Inf.
   202  func (x *Float) Prec() uint {
   203  	return uint(x.prec)
   204  }
   205  
   206  // MinPrec returns the minimum precision required to represent x exactly
   207  // (i.e., the smallest prec before x.SetPrec(prec) would start rounding x).
   208  // The result is 0 for |x| == 0 and |x| == Inf.
   209  func (x *Float) MinPrec() uint {
   210  	if x.form != finite {
   211  		return 0
   212  	}
   213  	return uint(len(x.mant))*_W - x.mant.trailingZeroBits()
   214  }
   215  
   216  // Mode returns the rounding mode of x.
   217  func (x *Float) Mode() RoundingMode {
   218  	return x.mode
   219  }
   220  
   221  // Acc returns the accuracy of x produced by the most recent operation.
   222  func (x *Float) Acc() Accuracy {
   223  	return x.acc
   224  }
   225  
   226  // Sign returns:
   227  //
   228  //	-1 if x <   0
   229  //	 0 if x is ±0
   230  //	+1 if x >   0
   231  //
   232  func (x *Float) Sign() int {
   233  	if debugFloat {
   234  		x.validate()
   235  	}
   236  	if x.form == zero {
   237  		return 0
   238  	}
   239  	if x.neg {
   240  		return -1
   241  	}
   242  	return 1
   243  }
   244  
   245  // MantExp breaks x into its mantissa and exponent components
   246  // and returns the exponent. If a non-nil mant argument is
   247  // provided its value is set to the mantissa of x, with the
   248  // same precision and rounding mode as x. The components
   249  // satisfy x == mant × 2**exp, with 0.5 <= |mant| < 1.0.
   250  // Calling MantExp with a nil argument is an efficient way to
   251  // get the exponent of the receiver.
   252  //
   253  // Special cases are:
   254  //
   255  //	(  ±0).MantExp(mant) = 0, with mant set to   ±0
   256  //	(±Inf).MantExp(mant) = 0, with mant set to ±Inf
   257  //
   258  // x and mant may be the same in which case x is set to its
   259  // mantissa value.
   260  func (x *Float) MantExp(mant *Float) (exp int) {
   261  	if debugFloat {
   262  		x.validate()
   263  	}
   264  	if x.form == finite {
   265  		exp = int(x.exp)
   266  	}
   267  	if mant != nil {
   268  		mant.Copy(x)
   269  		if mant.form == finite {
   270  			mant.exp = 0
   271  		}
   272  	}
   273  	return
   274  }
   275  
   276  func (z *Float) setExpAndRound(exp int64, sbit uint) {
   277  	if exp < MinExp {
   278  		// underflow
   279  		z.acc = makeAcc(z.neg)
   280  		z.form = zero
   281  		return
   282  	}
   283  
   284  	if exp > MaxExp {
   285  		// overflow
   286  		z.acc = makeAcc(!z.neg)
   287  		z.form = inf
   288  		return
   289  	}
   290  
   291  	z.form = finite
   292  	z.exp = int32(exp)
   293  	z.round(sbit)
   294  }
   295  
   296  // SetMantExp sets z to mant × 2**exp and and returns z.
   297  // The result z has the same precision and rounding mode
   298  // as mant. SetMantExp is an inverse of MantExp but does
   299  // not require 0.5 <= |mant| < 1.0. Specifically:
   300  //
   301  //	mant := new(Float)
   302  //	new(Float).SetMantExp(mant, x.MantExp(mant)).Cmp(x) == 0
   303  //
   304  // Special cases are:
   305  //
   306  //	z.SetMantExp(  ±0, exp) =   ±0
   307  //	z.SetMantExp(±Inf, exp) = ±Inf
   308  //
   309  // z and mant may be the same in which case z's exponent
   310  // is set to exp.
   311  func (z *Float) SetMantExp(mant *Float, exp int) *Float {
   312  	if debugFloat {
   313  		z.validate()
   314  		mant.validate()
   315  	}
   316  	z.Copy(mant)
   317  	if z.form != finite {
   318  		return z
   319  	}
   320  	z.setExpAndRound(int64(z.exp)+int64(exp), 0)
   321  	return z
   322  }
   323  
   324  // Signbit returns true if x is negative or negative zero.
   325  func (x *Float) Signbit() bool {
   326  	return x.neg
   327  }
   328  
   329  // IsInf reports whether x is +Inf or -Inf.
   330  func (x *Float) IsInf() bool {
   331  	return x.form == inf
   332  }
   333  
   334  // IsInt reports whether x is an integer.
   335  // ±Inf values are not integers.
   336  func (x *Float) IsInt() bool {
   337  	if debugFloat {
   338  		x.validate()
   339  	}
   340  	// special cases
   341  	if x.form != finite {
   342  		return x.form == zero
   343  	}
   344  	// x.form == finite
   345  	if x.exp <= 0 {
   346  		return false
   347  	}
   348  	// x.exp > 0
   349  	return x.prec <= uint32(x.exp) || x.MinPrec() <= uint(x.exp) // not enough bits for fractional mantissa
   350  }
   351  
   352  // debugging support
   353  func (x *Float) validate() {
   354  	if !debugFloat {
   355  		// avoid performance bugs
   356  		panic("validate called but debugFloat is not set")
   357  	}
   358  	if x.form != finite {
   359  		return
   360  	}
   361  	m := len(x.mant)
   362  	if m == 0 {
   363  		panic("nonzero finite number with empty mantissa")
   364  	}
   365  	const msb = 1 << (_W - 1)
   366  	if x.mant[m-1]&msb == 0 {
   367  		panic(fmt.Sprintf("msb not set in last word %#x of %s", x.mant[m-1], x.Text('p', 0)))
   368  	}
   369  	if x.prec == 0 {
   370  		panic("zero precision finite number")
   371  	}
   372  }
   373  
   374  // round rounds z according to z.mode to z.prec bits and sets z.acc accordingly.
   375  // sbit must be 0 or 1 and summarizes any "sticky bit" information one might
   376  // have before calling round. z's mantissa must be normalized (with the msb set)
   377  // or empty.
   378  //
   379  // CAUTION: The rounding modes ToNegativeInf, ToPositiveInf are affected by the
   380  // sign of z. For correct rounding, the sign of z must be set correctly before
   381  // calling round.
   382  func (z *Float) round(sbit uint) {
   383  	if debugFloat {
   384  		z.validate()
   385  	}
   386  
   387  	z.acc = Exact
   388  	if z.form != finite {
   389  		// ±0 or ±Inf => nothing left to do
   390  		return
   391  	}
   392  	// z.form == finite && len(z.mant) > 0
   393  	// m > 0 implies z.prec > 0 (checked by validate)
   394  
   395  	m := uint32(len(z.mant)) // present mantissa length in words
   396  	bits := m * _W           // present mantissa bits; bits > 0
   397  	if bits <= z.prec {
   398  		// mantissa fits => nothing to do
   399  		return
   400  	}
   401  	// bits > z.prec
   402  
   403  	// Rounding is based on two bits: the rounding bit (rbit) and the
   404  	// sticky bit (sbit). The rbit is the bit immediately before the
   405  	// z.prec leading mantissa bits (the "0.5"). The sbit is set if any
   406  	// of the bits before the rbit are set (the "0.25", "0.125", etc.):
   407  	//
   408  	//   rbit  sbit  => "fractional part"
   409  	//
   410  	//   0     0        == 0
   411  	//   0     1        >  0  , < 0.5
   412  	//   1     0        == 0.5
   413  	//   1     1        >  0.5, < 1.0
   414  
   415  	// bits > z.prec: mantissa too large => round
   416  	r := uint(bits - z.prec - 1) // rounding bit position; r >= 0
   417  	rbit := z.mant.bit(r) & 1    // rounding bit; be safe and ensure it's a single bit
   418  	if sbit == 0 {
   419  		// TODO(gri) if rbit != 0 we don't need to compute sbit for some rounding modes (optimization)
   420  		sbit = z.mant.sticky(r)
   421  	}
   422  	sbit &= 1 // be safe and ensure it's a single bit
   423  
   424  	// cut off extra words
   425  	n := (z.prec + (_W - 1)) / _W // mantissa length in words for desired precision
   426  	if m > n {
   427  		copy(z.mant, z.mant[m-n:]) // move n last words to front
   428  		z.mant = z.mant[:n]
   429  	}
   430  
   431  	// determine number of trailing zero bits (ntz) and compute lsb mask of mantissa's least-significant word
   432  	ntz := n*_W - z.prec // 0 <= ntz < _W
   433  	lsb := Word(1) << ntz
   434  
   435  	// round if result is inexact
   436  	if rbit|sbit != 0 {
   437  		// Make rounding decision: The result mantissa is truncated ("rounded down")
   438  		// by default. Decide if we need to increment, or "round up", the (unsigned)
   439  		// mantissa.
   440  		inc := false
   441  		switch z.mode {
   442  		case ToNegativeInf:
   443  			inc = z.neg
   444  		case ToZero:
   445  			// nothing to do
   446  		case ToNearestEven:
   447  			inc = rbit != 0 && (sbit != 0 || z.mant[0]&lsb != 0)
   448  		case ToNearestAway:
   449  			inc = rbit != 0
   450  		case AwayFromZero:
   451  			inc = true
   452  		case ToPositiveInf:
   453  			inc = !z.neg
   454  		default:
   455  			panic("unreachable")
   456  		}
   457  
   458  		// A positive result (!z.neg) is Above the exact result if we increment,
   459  		// and it's Below if we truncate (Exact results require no rounding).
   460  		// For a negative result (z.neg) it is exactly the opposite.
   461  		z.acc = makeAcc(inc != z.neg)
   462  
   463  		if inc {
   464  			// add 1 to mantissa
   465  			if addVW(z.mant, z.mant, lsb) != 0 {
   466  				// mantissa overflow => adjust exponent
   467  				if z.exp >= MaxExp {
   468  					// exponent overflow
   469  					z.form = inf
   470  					return
   471  				}
   472  				z.exp++
   473  				// adjust mantissa: divide by 2 to compensate for exponent adjustment
   474  				shrVU(z.mant, z.mant, 1)
   475  				// set msb == carry == 1 from the mantissa overflow above
   476  				const msb = 1 << (_W - 1)
   477  				z.mant[n-1] |= msb
   478  			}
   479  		}
   480  	}
   481  
   482  	// zero out trailing bits in least-significant word
   483  	z.mant[0] &^= lsb - 1
   484  
   485  	if debugFloat {
   486  		z.validate()
   487  	}
   488  }
   489  
   490  func (z *Float) setBits64(neg bool, x uint64) *Float {
   491  	if z.prec == 0 {
   492  		z.prec = 64
   493  	}
   494  	z.acc = Exact
   495  	z.neg = neg
   496  	if x == 0 {
   497  		z.form = zero
   498  		return z
   499  	}
   500  	// x != 0
   501  	z.form = finite
   502  	s := bits.LeadingZeros64(x)
   503  	z.mant = z.mant.setUint64(x << uint(s))
   504  	z.exp = int32(64 - s) // always fits
   505  	if z.prec < 64 {
   506  		z.round(0)
   507  	}
   508  	return z
   509  }
   510  
   511  // SetUint64 sets z to the (possibly rounded) value of x and returns z.
   512  // If z's precision is 0, it is changed to 64 (and rounding will have
   513  // no effect).
   514  func (z *Float) SetUint64(x uint64) *Float {
   515  	return z.setBits64(false, x)
   516  }
   517  
   518  // SetInt64 sets z to the (possibly rounded) value of x and returns z.
   519  // If z's precision is 0, it is changed to 64 (and rounding will have
   520  // no effect).
   521  func (z *Float) SetInt64(x int64) *Float {
   522  	u := x
   523  	if u < 0 {
   524  		u = -u
   525  	}
   526  	// We cannot simply call z.SetUint64(uint64(u)) and change
   527  	// the sign afterwards because the sign affects rounding.
   528  	return z.setBits64(x < 0, uint64(u))
   529  }
   530  
   531  // SetFloat64 sets z to the (possibly rounded) value of x and returns z.
   532  // If z's precision is 0, it is changed to 53 (and rounding will have
   533  // no effect). SetFloat64 panics with ErrNaN if x is a NaN.
   534  func (z *Float) SetFloat64(x float64) *Float {
   535  	if z.prec == 0 {
   536  		z.prec = 53
   537  	}
   538  	if math.IsNaN(x) {
   539  		panic(ErrNaN{"Float.SetFloat64(NaN)"})
   540  	}
   541  	z.acc = Exact
   542  	z.neg = math.Signbit(x) // handle -0, -Inf correctly
   543  	if x == 0 {
   544  		z.form = zero
   545  		return z
   546  	}
   547  	if math.IsInf(x, 0) {
   548  		z.form = inf
   549  		return z
   550  	}
   551  	// normalized x != 0
   552  	z.form = finite
   553  	fmant, exp := math.Frexp(x) // get normalized mantissa
   554  	z.mant = z.mant.setUint64(1<<63 | math.Float64bits(fmant)<<11)
   555  	z.exp = int32(exp) // always fits
   556  	if z.prec < 53 {
   557  		z.round(0)
   558  	}
   559  	return z
   560  }
   561  
   562  // fnorm normalizes mantissa m by shifting it to the left
   563  // such that the msb of the most-significant word (msw) is 1.
   564  // It returns the shift amount. It assumes that len(m) != 0.
   565  func fnorm(m nat) int64 {
   566  	if debugFloat && (len(m) == 0 || m[len(m)-1] == 0) {
   567  		panic("msw of mantissa is 0")
   568  	}
   569  	s := nlz(m[len(m)-1])
   570  	if s > 0 {
   571  		c := shlVU(m, m, s)
   572  		if debugFloat && c != 0 {
   573  			panic("nlz or shlVU incorrect")
   574  		}
   575  	}
   576  	return int64(s)
   577  }
   578  
   579  // SetInt sets z to the (possibly rounded) value of x and returns z.
   580  // If z's precision is 0, it is changed to the larger of x.BitLen()
   581  // or 64 (and rounding will have no effect).
   582  func (z *Float) SetInt(x *Int) *Float {
   583  	// TODO(gri) can be more efficient if z.prec > 0
   584  	// but small compared to the size of x, or if there
   585  	// are many trailing 0's.
   586  	bits := uint32(x.BitLen())
   587  	if z.prec == 0 {
   588  		z.prec = umax32(bits, 64)
   589  	}
   590  	z.acc = Exact
   591  	z.neg = x.neg
   592  	if len(x.abs) == 0 {
   593  		z.form = zero
   594  		return z
   595  	}
   596  	// x != 0
   597  	z.mant = z.mant.set(x.abs)
   598  	fnorm(z.mant)
   599  	z.setExpAndRound(int64(bits), 0)
   600  	return z
   601  }
   602  
   603  // SetRat sets z to the (possibly rounded) value of x and returns z.
   604  // If z's precision is 0, it is changed to the largest of a.BitLen(),
   605  // b.BitLen(), or 64; with x = a/b.
   606  func (z *Float) SetRat(x *Rat) *Float {
   607  	if x.IsInt() {
   608  		return z.SetInt(x.Num())
   609  	}
   610  	var a, b Float
   611  	a.SetInt(x.Num())
   612  	b.SetInt(x.Denom())
   613  	if z.prec == 0 {
   614  		z.prec = umax32(a.prec, b.prec)
   615  	}
   616  	return z.Quo(&a, &b)
   617  }
   618  
   619  // SetInf sets z to the infinite Float -Inf if signbit is
   620  // set, or +Inf if signbit is not set, and returns z. The
   621  // precision of z is unchanged and the result is always
   622  // Exact.
   623  func (z *Float) SetInf(signbit bool) *Float {
   624  	z.acc = Exact
   625  	z.form = inf
   626  	z.neg = signbit
   627  	return z
   628  }
   629  
   630  // Set sets z to the (possibly rounded) value of x and returns z.
   631  // If z's precision is 0, it is changed to the precision of x
   632  // before setting z (and rounding will have no effect).
   633  // Rounding is performed according to z's precision and rounding
   634  // mode; and z's accuracy reports the result error relative to the
   635  // exact (not rounded) result.
   636  func (z *Float) Set(x *Float) *Float {
   637  	if debugFloat {
   638  		x.validate()
   639  	}
   640  	z.acc = Exact
   641  	if z != x {
   642  		z.form = x.form
   643  		z.neg = x.neg
   644  		if x.form == finite {
   645  			z.exp = x.exp
   646  			z.mant = z.mant.set(x.mant)
   647  		}
   648  		if z.prec == 0 {
   649  			z.prec = x.prec
   650  		} else if z.prec < x.prec {
   651  			z.round(0)
   652  		}
   653  	}
   654  	return z
   655  }
   656  
   657  // Copy sets z to x, with the same precision, rounding mode, and
   658  // accuracy as x, and returns z. x is not changed even if z and
   659  // x are the same.
   660  func (z *Float) Copy(x *Float) *Float {
   661  	if debugFloat {
   662  		x.validate()
   663  	}
   664  	if z != x {
   665  		z.prec = x.prec
   666  		z.mode = x.mode
   667  		z.acc = x.acc
   668  		z.form = x.form
   669  		z.neg = x.neg
   670  		if z.form == finite {
   671  			z.mant = z.mant.set(x.mant)
   672  			z.exp = x.exp
   673  		}
   674  	}
   675  	return z
   676  }
   677  
   678  // msb32 returns the 32 most significant bits of x.
   679  func msb32(x nat) uint32 {
   680  	i := len(x) - 1
   681  	if i < 0 {
   682  		return 0
   683  	}
   684  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   685  		panic("x not normalized")
   686  	}
   687  	switch _W {
   688  	case 32:
   689  		return uint32(x[i])
   690  	case 64:
   691  		return uint32(x[i] >> 32)
   692  	}
   693  	panic("unreachable")
   694  }
   695  
   696  // msb64 returns the 64 most significant bits of x.
   697  func msb64(x nat) uint64 {
   698  	i := len(x) - 1
   699  	if i < 0 {
   700  		return 0
   701  	}
   702  	if debugFloat && x[i]&(1<<(_W-1)) == 0 {
   703  		panic("x not normalized")
   704  	}
   705  	switch _W {
   706  	case 32:
   707  		v := uint64(x[i]) << 32
   708  		if i > 0 {
   709  			v |= uint64(x[i-1])
   710  		}
   711  		return v
   712  	case 64:
   713  		return uint64(x[i])
   714  	}
   715  	panic("unreachable")
   716  }
   717  
   718  // Uint64 returns the unsigned integer resulting from truncating x
   719  // towards zero. If 0 <= x <= math.MaxUint64, the result is Exact
   720  // if x is an integer and Below otherwise.
   721  // The result is (0, Above) for x < 0, and (math.MaxUint64, Below)
   722  // for x > math.MaxUint64.
   723  func (x *Float) Uint64() (uint64, Accuracy) {
   724  	if debugFloat {
   725  		x.validate()
   726  	}
   727  
   728  	switch x.form {
   729  	case finite:
   730  		if x.neg {
   731  			return 0, Above
   732  		}
   733  		// 0 < x < +Inf
   734  		if x.exp <= 0 {
   735  			// 0 < x < 1
   736  			return 0, Below
   737  		}
   738  		// 1 <= x < Inf
   739  		if x.exp <= 64 {
   740  			// u = trunc(x) fits into a uint64
   741  			u := msb64(x.mant) >> (64 - uint32(x.exp))
   742  			if x.MinPrec() <= 64 {
   743  				return u, Exact
   744  			}
   745  			return u, Below // x truncated
   746  		}
   747  		// x too large
   748  		return math.MaxUint64, Below
   749  
   750  	case zero:
   751  		return 0, Exact
   752  
   753  	case inf:
   754  		if x.neg {
   755  			return 0, Above
   756  		}
   757  		return math.MaxUint64, Below
   758  	}
   759  
   760  	panic("unreachable")
   761  }
   762  
   763  // Int64 returns the integer resulting from truncating x towards zero.
   764  // If math.MinInt64 <= x <= math.MaxInt64, the result is Exact if x is
   765  // an integer, and Above (x < 0) or Below (x > 0) otherwise.
   766  // The result is (math.MinInt64, Above) for x < math.MinInt64,
   767  // and (math.MaxInt64, Below) for x > math.MaxInt64.
   768  func (x *Float) Int64() (int64, Accuracy) {
   769  	if debugFloat {
   770  		x.validate()
   771  	}
   772  
   773  	switch x.form {
   774  	case finite:
   775  		// 0 < |x| < +Inf
   776  		acc := makeAcc(x.neg)
   777  		if x.exp <= 0 {
   778  			// 0 < |x| < 1
   779  			return 0, acc
   780  		}
   781  		// x.exp > 0
   782  
   783  		// 1 <= |x| < +Inf
   784  		if x.exp <= 63 {
   785  			// i = trunc(x) fits into an int64 (excluding math.MinInt64)
   786  			i := int64(msb64(x.mant) >> (64 - uint32(x.exp)))
   787  			if x.neg {
   788  				i = -i
   789  			}
   790  			if x.MinPrec() <= uint(x.exp) {
   791  				return i, Exact
   792  			}
   793  			return i, acc // x truncated
   794  		}
   795  		if x.neg {
   796  			// check for special case x == math.MinInt64 (i.e., x == -(0.5 << 64))
   797  			if x.exp == 64 && x.MinPrec() == 1 {
   798  				acc = Exact
   799  			}
   800  			return math.MinInt64, acc
   801  		}
   802  		// x too large
   803  		return math.MaxInt64, Below
   804  
   805  	case zero:
   806  		return 0, Exact
   807  
   808  	case inf:
   809  		if x.neg {
   810  			return math.MinInt64, Above
   811  		}
   812  		return math.MaxInt64, Below
   813  	}
   814  
   815  	panic("unreachable")
   816  }
   817  
   818  // Float32 returns the float32 value nearest to x. If x is too small to be
   819  // represented by a float32 (|x| < math.SmallestNonzeroFloat32), the result
   820  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   821  // If x is too large to be represented by a float32 (|x| > math.MaxFloat32),
   822  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   823  func (x *Float) Float32() (float32, Accuracy) {
   824  	if debugFloat {
   825  		x.validate()
   826  	}
   827  
   828  	switch x.form {
   829  	case finite:
   830  		// 0 < |x| < +Inf
   831  
   832  		const (
   833  			fbits = 32                //        float size
   834  			mbits = 23                //        mantissa size (excluding implicit msb)
   835  			ebits = fbits - mbits - 1 //     8  exponent size
   836  			bias  = 1<<(ebits-1) - 1  //   127  exponent bias
   837  			dmin  = 1 - bias - mbits  //  -149  smallest unbiased exponent (denormal)
   838  			emin  = 1 - bias          //  -126  smallest unbiased exponent (normal)
   839  			emax  = bias              //   127  largest unbiased exponent (normal)
   840  		)
   841  
   842  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float32 mantissa.
   843  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   844  
   845  		// Compute precision p for float32 mantissa.
   846  		// If the exponent is too small, we have a denormal number before
   847  		// rounding and fewer than p mantissa bits of precision available
   848  		// (the exponent remains fixed but the mantissa gets shifted right).
   849  		p := mbits + 1 // precision of normal float
   850  		if e < emin {
   851  			// recompute precision
   852  			p = mbits + 1 - emin + int(e)
   853  			// If p == 0, the mantissa of x is shifted so much to the right
   854  			// that its msb falls immediately to the right of the float32
   855  			// mantissa space. In other words, if the smallest denormal is
   856  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   857  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   858  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
   859  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   860  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   861  				// underflow to ±0
   862  				if x.neg {
   863  					var z float32
   864  					return -z, Above
   865  				}
   866  				return 0.0, Below
   867  			}
   868  			// otherwise, round up
   869  			// We handle p == 0 explicitly because it's easy and because
   870  			// Float.round doesn't support rounding to 0 bits of precision.
   871  			if p == 0 {
   872  				if x.neg {
   873  					return -math.SmallestNonzeroFloat32, Below
   874  				}
   875  				return math.SmallestNonzeroFloat32, Above
   876  			}
   877  		}
   878  		// p > 0
   879  
   880  		// round
   881  		var r Float
   882  		r.prec = uint32(p)
   883  		r.Set(x)
   884  		e = r.exp - 1
   885  
   886  		// Rounding may have caused r to overflow to ±Inf
   887  		// (rounding never causes underflows to 0).
   888  		// If the exponent is too large, also overflow to ±Inf.
   889  		if r.form == inf || e > emax {
   890  			// overflow
   891  			if x.neg {
   892  				return float32(math.Inf(-1)), Below
   893  			}
   894  			return float32(math.Inf(+1)), Above
   895  		}
   896  		// e <= emax
   897  
   898  		// Determine sign, biased exponent, and mantissa.
   899  		var sign, bexp, mant uint32
   900  		if x.neg {
   901  			sign = 1 << (fbits - 1)
   902  		}
   903  
   904  		// Rounding may have caused a denormal number to
   905  		// become normal. Check again.
   906  		if e < emin {
   907  			// denormal number: recompute precision
   908  			// Since rounding may have at best increased precision
   909  			// and we have eliminated p <= 0 early, we know p > 0.
   910  			// bexp == 0 for denormals
   911  			p = mbits + 1 - emin + int(e)
   912  			mant = msb32(r.mant) >> uint(fbits-p)
   913  		} else {
   914  			// normal number: emin <= e <= emax
   915  			bexp = uint32(e+bias) << mbits
   916  			mant = msb32(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
   917  		}
   918  
   919  		return math.Float32frombits(sign | bexp | mant), r.acc
   920  
   921  	case zero:
   922  		if x.neg {
   923  			var z float32
   924  			return -z, Exact
   925  		}
   926  		return 0.0, Exact
   927  
   928  	case inf:
   929  		if x.neg {
   930  			return float32(math.Inf(-1)), Exact
   931  		}
   932  		return float32(math.Inf(+1)), Exact
   933  	}
   934  
   935  	panic("unreachable")
   936  }
   937  
   938  // Float64 returns the float64 value nearest to x. If x is too small to be
   939  // represented by a float64 (|x| < math.SmallestNonzeroFloat64), the result
   940  // is (0, Below) or (-0, Above), respectively, depending on the sign of x.
   941  // If x is too large to be represented by a float64 (|x| > math.MaxFloat64),
   942  // the result is (+Inf, Above) or (-Inf, Below), depending on the sign of x.
   943  func (x *Float) Float64() (float64, Accuracy) {
   944  	if debugFloat {
   945  		x.validate()
   946  	}
   947  
   948  	switch x.form {
   949  	case finite:
   950  		// 0 < |x| < +Inf
   951  
   952  		const (
   953  			fbits = 64                //        float size
   954  			mbits = 52                //        mantissa size (excluding implicit msb)
   955  			ebits = fbits - mbits - 1 //    11  exponent size
   956  			bias  = 1<<(ebits-1) - 1  //  1023  exponent bias
   957  			dmin  = 1 - bias - mbits  // -1074  smallest unbiased exponent (denormal)
   958  			emin  = 1 - bias          // -1022  smallest unbiased exponent (normal)
   959  			emax  = bias              //  1023  largest unbiased exponent (normal)
   960  		)
   961  
   962  		// Float mantissa m is 0.5 <= m < 1.0; compute exponent e for float64 mantissa.
   963  		e := x.exp - 1 // exponent for normal mantissa m with 1.0 <= m < 2.0
   964  
   965  		// Compute precision p for float64 mantissa.
   966  		// If the exponent is too small, we have a denormal number before
   967  		// rounding and fewer than p mantissa bits of precision available
   968  		// (the exponent remains fixed but the mantissa gets shifted right).
   969  		p := mbits + 1 // precision of normal float
   970  		if e < emin {
   971  			// recompute precision
   972  			p = mbits + 1 - emin + int(e)
   973  			// If p == 0, the mantissa of x is shifted so much to the right
   974  			// that its msb falls immediately to the right of the float64
   975  			// mantissa space. In other words, if the smallest denormal is
   976  			// considered "1.0", for p == 0, the mantissa value m is >= 0.5.
   977  			// If m > 0.5, it is rounded up to 1.0; i.e., the smallest denormal.
   978  			// If m == 0.5, it is rounded down to even, i.e., 0.0.
   979  			// If p < 0, the mantissa value m is <= "0.25" which is never rounded up.
   980  			if p < 0 /* m <= 0.25 */ || p == 0 && x.mant.sticky(uint(len(x.mant))*_W-1) == 0 /* m == 0.5 */ {
   981  				// underflow to ±0
   982  				if x.neg {
   983  					var z float64
   984  					return -z, Above
   985  				}
   986  				return 0.0, Below
   987  			}
   988  			// otherwise, round up
   989  			// We handle p == 0 explicitly because it's easy and because
   990  			// Float.round doesn't support rounding to 0 bits of precision.
   991  			if p == 0 {
   992  				if x.neg {
   993  					return -math.SmallestNonzeroFloat64, Below
   994  				}
   995  				return math.SmallestNonzeroFloat64, Above
   996  			}
   997  		}
   998  		// p > 0
   999  
  1000  		// round
  1001  		var r Float
  1002  		r.prec = uint32(p)
  1003  		r.Set(x)
  1004  		e = r.exp - 1
  1005  
  1006  		// Rounding may have caused r to overflow to ±Inf
  1007  		// (rounding never causes underflows to 0).
  1008  		// If the exponent is too large, also overflow to ±Inf.
  1009  		if r.form == inf || e > emax {
  1010  			// overflow
  1011  			if x.neg {
  1012  				return math.Inf(-1), Below
  1013  			}
  1014  			return math.Inf(+1), Above
  1015  		}
  1016  		// e <= emax
  1017  
  1018  		// Determine sign, biased exponent, and mantissa.
  1019  		var sign, bexp, mant uint64
  1020  		if x.neg {
  1021  			sign = 1 << (fbits - 1)
  1022  		}
  1023  
  1024  		// Rounding may have caused a denormal number to
  1025  		// become normal. Check again.
  1026  		if e < emin {
  1027  			// denormal number: recompute precision
  1028  			// Since rounding may have at best increased precision
  1029  			// and we have eliminated p <= 0 early, we know p > 0.
  1030  			// bexp == 0 for denormals
  1031  			p = mbits + 1 - emin + int(e)
  1032  			mant = msb64(r.mant) >> uint(fbits-p)
  1033  		} else {
  1034  			// normal number: emin <= e <= emax
  1035  			bexp = uint64(e+bias) << mbits
  1036  			mant = msb64(r.mant) >> ebits & (1<<mbits - 1) // cut off msb (implicit 1 bit)
  1037  		}
  1038  
  1039  		return math.Float64frombits(sign | bexp | mant), r.acc
  1040  
  1041  	case zero:
  1042  		if x.neg {
  1043  			var z float64
  1044  			return -z, Exact
  1045  		}
  1046  		return 0.0, Exact
  1047  
  1048  	case inf:
  1049  		if x.neg {
  1050  			return math.Inf(-1), Exact
  1051  		}
  1052  		return math.Inf(+1), Exact
  1053  	}
  1054  
  1055  	panic("unreachable")
  1056  }
  1057  
  1058  // Int returns the result of truncating x towards zero;
  1059  // or nil if x is an infinity.
  1060  // The result is Exact if x.IsInt(); otherwise it is Below
  1061  // for x > 0, and Above for x < 0.
  1062  // If a non-nil *Int argument z is provided, Int stores
  1063  // the result in z instead of allocating a new Int.
  1064  func (x *Float) Int(z *Int) (*Int, Accuracy) {
  1065  	if debugFloat {
  1066  		x.validate()
  1067  	}
  1068  
  1069  	if z == nil && x.form <= finite {
  1070  		z = new(Int)
  1071  	}
  1072  
  1073  	switch x.form {
  1074  	case finite:
  1075  		// 0 < |x| < +Inf
  1076  		acc := makeAcc(x.neg)
  1077  		if x.exp <= 0 {
  1078  			// 0 < |x| < 1
  1079  			return z.SetInt64(0), acc
  1080  		}
  1081  		// x.exp > 0
  1082  
  1083  		// 1 <= |x| < +Inf
  1084  		// determine minimum required precision for x
  1085  		allBits := uint(len(x.mant)) * _W
  1086  		exp := uint(x.exp)
  1087  		if x.MinPrec() <= exp {
  1088  			acc = Exact
  1089  		}
  1090  		// shift mantissa as needed
  1091  		if z == nil {
  1092  			z = new(Int)
  1093  		}
  1094  		z.neg = x.neg
  1095  		switch {
  1096  		case exp > allBits:
  1097  			z.abs = z.abs.shl(x.mant, exp-allBits)
  1098  		default:
  1099  			z.abs = z.abs.set(x.mant)
  1100  		case exp < allBits:
  1101  			z.abs = z.abs.shr(x.mant, allBits-exp)
  1102  		}
  1103  		return z, acc
  1104  
  1105  	case zero:
  1106  		return z.SetInt64(0), Exact
  1107  
  1108  	case inf:
  1109  		return nil, makeAcc(x.neg)
  1110  	}
  1111  
  1112  	panic("unreachable")
  1113  }
  1114  
  1115  // Rat returns the rational number corresponding to x;
  1116  // or nil if x is an infinity.
  1117  // The result is Exact if x is not an Inf.
  1118  // If a non-nil *Rat argument z is provided, Rat stores
  1119  // the result in z instead of allocating a new Rat.
  1120  func (x *Float) Rat(z *Rat) (*Rat, Accuracy) {
  1121  	if debugFloat {
  1122  		x.validate()
  1123  	}
  1124  
  1125  	if z == nil && x.form <= finite {
  1126  		z = new(Rat)
  1127  	}
  1128  
  1129  	switch x.form {
  1130  	case finite:
  1131  		// 0 < |x| < +Inf
  1132  		allBits := int32(len(x.mant)) * _W
  1133  		// build up numerator and denominator
  1134  		z.a.neg = x.neg
  1135  		switch {
  1136  		case x.exp > allBits:
  1137  			z.a.abs = z.a.abs.shl(x.mant, uint(x.exp-allBits))
  1138  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1139  			// z already in normal form
  1140  		default:
  1141  			z.a.abs = z.a.abs.set(x.mant)
  1142  			z.b.abs = z.b.abs[:0] // == 1 (see Rat)
  1143  			// z already in normal form
  1144  		case x.exp < allBits:
  1145  			z.a.abs = z.a.abs.set(x.mant)
  1146  			t := z.b.abs.setUint64(1)
  1147  			z.b.abs = t.shl(t, uint(allBits-x.exp))
  1148  			z.norm()
  1149  		}
  1150  		return z, Exact
  1151  
  1152  	case zero:
  1153  		return z.SetInt64(0), Exact
  1154  
  1155  	case inf:
  1156  		return nil, makeAcc(x.neg)
  1157  	}
  1158  
  1159  	panic("unreachable")
  1160  }
  1161  
  1162  // Abs sets z to the (possibly rounded) value |x| (the absolute value of x)
  1163  // and returns z.
  1164  func (z *Float) Abs(x *Float) *Float {
  1165  	z.Set(x)
  1166  	z.neg = false
  1167  	return z
  1168  }
  1169  
  1170  // Neg sets z to the (possibly rounded) value of x with its sign negated,
  1171  // and returns z.
  1172  func (z *Float) Neg(x *Float) *Float {
  1173  	z.Set(x)
  1174  	z.neg = !z.neg
  1175  	return z
  1176  }
  1177  
  1178  func validateBinaryOperands(x, y *Float) {
  1179  	if !debugFloat {
  1180  		// avoid performance bugs
  1181  		panic("validateBinaryOperands called but debugFloat is not set")
  1182  	}
  1183  	if len(x.mant) == 0 {
  1184  		panic("empty mantissa for x")
  1185  	}
  1186  	if len(y.mant) == 0 {
  1187  		panic("empty mantissa for y")
  1188  	}
  1189  }
  1190  
  1191  // z = x + y, ignoring signs of x and y for the addition
  1192  // but using the sign of z for rounding the result.
  1193  // x and y must have a non-empty mantissa and valid exponent.
  1194  func (z *Float) uadd(x, y *Float) {
  1195  	// Note: This implementation requires 2 shifts most of the
  1196  	// time. It is also inefficient if exponents or precisions
  1197  	// differ by wide margins. The following article describes
  1198  	// an efficient (but much more complicated) implementation
  1199  	// compatible with the internal representation used here:
  1200  	//
  1201  	// Vincent Lefèvre: "The Generic Multiple-Precision Floating-
  1202  	// Point Addition With Exact Rounding (as in the MPFR Library)"
  1203  	// http://www.vinc17.net/research/papers/rnc6.pdf
  1204  
  1205  	if debugFloat {
  1206  		validateBinaryOperands(x, y)
  1207  	}
  1208  
  1209  	// compute exponents ex, ey for mantissa with "binary point"
  1210  	// on the right (mantissa.0) - use int64 to avoid overflow
  1211  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1212  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1213  
  1214  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1215  
  1216  	// TODO(gri) having a combined add-and-shift primitive
  1217  	//           could make this code significantly faster
  1218  	switch {
  1219  	case ex < ey:
  1220  		if al {
  1221  			t := nat(nil).shl(y.mant, uint(ey-ex))
  1222  			z.mant = z.mant.add(x.mant, t)
  1223  		} else {
  1224  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1225  			z.mant = z.mant.add(x.mant, z.mant)
  1226  		}
  1227  	default:
  1228  		// ex == ey, no shift needed
  1229  		z.mant = z.mant.add(x.mant, y.mant)
  1230  	case ex > ey:
  1231  		if al {
  1232  			t := nat(nil).shl(x.mant, uint(ex-ey))
  1233  			z.mant = z.mant.add(t, y.mant)
  1234  		} else {
  1235  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1236  			z.mant = z.mant.add(z.mant, y.mant)
  1237  		}
  1238  		ex = ey
  1239  	}
  1240  	// len(z.mant) > 0
  1241  
  1242  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1243  }
  1244  
  1245  // z = x - y for |x| > |y|, ignoring signs of x and y for the subtraction
  1246  // but using the sign of z for rounding the result.
  1247  // x and y must have a non-empty mantissa and valid exponent.
  1248  func (z *Float) usub(x, y *Float) {
  1249  	// This code is symmetric to uadd.
  1250  	// We have not factored the common code out because
  1251  	// eventually uadd (and usub) should be optimized
  1252  	// by special-casing, and the code will diverge.
  1253  
  1254  	if debugFloat {
  1255  		validateBinaryOperands(x, y)
  1256  	}
  1257  
  1258  	ex := int64(x.exp) - int64(len(x.mant))*_W
  1259  	ey := int64(y.exp) - int64(len(y.mant))*_W
  1260  
  1261  	al := alias(z.mant, x.mant) || alias(z.mant, y.mant)
  1262  
  1263  	switch {
  1264  	case ex < ey:
  1265  		if al {
  1266  			t := nat(nil).shl(y.mant, uint(ey-ex))
  1267  			z.mant = t.sub(x.mant, t)
  1268  		} else {
  1269  			z.mant = z.mant.shl(y.mant, uint(ey-ex))
  1270  			z.mant = z.mant.sub(x.mant, z.mant)
  1271  		}
  1272  	default:
  1273  		// ex == ey, no shift needed
  1274  		z.mant = z.mant.sub(x.mant, y.mant)
  1275  	case ex > ey:
  1276  		if al {
  1277  			t := nat(nil).shl(x.mant, uint(ex-ey))
  1278  			z.mant = t.sub(t, y.mant)
  1279  		} else {
  1280  			z.mant = z.mant.shl(x.mant, uint(ex-ey))
  1281  			z.mant = z.mant.sub(z.mant, y.mant)
  1282  		}
  1283  		ex = ey
  1284  	}
  1285  
  1286  	// operands may have canceled each other out
  1287  	if len(z.mant) == 0 {
  1288  		z.acc = Exact
  1289  		z.form = zero
  1290  		z.neg = false
  1291  		return
  1292  	}
  1293  	// len(z.mant) > 0
  1294  
  1295  	z.setExpAndRound(ex+int64(len(z.mant))*_W-fnorm(z.mant), 0)
  1296  }
  1297  
  1298  // z = x * y, ignoring signs of x and y for the multiplication
  1299  // but using the sign of z for rounding the result.
  1300  // x and y must have a non-empty mantissa and valid exponent.
  1301  func (z *Float) umul(x, y *Float) {
  1302  	if debugFloat {
  1303  		validateBinaryOperands(x, y)
  1304  	}
  1305  
  1306  	// Note: This is doing too much work if the precision
  1307  	// of z is less than the sum of the precisions of x
  1308  	// and y which is often the case (e.g., if all floats
  1309  	// have the same precision).
  1310  	// TODO(gri) Optimize this for the common case.
  1311  
  1312  	e := int64(x.exp) + int64(y.exp)
  1313  	z.mant = z.mant.mul(x.mant, y.mant)
  1314  
  1315  	z.setExpAndRound(e-fnorm(z.mant), 0)
  1316  }
  1317  
  1318  // z = x / y, ignoring signs of x and y for the division
  1319  // but using the sign of z for rounding the result.
  1320  // x and y must have a non-empty mantissa and valid exponent.
  1321  func (z *Float) uquo(x, y *Float) {
  1322  	if debugFloat {
  1323  		validateBinaryOperands(x, y)
  1324  	}
  1325  
  1326  	// mantissa length in words for desired result precision + 1
  1327  	// (at least one extra bit so we get the rounding bit after
  1328  	// the division)
  1329  	n := int(z.prec/_W) + 1
  1330  
  1331  	// compute adjusted x.mant such that we get enough result precision
  1332  	xadj := x.mant
  1333  	if d := n - len(x.mant) + len(y.mant); d > 0 {
  1334  		// d extra words needed => add d "0 digits" to x
  1335  		xadj = make(nat, len(x.mant)+d)
  1336  		copy(xadj[d:], x.mant)
  1337  	}
  1338  	// TODO(gri): If we have too many digits (d < 0), we should be able
  1339  	// to shorten x for faster division. But we must be extra careful
  1340  	// with rounding in that case.
  1341  
  1342  	// Compute d before division since there may be aliasing of x.mant
  1343  	// (via xadj) or y.mant with z.mant.
  1344  	d := len(xadj) - len(y.mant)
  1345  
  1346  	// divide
  1347  	var r nat
  1348  	z.mant, r = z.mant.div(nil, xadj, y.mant)
  1349  	e := int64(x.exp) - int64(y.exp) - int64(d-len(z.mant))*_W
  1350  
  1351  	// The result is long enough to include (at least) the rounding bit.
  1352  	// If there's a non-zero remainder, the corresponding fractional part
  1353  	// (if it were computed), would have a non-zero sticky bit (if it were
  1354  	// zero, it couldn't have a non-zero remainder).
  1355  	var sbit uint
  1356  	if len(r) > 0 {
  1357  		sbit = 1
  1358  	}
  1359  
  1360  	z.setExpAndRound(e-fnorm(z.mant), sbit)
  1361  }
  1362  
  1363  // ucmp returns -1, 0, or +1, depending on whether
  1364  // |x| < |y|, |x| == |y|, or |x| > |y|.
  1365  // x and y must have a non-empty mantissa and valid exponent.
  1366  func (x *Float) ucmp(y *Float) int {
  1367  	if debugFloat {
  1368  		validateBinaryOperands(x, y)
  1369  	}
  1370  
  1371  	switch {
  1372  	case x.exp < y.exp:
  1373  		return -1
  1374  	case x.exp > y.exp:
  1375  		return +1
  1376  	}
  1377  	// x.exp == y.exp
  1378  
  1379  	// compare mantissas
  1380  	i := len(x.mant)
  1381  	j := len(y.mant)
  1382  	for i > 0 || j > 0 {
  1383  		var xm, ym Word
  1384  		if i > 0 {
  1385  			i--
  1386  			xm = x.mant[i]
  1387  		}
  1388  		if j > 0 {
  1389  			j--
  1390  			ym = y.mant[j]
  1391  		}
  1392  		switch {
  1393  		case xm < ym:
  1394  			return -1
  1395  		case xm > ym:
  1396  			return +1
  1397  		}
  1398  	}
  1399  
  1400  	return 0
  1401  }
  1402  
  1403  // Handling of sign bit as defined by IEEE 754-2008, section 6.3:
  1404  //
  1405  // When neither the inputs nor result are NaN, the sign of a product or
  1406  // quotient is the exclusive OR of the operands’ signs; the sign of a sum,
  1407  // or of a difference x−y regarded as a sum x+(−y), differs from at most
  1408  // one of the addends’ signs; and the sign of the result of conversions,
  1409  // the quantize operation, the roundToIntegral operations, and the
  1410  // roundToIntegralExact (see 5.3.1) is the sign of the first or only operand.
  1411  // These rules shall apply even when operands or results are zero or infinite.
  1412  //
  1413  // When the sum of two operands with opposite signs (or the difference of
  1414  // two operands with like signs) is exactly zero, the sign of that sum (or
  1415  // difference) shall be +0 in all rounding-direction attributes except
  1416  // roundTowardNegative; under that attribute, the sign of an exact zero
  1417  // sum (or difference) shall be −0. However, x+x = x−(−x) retains the same
  1418  // sign as x even when x is zero.
  1419  //
  1420  // See also: https://play.golang.org/p/RtH3UCt5IH
  1421  
  1422  // Add sets z to the rounded sum x+y and returns z. If z's precision is 0,
  1423  // it is changed to the larger of x's or y's precision before the operation.
  1424  // Rounding is performed according to z's precision and rounding mode; and
  1425  // z's accuracy reports the result error relative to the exact (not rounded)
  1426  // result. Add panics with ErrNaN if x and y are infinities with opposite
  1427  // signs. The value of z is undefined in that case.
  1428  //
  1429  // BUG(gri) When rounding ToNegativeInf, the sign of Float values rounded to 0 is incorrect.
  1430  func (z *Float) Add(x, y *Float) *Float {
  1431  	if debugFloat {
  1432  		x.validate()
  1433  		y.validate()
  1434  	}
  1435  
  1436  	if z.prec == 0 {
  1437  		z.prec = umax32(x.prec, y.prec)
  1438  	}
  1439  
  1440  	if x.form == finite && y.form == finite {
  1441  		// x + y (common case)
  1442  
  1443  		// Below we set z.neg = x.neg, and when z aliases y this will
  1444  		// change the y operand's sign. This is fine, because if an
  1445  		// operand aliases the receiver it'll be overwritten, but we still
  1446  		// want the original x.neg and y.neg values when we evaluate
  1447  		// x.neg != y.neg, so we need to save y.neg before setting z.neg.
  1448  		yneg := y.neg
  1449  
  1450  		z.neg = x.neg
  1451  		if x.neg == yneg {
  1452  			// x + y == x + y
  1453  			// (-x) + (-y) == -(x + y)
  1454  			z.uadd(x, y)
  1455  		} else {
  1456  			// x + (-y) == x - y == -(y - x)
  1457  			// (-x) + y == y - x == -(x - y)
  1458  			if x.ucmp(y) > 0 {
  1459  				z.usub(x, y)
  1460  			} else {
  1461  				z.neg = !z.neg
  1462  				z.usub(y, x)
  1463  			}
  1464  		}
  1465  		return z
  1466  	}
  1467  
  1468  	if x.form == inf && y.form == inf && x.neg != y.neg {
  1469  		// +Inf + -Inf
  1470  		// -Inf + +Inf
  1471  		// value of z is undefined but make sure it's valid
  1472  		z.acc = Exact
  1473  		z.form = zero
  1474  		z.neg = false
  1475  		panic(ErrNaN{"addition of infinities with opposite signs"})
  1476  	}
  1477  
  1478  	if x.form == zero && y.form == zero {
  1479  		// ±0 + ±0
  1480  		z.acc = Exact
  1481  		z.form = zero
  1482  		z.neg = x.neg && y.neg // -0 + -0 == -0
  1483  		return z
  1484  	}
  1485  
  1486  	if x.form == inf || y.form == zero {
  1487  		// ±Inf + y
  1488  		// x + ±0
  1489  		return z.Set(x)
  1490  	}
  1491  
  1492  	// ±0 + y
  1493  	// x + ±Inf
  1494  	return z.Set(y)
  1495  }
  1496  
  1497  // Sub sets z to the rounded difference x-y and returns z.
  1498  // Precision, rounding, and accuracy reporting are as for Add.
  1499  // Sub panics with ErrNaN if x and y are infinities with equal
  1500  // signs. The value of z is undefined in that case.
  1501  func (z *Float) Sub(x, y *Float) *Float {
  1502  	if debugFloat {
  1503  		x.validate()
  1504  		y.validate()
  1505  	}
  1506  
  1507  	if z.prec == 0 {
  1508  		z.prec = umax32(x.prec, y.prec)
  1509  	}
  1510  
  1511  	if x.form == finite && y.form == finite {
  1512  		// x - y (common case)
  1513  		yneg := y.neg
  1514  		z.neg = x.neg
  1515  		if x.neg != yneg {
  1516  			// x - (-y) == x + y
  1517  			// (-x) - y == -(x + y)
  1518  			z.uadd(x, y)
  1519  		} else {
  1520  			// x - y == x - y == -(y - x)
  1521  			// (-x) - (-y) == y - x == -(x - y)
  1522  			if x.ucmp(y) > 0 {
  1523  				z.usub(x, y)
  1524  			} else {
  1525  				z.neg = !z.neg
  1526  				z.usub(y, x)
  1527  			}
  1528  		}
  1529  		return z
  1530  	}
  1531  
  1532  	if x.form == inf && y.form == inf && x.neg == y.neg {
  1533  		// +Inf - +Inf
  1534  		// -Inf - -Inf
  1535  		// value of z is undefined but make sure it's valid
  1536  		z.acc = Exact
  1537  		z.form = zero
  1538  		z.neg = false
  1539  		panic(ErrNaN{"subtraction of infinities with equal signs"})
  1540  	}
  1541  
  1542  	if x.form == zero && y.form == zero {
  1543  		// ±0 - ±0
  1544  		z.acc = Exact
  1545  		z.form = zero
  1546  		z.neg = x.neg && !y.neg // -0 - +0 == -0
  1547  		return z
  1548  	}
  1549  
  1550  	if x.form == inf || y.form == zero {
  1551  		// ±Inf - y
  1552  		// x - ±0
  1553  		return z.Set(x)
  1554  	}
  1555  
  1556  	// ±0 - y
  1557  	// x - ±Inf
  1558  	return z.Neg(y)
  1559  }
  1560  
  1561  // Mul sets z to the rounded product x*y and returns z.
  1562  // Precision, rounding, and accuracy reporting are as for Add.
  1563  // Mul panics with ErrNaN if one operand is zero and the other
  1564  // operand an infinity. The value of z is undefined in that case.
  1565  func (z *Float) Mul(x, y *Float) *Float {
  1566  	if debugFloat {
  1567  		x.validate()
  1568  		y.validate()
  1569  	}
  1570  
  1571  	if z.prec == 0 {
  1572  		z.prec = umax32(x.prec, y.prec)
  1573  	}
  1574  
  1575  	z.neg = x.neg != y.neg
  1576  
  1577  	if x.form == finite && y.form == finite {
  1578  		// x * y (common case)
  1579  		z.umul(x, y)
  1580  		return z
  1581  	}
  1582  
  1583  	z.acc = Exact
  1584  	if x.form == zero && y.form == inf || x.form == inf && y.form == zero {
  1585  		// ±0 * ±Inf
  1586  		// ±Inf * ±0
  1587  		// value of z is undefined but make sure it's valid
  1588  		z.form = zero
  1589  		z.neg = false
  1590  		panic(ErrNaN{"multiplication of zero with infinity"})
  1591  	}
  1592  
  1593  	if x.form == inf || y.form == inf {
  1594  		// ±Inf * y
  1595  		// x * ±Inf
  1596  		z.form = inf
  1597  		return z
  1598  	}
  1599  
  1600  	// ±0 * y
  1601  	// x * ±0
  1602  	z.form = zero
  1603  	return z
  1604  }
  1605  
  1606  // Quo sets z to the rounded quotient x/y and returns z.
  1607  // Precision, rounding, and accuracy reporting are as for Add.
  1608  // Quo panics with ErrNaN if both operands are zero or infinities.
  1609  // The value of z is undefined in that case.
  1610  func (z *Float) Quo(x, y *Float) *Float {
  1611  	if debugFloat {
  1612  		x.validate()
  1613  		y.validate()
  1614  	}
  1615  
  1616  	if z.prec == 0 {
  1617  		z.prec = umax32(x.prec, y.prec)
  1618  	}
  1619  
  1620  	z.neg = x.neg != y.neg
  1621  
  1622  	if x.form == finite && y.form == finite {
  1623  		// x / y (common case)
  1624  		z.uquo(x, y)
  1625  		return z
  1626  	}
  1627  
  1628  	z.acc = Exact
  1629  	if x.form == zero && y.form == zero || x.form == inf && y.form == inf {
  1630  		// ±0 / ±0
  1631  		// ±Inf / ±Inf
  1632  		// value of z is undefined but make sure it's valid
  1633  		z.form = zero
  1634  		z.neg = false
  1635  		panic(ErrNaN{"division of zero by zero or infinity by infinity"})
  1636  	}
  1637  
  1638  	if x.form == zero || y.form == inf {
  1639  		// ±0 / y
  1640  		// x / ±Inf
  1641  		z.form = zero
  1642  		return z
  1643  	}
  1644  
  1645  	// x / ±0
  1646  	// ±Inf / y
  1647  	z.form = inf
  1648  	return z
  1649  }
  1650  
  1651  // Cmp compares x and y and returns:
  1652  //
  1653  //   -1 if x <  y
  1654  //    0 if x == y (incl. -0 == 0, -Inf == -Inf, and +Inf == +Inf)
  1655  //   +1 if x >  y
  1656  //
  1657  func (x *Float) Cmp(y *Float) int {
  1658  	if debugFloat {
  1659  		x.validate()
  1660  		y.validate()
  1661  	}
  1662  
  1663  	mx := x.ord()
  1664  	my := y.ord()
  1665  	switch {
  1666  	case mx < my:
  1667  		return -1
  1668  	case mx > my:
  1669  		return +1
  1670  	}
  1671  	// mx == my
  1672  
  1673  	// only if |mx| == 1 we have to compare the mantissae
  1674  	switch mx {
  1675  	case -1:
  1676  		return y.ucmp(x)
  1677  	case +1:
  1678  		return x.ucmp(y)
  1679  	}
  1680  
  1681  	return 0
  1682  }
  1683  
  1684  // ord classifies x and returns:
  1685  //
  1686  //	-2 if -Inf == x
  1687  //	-1 if -Inf < x < 0
  1688  //	 0 if x == 0 (signed or unsigned)
  1689  //	+1 if 0 < x < +Inf
  1690  //	+2 if x == +Inf
  1691  //
  1692  func (x *Float) ord() int {
  1693  	var m int
  1694  	switch x.form {
  1695  	case finite:
  1696  		m = 1
  1697  	case zero:
  1698  		return 0
  1699  	case inf:
  1700  		m = 2
  1701  	}
  1702  	if x.neg {
  1703  		m = -m
  1704  	}
  1705  	return m
  1706  }
  1707  
  1708  func umax32(x, y uint32) uint32 {
  1709  	if x > y {
  1710  		return x
  1711  	}
  1712  	return y
  1713  }