github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/strconv/ftoaryu.go

github.com/twelsh-aw/go/src@v0.0.0-20230516233729-a56fe86a7c81/strconv/ftoaryu.go (about)

     1  // Copyright 2021 The Go Authors. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in the LICENSE file.
     4  
     5  package strconv
     6  
     7  import (
     8  	"math/bits"
     9  )
    10  
    11  // binary to decimal conversion using the Ryū algorithm.
    12  //
    13  // See Ulf Adams, "Ryū: Fast Float-to-String Conversion" (doi:10.1145/3192366.3192369)
    14  //
    15  // Fixed precision formatting is a variant of the original paper's
    16  // algorithm, where a single multiplication by 10^k is required,
    17  // sharing the same rounding guarantees.
    18  
// ryuFtoaFixed32 formats mant*(2^exp) with prec decimal digits.
//
// The digits are stored in d by formatDecimal, after which d.dp is
// corrected by the decimal scaling exponent q chosen below. A zero mant
// sets d.nd and d.dp to 0 and writes no digits. The function panics if
// prec is negative or greater than 9.
func ryuFtoaFixed32(d *decimalSlice, mant uint32, exp int, prec int) {
	if prec < 0 {
		panic("ryuFtoaFixed32 called with negative prec")
	}
	if prec > 9 {
		panic("ryuFtoaFixed32 called with prec > 9")
	}
	// Zero input.
	if mant == 0 {
		d.nd, d.dp = 0, 0
		return
	}
	// Renormalize to a 25-bit mantissa.
	// Only mantissas shorter than 25 bits are shifted; e2 is lowered by the
	// same amount so that mant*(2^e2) keeps its value.
	e2 := exp
	if b := bits.Len32(mant); b < 25 {
		mant <<= uint(25 - b)
		e2 += b - 25
	}
	// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
	// at least prec decimal digits, i.e.
	//     mant*(2^e2)*(10^q) >= 10^(prec-1)
	// Because mant >= 2^24, it is enough to choose:
	//     2^(e2+24) >= 10^(-q+prec-1)
	// or q = -mulByLog2Log10(e2+24) + prec - 1
	q := -mulByLog2Log10(e2+24) + prec - 1

	// Now compute mant*(2^e2)*(10^q).
	// Is it an exact computation?
	// Only small positive powers of 10 are exact (5^28 has 66 bits).
	exact := q <= 27 && q >= 0

	// di*(2^dexp2) is the scaled value; d0 reports whether the bits dropped
	// by the multiplication were all zero.
	di, dexp2, d0 := mult64bitPow10(mant, e2, q)
	if dexp2 >= 0 {
		panic("not enough significant bits after mult64bitPow10")
	}
	// As a special case, computation might still be exact, if exponent
	// was negative and if it amounts to computing an exact division.
	// In that case, we ignore all lower bits.
	// Note that division by 10^11 cannot be exact as 5^11 has 26 bits.
	if q < 0 && q >= -10 && divisibleByPower5(uint64(mant), -q) {
		exact = true
		d0 = true
	}
	// Remove extra lower bits and keep rounding info.
	// dexp2 is negative here, so extra is the number of fractional bits.
	extra := uint(-dexp2)
	extraMask := uint32(1<<extra - 1)

	// After this split di is the integer part and dfrac the fractional bits.
	di, dfrac := di>>extra, di&extraMask
	roundUp := false
	if exact {
		// If we computed an exact product, d + 1/2
		// should round to d+1 if 'd' is odd.
		// A fraction of exactly one half also rounds up when bits were
		// already dropped before it (!d0).
		roundUp = dfrac > 1<<(extra-1) ||
			(dfrac == 1<<(extra-1) && !d0) ||
			(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
	} else {
		// otherwise, d+1/2 always rounds up because
		// we truncated below.
		roundUp = dfrac>>(extra-1) == 1
	}
	// Any nonzero fractional bit means di is a truncation of the true value.
	if dfrac != 0 {
		d0 = false
	}
	// Proceed to the requested number of digits
	formatDecimal(d, uint64(di), !d0, roundUp, prec)
	// Adjust exponent
	// The digits describe the value scaled by 10^q, so undo that scaling.
	d.dp -= q
}
    88  
    89  // ryuFtoaFixed64 formats mant*(2^exp) with prec decimal digits.
    90  func ryuFtoaFixed64(d *decimalSlice, mant uint64, exp int, prec int) {
    91  	if prec > 18 {
    92  		panic("ryuFtoaFixed64 called with prec > 18")
    93  	}
    94  	// Zero input.
    95  	if mant == 0 {
    96  		d.nd, d.dp = 0, 0
    97  		return
    98  	}
    99  	// Renormalize to a 55-bit mantissa.
   100  	e2 := exp
   101  	if b := bits.Len64(mant); b < 55 {
   102  		mant = mant << uint(55-b)
   103  		e2 += b - 55
   104  	}
   105  	// Choose an exponent such that rounded mant*(2^e2)*(10^q) has
   106  	// at least prec decimal digits, i.e
   107  	//     mant*(2^e2)*(10^q) >= 10^(prec-1)
   108  	// Because mant >= 2^54, it is enough to choose:
   109  	//     2^(e2+54) >= 10^(-q+prec-1)
   110  	// or q = -mulByLog2Log10(e2+54) + prec - 1
   111  	//
   112  	// The minimal required exponent is -mulByLog2Log10(1025)+18 = -291
   113  	// The maximal required exponent is mulByLog2Log10(1074)+18 = 342
   114  	q := -mulByLog2Log10(e2+54) + prec - 1
   115  
   116  	// Now compute mant*(2^e2)*(10^q).
   117  	// Is it an exact computation?
   118  	// Only small positive powers of 10 are exact (5^55 has 128 bits).
   119  	exact := q <= 55 && q >= 0
   120  
   121  	di, dexp2, d0 := mult128bitPow10(mant, e2, q)
   122  	if dexp2 >= 0 {
   123  		panic("not enough significant bits after mult128bitPow10")
   124  	}
   125  	// As a special case, computation might still be exact, if exponent
   126  	// was negative and if it amounts to computing an exact division.
   127  	// In that case, we ignore all lower bits.
   128  	// Note that division by 10^23 cannot be exact as 5^23 has 54 bits.
   129  	if q < 0 && q >= -22 && divisibleByPower5(mant, -q) {
   130  		exact = true
   131  		d0 = true
   132  	}
   133  	// Remove extra lower bits and keep rounding info.
   134  	extra := uint(-dexp2)
   135  	extraMask := uint64(1<<extra - 1)
   136  
   137  	di, dfrac := di>>extra, di&extraMask
   138  	roundUp := false
   139  	if exact {
   140  		// If we computed an exact product, d + 1/2
   141  		// should round to d+1 if 'd' is odd.
   142  		roundUp = dfrac > 1<<(extra-1) ||
   143  			(dfrac == 1<<(extra-1) && !d0) ||
   144  			(dfrac == 1<<(extra-1) && d0 && di&1 == 1)
   145  	} else {
   146  		// otherwise, d+1/2 always rounds up because
   147  		// we truncated below.
   148  		roundUp = dfrac>>(extra-1) == 1
   149  	}
   150  	if dfrac != 0 {
   151  		d0 = false
   152  	}
   153  	// Proceed to the requested number of digits
   154  	formatDecimal(d, di, !d0, roundUp, prec)
   155  	// Adjust exponent
   156  	d.dp -= q
   157  }
   158  
// uint64pow10 holds the powers of ten from 10^0 to 10^19 as uint64 values,
// indexed by exponent.
var uint64pow10 = [...]uint64{
	1, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9,
	1e10, 1e11, 1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19,
}
   163  
   164  // formatDecimal fills d with at most prec decimal digits
   165  // of mantissa m. The boolean trunc indicates whether m
   166  // is truncated compared to the original number being formatted.
   167  func formatDecimal(d *decimalSlice, m uint64, trunc bool, roundUp bool, prec int) {
   168  	max := uint64pow10[prec]
   169  	trimmed := 0
   170  	for m >= max {
   171  		a, b := m/10, m%10
   172  		m = a
   173  		trimmed++
   174  		if b > 5 {
   175  			roundUp = true
   176  		} else if b < 5 {
   177  			roundUp = false
   178  		} else { // b == 5
   179  			// round up if there are trailing digits,
   180  			// or if the new value of m is odd (round-to-even convention)
   181  			roundUp = trunc || m&1 == 1
   182  		}
   183  		if b != 0 {
   184  			trunc = true
   185  		}
   186  	}
   187  	if roundUp {
   188  		m++
   189  	}
   190  	if m >= max {
   191  		// Happens if di was originally 99999....xx
   192  		m /= 10
   193  		trimmed++
   194  	}
   195  	// render digits (similar to formatBits)
   196  	n := uint(prec)
   197  	d.nd = prec
   198  	v := m
   199  	for v >= 100 {
   200  		var v1, v2 uint64
   201  		if v>>32 == 0 {
   202  			v1, v2 = uint64(uint32(v)/100), uint64(uint32(v)%100)
   203  		} else {
   204  			v1, v2 = v/100, v%100
   205  		}
   206  		n -= 2
   207  		d.d[n+1] = smallsString[2*v2+1]
   208  		d.d[n+0] = smallsString[2*v2+0]
   209  		v = v1
   210  	}
   211  	if v > 0 {
   212  		n--
   213  		d.d[n] = smallsString[2*v+1]
   214  	}
   215  	if v >= 10 {
   216  		n--
   217  		d.d[n] = smallsString[2*v]
   218  	}
   219  	for d.d[d.nd-1] == '0' {
   220  		d.nd--
   221  		trimmed++
   222  	}
   223  	d.dp = d.nd + trimmed
   224  }
   225  
// ryuFtoaShortest formats mant*2^exp using the digits selected by
// ryuDigits from the interval returned by computeBounds. Unlike the
// fixed-precision variants it takes no digit count: the number of digits
// is determined by how many can be trimmed while staying inside that
// interval.
//
// flt describes the floating-point format of the input; the float32 case
// uses 64-bit multiplications, every other format 128-bit ones. A zero
// mant sets d.nd and d.dp to 0.
func ryuFtoaShortest(d *decimalSlice, mant uint64, exp int, flt *floatInfo) {
	if mant == 0 {
		d.nd, d.dp = 0, 0
		return
	}
	// If input is an exact integer with fewer bits than the mantissa,
	// the previous and next integer are not admissible representations.
	if exp <= 0 && bits.TrailingZeros64(mant) >= -exp {
		mant >>= uint(-exp)
		ryuDigits(d, mant, mant, mant, true, false)
		return
	}
	// (ml, mc, mu)*2^e2 are the lower bound, the value itself and the upper
	// bound of the interval.
	ml, mc, mu, e2 := computeBounds(mant, exp, flt)
	if e2 == 0 {
		// The bounds are already integers; no scaling is needed.
		ryuDigits(d, ml, mc, mu, true, false)
		return
	}
	// Find 10^q *larger* than 2^-e2
	q := mulByLog2Log10(-e2) + 1

	// We are going to multiply by 10^q using 128-bit arithmetic.
	// The exponent is the same for all 3 numbers.
	// The dl0/dc0/du0 flags report whether each product was computed
	// without dropping nonzero bits.
	var dl, dc, du uint64
	var dl0, dc0, du0 bool
	if flt == &float32info {
		var dl32, dc32, du32 uint32
		dl32, _, dl0 = mult64bitPow10(uint32(ml), e2, q)
		dc32, _, dc0 = mult64bitPow10(uint32(mc), e2, q)
		du32, e2, du0 = mult64bitPow10(uint32(mu), e2, q)
		dl, dc, du = uint64(dl32), uint64(dc32), uint64(du32)
	} else {
		dl, _, dl0 = mult128bitPow10(ml, e2, q)
		dc, _, dc0 = mult128bitPow10(mc, e2, q)
		du, e2, du0 = mult128bitPow10(mu, e2, q)
	}
	if e2 >= 0 {
		panic("not enough significant bits after mult128bitPow10")
	}
	// Is it an exact computation?
	if q > 55 {
		// Large positive powers of ten are not exact
		dl0, dc0, du0 = false, false, false
	}
	if q < 0 && q >= -24 {
		// Division by a power of ten may be exact.
		// (note that 5^25 is a 59-bit number so division by 5^25 is never exact).
		if divisibleByPower5(ml, -q) {
			dl0 = true
		}
		if divisibleByPower5(mc, -q) {
			dc0 = true
		}
		if divisibleByPower5(mu, -q) {
			du0 = true
		}
	}
	// Express the results (dl, dc, du)*2^e2 as integers.
	// Extra bits must be removed and rounding hints computed.
	// e2 is negative here, so extra is the number of fractional bits.
	extra := uint(-e2)
	extraMask := uint64(1<<extra - 1)
	// Now compute the floored, integral base 10 mantissas.
	dl, fracl := dl>>extra, dl&extraMask
	dc, fracc := dc>>extra, dc&extraMask
	du, fracu := du>>extra, du&extraMask
	// Is it allowed to use 'du' as a result?
	// It is always allowed when it is truncated, but also
	// if it is exact and the original binary mantissa is even.
	// When disallowed, we can subtract 1.
	uok := !du0 || fracu > 0
	if du0 && fracu == 0 {
		uok = mant&1 == 0
	}
	if !uok {
		du--
	}
	// Is 'dc' the correctly rounded base 10 mantissa?
	// The correct rounding might be dc+1
	cup := false // don't round up.
	if dc0 {
		// If we computed an exact product, the half integer
		// should round to next (even) integer if 'dc' is odd.
		cup = fracc > 1<<(extra-1) ||
			(fracc == 1<<(extra-1) && dc&1 == 1)
	} else {
		// otherwise, the result is a lower truncation of the ideal
		// result.
		cup = fracc>>(extra-1) == 1
	}
	// Is 'dl' an allowed representation?
	// Only if it is an exact value, and if the original binary mantissa
	// was even.
	lok := dl0 && fracl == 0 && (mant&1 == 0)
	if !lok {
		dl++
	}
	// We need to remember whether the trimmed digits of 'dc' are zero.
	c0 := dc0 && fracc == 0
	// render digits
	ryuDigits(d, dl, dc, du, c0, cup)
	// The digits describe the value scaled by 10^q, so undo that scaling.
	d.dp -= q
}
   328  
   329  // mulByLog2Log10 returns math.Floor(x * log(2)/log(10)) for an integer x in
   330  // the range -1600 <= x && x <= +1600.
   331  //
   332  // The range restriction lets us work in faster integer arithmetic instead of
   333  // slower floating point arithmetic. Correctness is verified by unit tests.
   334  func mulByLog2Log10(x int) int {
   335  	// log(2)/log(10) ≈ 0.30102999566 ≈ 78913 / 2^18
   336  	return (x * 78913) >> 18
   337  }
   338  
   339  // mulByLog10Log2 returns math.Floor(x * log(10)/log(2)) for an integer x in
   340  // the range -500 <= x && x <= +500.
   341  //
   342  // The range restriction lets us work in faster integer arithmetic instead of
   343  // slower floating point arithmetic. Correctness is verified by unit tests.
   344  func mulByLog10Log2(x int) int {
   345  	// log(10)/log(2) ≈ 3.32192809489 ≈ 108853 / 2^15
   346  	return (x * 108853) >> 15
   347  }
   348  
   349  // computeBounds returns a floating-point vector (l, c, u)×2^e2
   350  // where the mantissas are 55-bit (or 26-bit) integers, describing the interval
   351  // represented by the input float64 or float32.
   352  func computeBounds(mant uint64, exp int, flt *floatInfo) (lower, central, upper uint64, e2 int) {
   353  	if mant != 1<<flt.mantbits || exp == flt.bias+1-int(flt.mantbits) {
   354  		// regular case (or denormals)
   355  		lower, central, upper = 2*mant-1, 2*mant, 2*mant+1
   356  		e2 = exp - 1
   357  		return
   358  	} else {
   359  		// border of an exponent
   360  		lower, central, upper = 4*mant-1, 4*mant, 4*mant+2
   361  		e2 = exp - 2
   362  		return
   363  	}
   364  }
   365  
// ryuDigits writes decimal digits for central into d, using lower and
// upper as the bounds within which digits may be trimmed.
//
// c0 tells whether the digits already removed from central are all zero,
// and cup is the rounding hint applied when no further digit is removed.
// Each of the three values is split into a high part and a low part of
// nine decimal digits, and the digit selection itself is delegated to
// ryuDigits32. Trailing and leading zeros are stripped from the result;
// stripping leading zeros reslices d.d and decrements d.dp.
func ryuDigits(d *decimalSlice, lower, central, upper uint64,
	c0, cup bool) {
	lhi, llo := divmod1e9(lower)
	chi, clo := divmod1e9(central)
	uhi, ulo := divmod1e9(upper)
	if uhi == 0 {
		// only low digits (for denormals)
		ryuDigits32(d, llo, clo, ulo, c0, cup, 8)
	} else if lhi < uhi {
		// truncate 9 digits at once.
		// The lower bound is rounded up when it has nonzero low digits.
		if llo != 0 {
			lhi++
		}
		// Fold the nine dropped digits of central into the hints: they
		// must all be zero for c0, and they decide the rounding direction.
		c0 = c0 && clo == 0
		cup = (clo > 5e8) || (clo == 5e8 && cup)
		ryuDigits32(d, lhi, chi, uhi, c0, cup, 8)
		// Account for the nine digits dropped above.
		d.dp += 9
	} else {
		d.nd = 0
		// emit high part
		// Digits are written backwards, ending at index 8.
		n := uint(9)
		for v := chi; v > 0; {
			v1, v2 := v/10, v%10
			v = v1
			n--
			d.d[n] = byte(v2 + '0')
		}
		// Reslice so that the first emitted digit sits at index 0.
		d.d = d.d[n:]
		d.nd = int(9 - n)
		// emit low part
		// The nine low digits follow the high part, so the last one
		// lands at index d.nd+8.
		ryuDigits32(d, llo, clo, ulo,
			c0, cup, d.nd+8)
	}
	// trim trailing zeros
	for d.nd > 0 && d.d[d.nd-1] == '0' {
		d.nd--
	}
	// trim initial zeros
	for d.nd > 0 && d.d[0] == '0' {
		d.nd--
		d.dp--
		d.d = d.d[1:]
	}
}
   410  
// ryuDigits32 emits decimal digits for a number less than 1e9.
//
// lower, central and upper are the bounds and the value to print; c0 and
// cup carry the same hints as in ryuDigits. endindex is the index in d.d
// that the last digit occupies when no digit is trimmed. Digits are
// trimmed from the right for as long as the bounds allow, the remaining
// value is rounded, and the digits are written into d.d starting at
// index d.nd. On return d.nd is one past the last digit written and d.dp
// is d.nd plus the number of trimmed digits. If upper is zero, only d.dp
// is set.
func ryuDigits32(d *decimalSlice, lower, central, upper uint32,
	c0, cup bool, endindex int) {
	if upper == 0 {
		d.dp = endindex + 1
		return
	}
	trimmed := 0
	// Remember last trimmed digit to check for round-up.
	// c0 will be used to remember zeroness of following digits.
	cNextDigit := 0
	for upper > 0 {
		// Repeatedly compute:
		// l = Ceil(lower / 10^k)
		// c = Round(central / 10^k)
		// u = Floor(upper / 10^k)
		// and stop when c goes out of the (l, u) interval.
		l := (lower + 9) / 10
		c, cdigit := central/10, central%10
		u := upper / 10
		if l > u {
			// Trimming one more digit would leave no value between l
			// and u, so keep the current digits and stop here.
			break
		}
		// Check that we didn't cross the lower boundary.
		// The case where l < u but c == l-1 is essentially impossible,
		// but may happen if:
		//    lower   = ..11
		//    central = ..19
		//    upper   = ..31
		// and means that 'central' is very close but less than
		// an integer ending with many zeros, and usually
		// the "round-up" logic hides the problem.
		if l == c+1 && c < u {
			c++
			cdigit = 0
			cup = false
		}
		trimmed++
		// Remember trimmed digits of c
		c0 = c0 && cNextDigit == 0
		cNextDigit = int(cdigit)
		lower, central, upper = l, c, u
	}
	// should we round up?
	// When digits were trimmed, the last trimmed digit decides; an exact
	// tie (digit 5 followed only by zeros) rounds up only if central is odd.
	// Otherwise the caller's cup hint is used unchanged.
	if trimmed > 0 {
		cup = cNextDigit > 5 ||
			(cNextDigit == 5 && !c0) ||
			(cNextDigit == 5 && c0 && central&1 == 1)
	}
	// Never round up past the upper bound.
	if central < upper && cup {
		central++
	}
	// We know where the number ends, fill directly
	// Two digits are written per iteration, from the end backwards.
	endindex -= trimmed
	v := central
	n := endindex
	for n > d.nd {
		v1, v2 := v/100, v%100
		d.d[n] = smallsString[2*v2+1]
		d.d[n-1] = smallsString[2*v2+0]
		n -= 2
		v = v1
	}
	// A single leading digit remains when the digit count is odd.
	if n == d.nd {
		d.d[n] = byte(v + '0')
	}
	d.nd = endindex + 1
	d.dp = d.nd + trimmed
}
   482  
   483  // mult64bitPow10 takes a floating-point input with a 25-bit
   484  // mantissa and multiplies it with 10^q. The resulting mantissa
   485  // is m*P >> 57 where P is a 64-bit element of the detailedPowersOfTen tables.
   486  // It is typically 31 or 32-bit wide.
   487  // The returned boolean is true if all trimmed bits were zero.
   488  //
   489  // That is:
   490  //
   491  //	m*2^e2 * round(10^q) = resM * 2^resE + ε
   492  //	exact = ε == 0
   493  func mult64bitPow10(m uint32, e2, q int) (resM uint32, resE int, exact bool) {
   494  	if q == 0 {
   495  		// P == 1<<63
   496  		return m << 6, e2 - 6, true
   497  	}
   498  	if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
   499  		// This never happens due to the range of float32/float64 exponent
   500  		panic("mult64bitPow10: power of 10 is out of range")
   501  	}
   502  	pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10][1]
   503  	if q < 0 {
   504  		// Inverse powers of ten must be rounded up.
   505  		pow += 1
   506  	}
   507  	hi, lo := bits.Mul64(uint64(m), pow)
   508  	e2 += mulByLog10Log2(q) - 63 + 57
   509  	return uint32(hi<<7 | lo>>57), e2, lo<<7 == 0
   510  }
   511  
   512  // mult128bitPow10 takes a floating-point input with a 55-bit
   513  // mantissa and multiplies it with 10^q. The resulting mantissa
   514  // is m*P >> 119 where P is a 128-bit element of the detailedPowersOfTen tables.
   515  // It is typically 63 or 64-bit wide.
   516  // The returned boolean is true is all trimmed bits were zero.
   517  //
   518  // That is:
   519  //
   520  //	m*2^e2 * round(10^q) = resM * 2^resE + ε
   521  //	exact = ε == 0
   522  func mult128bitPow10(m uint64, e2, q int) (resM uint64, resE int, exact bool) {
   523  	if q == 0 {
   524  		// P == 1<<127
   525  		return m << 8, e2 - 8, true
   526  	}
   527  	if q < detailedPowersOfTenMinExp10 || detailedPowersOfTenMaxExp10 < q {
   528  		// This never happens due to the range of float32/float64 exponent
   529  		panic("mult128bitPow10: power of 10 is out of range")
   530  	}
   531  	pow := detailedPowersOfTen[q-detailedPowersOfTenMinExp10]
   532  	if q < 0 {
   533  		// Inverse powers of ten must be rounded up.
   534  		pow[0] += 1
   535  	}
   536  	e2 += mulByLog10Log2(q) - 127 + 119
   537  
   538  	// long multiplication
   539  	l1, l0 := bits.Mul64(m, pow[0])
   540  	h1, h0 := bits.Mul64(m, pow[1])
   541  	mid, carry := bits.Add64(l1, h0, 0)
   542  	h1 += carry
   543  	return h1<<9 | mid>>55, e2, mid<<9 == 0 && l0 == 0
   544  }
   545  
   546  func divisibleByPower5(m uint64, k int) bool {
   547  	if m == 0 {
   548  		return true
   549  	}
   550  	for i := 0; i < k; i++ {
   551  		if m%5 != 0 {
   552  			return false
   553  		}
   554  		m /= 5
   555  	}
   556  	return true
   557  }
   558  
   559  // divmod1e9 computes quotient and remainder of division by 1e9,
   560  // avoiding runtime uint64 division on 32-bit platforms.
   561  func divmod1e9(x uint64) (uint32, uint32) {
   562  	if !host32bit {
   563  		return uint32(x / 1e9), uint32(x % 1e9)
   564  	}
   565  	// Use the same sequence of operations as the amd64 compiler.
   566  	hi, _ := bits.Mul64(x>>1, 0x89705f4136b4a598) // binary digits of 1e-9
   567  	q := hi >> 28
   568  	return uint32(q), uint32(x - q*1e9)
   569  }