github.com/tobgu/qframe@v0.4.0/internal/ryu/ryu64.go

github.com/tobgu/qframe@v0.4.0/internal/ryu/ryu64.go (about)

     1  // Copyright 2018 Ulf Adams
     2  // Modifications copyright 2019 Caleb Spare
     3  //
     4  // The contents of this file may be used under the terms of the Apache License,
     5  // Version 2.0.
     6  //
     7  //    (See accompanying file LICENSE or copy at
     8  //     http://www.apache.org/licenses/LICENSE-2.0)
     9  //
    10  // Unless required by applicable law or agreed to in writing, this software
    11  // is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
    12  // KIND, either express or implied.
    13  //
    14  // The code in this file is part of a Go translation of the C code written by
    15  // Ulf Adams which may be found at https://github.com/ulfjack/ryu. That source
    16  // code is licensed under Apache 2.0 and this code is derivative work thereof.
    17  
    18  package ryu
    19  
    20  import (
    21  	"math/bits"
    22  )
    23  
    24  type uint128 struct {
    25  	lo uint64
    26  	hi uint64
    27  }
    28  
    29  // dec64 is a floating decimal type representing m * 10^e.
    30  type dec64 struct {
    31  	m uint64
    32  	e int32
    33  }
    34  
    35  func (d dec64) append(b []byte, neg bool) []byte {
    36  	// Step 5: Print the decimal representation.
    37  	if neg {
    38  		b = append(b, '-')
    39  	}
    40  
    41  	out := d.m
    42  	outLen := decimalLen64(out)
    43  	bufLen := outLen
    44  	if bufLen > 1 {
    45  		bufLen++ // extra space for '.'
    46  	}
    47  
    48  	// Print the decimal digits.
    49  	n := len(b)
    50  	if cap(b)-len(b) >= bufLen {
    51  		// Avoid function call in the common case.
    52  		b = b[:len(b)+bufLen]
    53  	} else {
    54  		b = append(b, make([]byte, bufLen)...)
    55  	}
    56  
    57  	// Avoid expensive 64-bit divisions.
    58  	// We have at most 17 digits, and uint32 can store 9 digits.
    59  	// If the output doesn't fit into a uint32, cut off 8 digits
    60  	// so the rest will fit into a uint32.
    61  	var i int
    62  	if out>>32 > 0 {
    63  		var out32 uint32
    64  		out, out32 = out/1e8, uint32(out%1e8)
    65  		for ; i < 8; i++ {
    66  			b[n+outLen-i] = '0' + byte(out32%10)
    67  			out32 /= 10
    68  		}
    69  	}
    70  	out32 := uint32(out)
    71  	for ; i < outLen-1; i++ {
    72  		b[n+outLen-i] = '0' + byte(out32%10)
    73  		out32 /= 10
    74  	}
    75  	b[n] = '0' + byte(out32%10)
    76  
    77  	// Print the '.' if needed.
    78  	if outLen > 1 {
    79  		b[n+1] = '.'
    80  	}
    81  
    82  	// Print the exponent.
    83  	b = append(b, 'e')
    84  	exp := d.e + int32(outLen) - 1
    85  	if exp < 0 {
    86  		b = append(b, '-')
    87  		exp = -exp
    88  	} else {
    89  		// Unconditionally print a + here to match strconv's formatting.
    90  		b = append(b, '+')
    91  	}
    92  	// Always print at least two digits to match strconv's formatting.
    93  	d2 := exp % 10
    94  	exp /= 10
    95  	d1 := exp % 10
    96  	d0 := exp / 10
    97  	if d0 > 0 {
    98  		b = append(b, '0'+byte(d0))
    99  	}
   100  	b = append(b, '0'+byte(d1), '0'+byte(d2))
   101  
   102  	return b
   103  }
   104  
   105  func sizeSlice(b []byte, bufLen int) []byte {
   106  	if cap(b)-len(b) >= bufLen {
   107  		// Avoid function call in the common case.
   108  		return b[:len(b)+bufLen]
   109  	}
   110  
   111  	return append(b, make([]byte, bufLen)...)
   112  }
   113  
   114  func (d dec64) appendF(b []byte, neg bool) []byte {
   115  	// Step 5: Print the decimal representation.
   116  	if neg {
   117  		b = append(b, '-')
   118  	}
   119  
   120  	out := d.m
   121  	outLen := decimalLen64(out)
   122  
   123  	dE := int(d.e)
   124  	if dE >= 0 {
   125  		// XYZ
   126  		n := len(b)
   127  		b = sizeSlice(b, dE+outLen)
   128  		for i := n; i < dE+n; i++ {
   129  			b[outLen+i] = '0'
   130  		}
   131  
   132  		for i := n + outLen - 1; i >= n; i-- {
   133  			b[i] = '0' + byte(out%10)
   134  			out /= 10
   135  		}
   136  
   137  		return b
   138  	}
   139  
   140  	ePos := -dE
   141  	if ePos >= outLen {
   142  		// 0.XYZ
   143  		b := append(b, "0."...)
   144  		n := len(b)
   145  		b = sizeSlice(b, ePos)
   146  		for i := n + ePos - 1; i >= n; i-- {
   147  			b[i] = '0' + byte(out%10)
   148  			out /= 10
   149  		}
   150  
   151  		return b
   152  	}
   153  
   154  	// Y.XZ
   155  	b = sizeSlice(b, outLen+1) // + "."
   156  	n := len(b)
   157  	i := n - 1
   158  	end := i - outLen
   159  	for ; ePos > 0; i-- {
   160  		b[i] = '0' + byte(out%10)
   161  		out /= 10
   162  		ePos--
   163  	}
   164  
   165  	b[i] = '.'
   166  	i--
   167  
   168  	for ; i >= end; i-- {
   169  		b[i] = '0' + byte(out%10)
   170  		out /= 10
   171  	}
   172  
   173  	return b
   174  }
   175  
   176  func float64ToDecimalExactInt(mant, exp uint64) (d dec64, ok bool) {
   177  	e := exp - bias64
   178  	if e > mantBits64 {
   179  		return d, false
   180  	}
   181  	shift := mantBits64 - e
   182  	mant |= 1 << mantBits64 // implicit 1
   183  	d.m = mant >> shift
   184  	if d.m<<shift != mant {
   185  		return d, false
   186  	}
   187  
   188  	for d.m%10 == 0 {
   189  		d.m /= 10
   190  		d.e++
   191  	}
   192  	return d, true
   193  }
   194  
   195  func float64ToDecimal(mant, exp uint64) dec64 {
   196  	var e2 int32
   197  	var m2 uint64
   198  	if exp == 0 {
   199  		// We subtract 2 so that the bounds computation has
   200  		// 2 additional bits.
   201  		e2 = 1 - bias64 - mantBits64 - 2
   202  		m2 = mant
   203  	} else {
   204  		e2 = int32(exp) - bias64 - mantBits64 - 2
   205  		m2 = uint64(1)<<mantBits64 | mant
   206  	}
   207  	even := m2&1 == 0
   208  	acceptBounds := even
   209  
   210  	// Step 2: Determine the interval of valid decimal representations.
   211  	mv := 4 * m2
   212  	mmShift := boolToUint64(mant != 0 || exp <= 1)
   213  	// We would compute mp and mm like this:
   214  	// mp := 4 * m2 + 2;
   215  	// mm := mv - 1 - mmShift;
   216  
   217  	// Step 3: Convert to a decimal power base uing 128-bit arithmetic.
   218  	var (
   219  		vr, vp, vm        uint64
   220  		e10               int32
   221  		vmIsTrailingZeros bool
   222  		vrIsTrailingZeros bool
   223  	)
   224  	if e2 >= 0 {
   225  		// This expression is slightly faster than max(0, log10Pow2(e2) - 1).
   226  		q := log10Pow2(e2) - boolToUint32(e2 > 3)
   227  		e10 = int32(q)
   228  		k := pow5InvNumBits64 + pow5Bits(int32(q)) - 1
   229  		i := -e2 + int32(q) + k
   230  		mul := pow5InvSplit64[q]
   231  		vr = mulShift64(4*m2, mul, i)
   232  		vp = mulShift64(4*m2+2, mul, i)
   233  		vm = mulShift64(4*m2-1-mmShift, mul, i)
   234  		if q <= 21 {
   235  			// This should use q <= 22, but I think 21 is also safe.
   236  			// Smaller values may still be safe, but it's more
   237  			// difficult to reason about them. Only one of mp, mv,
   238  			// and mm can be a multiple of 5, if any.
   239  			if mv%5 == 0 {
   240  				vrIsTrailingZeros = multipleOfPowerOfFive64(mv, q)
   241  			} else if acceptBounds {
   242  				// Same as min(e2 + (^mm & 1), pow5Factor64(mm)) >= q
   243  				// <=> e2 + (^mm & 1) >= q && pow5Factor64(mm) >= q
   244  				// <=> true && pow5Factor64(mm) >= q, since e2 >= q.
   245  				vmIsTrailingZeros = multipleOfPowerOfFive64(mv-1-mmShift, q)
   246  			} else if multipleOfPowerOfFive64(mv+2, q) {
   247  				vp--
   248  			}
   249  		}
   250  	} else {
   251  		// This expression is slightly faster than max(0, log10Pow5(-e2) - 1).
   252  		q := log10Pow5(-e2) - boolToUint32(-e2 > 1)
   253  		e10 = int32(q) + e2
   254  		i := -e2 - int32(q)
   255  		k := pow5Bits(i) - pow5NumBits64
   256  		j := int32(q) - k
   257  		mul := pow5Split64[i]
   258  		vr = mulShift64(4*m2, mul, j)
   259  		vp = mulShift64(4*m2+2, mul, j)
   260  		vm = mulShift64(4*m2-1-mmShift, mul, j)
   261  		if q <= 1 {
   262  			// {vr,vp,vm} is trailing zeros if {mv,mp,mm} has at least q trailing 0 bits.
   263  			// mv = 4 * m2, so it always has at least two trailing 0 bits.
   264  			vrIsTrailingZeros = true
   265  			if acceptBounds {
   266  				// mm = mv - 1 - mmShift, so it has 1 trailing 0 bit iff mmShift == 1.
   267  				vmIsTrailingZeros = mmShift == 1
   268  			} else {
   269  				// mp = mv + 2, so it always has at least one trailing 0 bit.
   270  				vp--
   271  			}
   272  		} else if q < 63 { // TODO(ulfjack/cespare): Use a tighter bound here.
   273  			// We need to compute min(ntz(mv), pow5Factor64(mv) - e2) >= q - 1
   274  			// <=> ntz(mv) >= q - 1 && pow5Factor64(mv) - e2 >= q - 1
   275  			// <=> ntz(mv) >= q - 1 (e2 is negative and -e2 >= q)
   276  			// <=> (mv & ((1 << (q - 1)) - 1)) == 0
   277  			// We also need to make sure that the left shift does not overflow.
   278  			vrIsTrailingZeros = multipleOfPowerOfTwo64(mv, q-1)
   279  		}
   280  	}
   281  
   282  	// Step 4: Find the shortest decimal representation
   283  	// in the interval of valid representations.
   284  	var removed int32
   285  	var lastRemovedDigit uint8
   286  	var out uint64
   287  	// On average, we remove ~2 digits.
   288  	if vmIsTrailingZeros || vrIsTrailingZeros {
   289  		// General case, which happens rarely (~0.7%).
   290  		for {
   291  			vpDiv10 := vp / 10
   292  			vmDiv10 := vm / 10
   293  			if vpDiv10 <= vmDiv10 {
   294  				break
   295  			}
   296  			vmMod10 := vm % 10
   297  			vrDiv10 := vr / 10
   298  			vrMod10 := vr % 10
   299  			vmIsTrailingZeros = vmIsTrailingZeros && vmMod10 == 0
   300  			vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
   301  			lastRemovedDigit = uint8(vrMod10)
   302  			vr = vrDiv10
   303  			vp = vpDiv10
   304  			vm = vmDiv10
   305  			removed++
   306  		}
   307  		if vmIsTrailingZeros {
   308  			for {
   309  				vmDiv10 := vm / 10
   310  				vmMod10 := vm % 10
   311  				if vmMod10 != 0 {
   312  					break
   313  				}
   314  				vpDiv10 := vp / 10
   315  				vrDiv10 := vr / 10
   316  				vrMod10 := vr % 10
   317  				vrIsTrailingZeros = vrIsTrailingZeros && lastRemovedDigit == 0
   318  				lastRemovedDigit = uint8(vrMod10)
   319  				vr = vrDiv10
   320  				vp = vpDiv10
   321  				vm = vmDiv10
   322  				removed++
   323  			}
   324  		}
   325  		if vrIsTrailingZeros && lastRemovedDigit == 5 && vr%2 == 0 {
   326  			// Round even if the exact number is .....50..0.
   327  			lastRemovedDigit = 4
   328  		}
   329  		out = vr
   330  		// We need to take vr + 1 if vr is outside bounds
   331  		// or we need to round up.
   332  		if (vr == vm && (!acceptBounds || !vmIsTrailingZeros)) || lastRemovedDigit >= 5 {
   333  			out++
   334  		}
   335  	} else {
   336  		// Specialized for the common case (~99.3%).
   337  		// Percentages below are relative to this.
   338  		roundUp := false
   339  		for vp/100 > vm/100 {
   340  			// Optimization: remove two digits at a time (~86.2%).
   341  			roundUp = vr%100 >= 50
   342  			vr /= 100
   343  			vp /= 100
   344  			vm /= 100
   345  			removed += 2
   346  		}
   347  		// Loop iterations below (approximately), without optimization above:
   348  		// 0: 0.03%, 1: 13.8%, 2: 70.6%, 3: 14.0%, 4: 1.40%, 5: 0.14%, 6+: 0.02%
   349  		// Loop iterations below (approximately), with optimization above:
   350  		// 0: 70.6%, 1: 27.8%, 2: 1.40%, 3: 0.14%, 4+: 0.02%
   351  		for vp/10 > vm/10 {
   352  			roundUp = vr%10 >= 5
   353  			vr /= 10
   354  			vp /= 10
   355  			vm /= 10
   356  			removed++
   357  		}
   358  		// We need to take vr + 1 if vr is outside bounds
   359  		// or we need to round up.
   360  		out = vr + boolToUint64(vr == vm || roundUp)
   361  	}
   362  
   363  	return dec64{m: out, e: e10 + removed}
   364  }
   365  
   366  var powersOf10 = [...]uint64{
   367  	1e0,
   368  	1e1,
   369  	1e2,
   370  	1e3,
   371  	1e4,
   372  	1e5,
   373  	1e6,
   374  	1e7,
   375  	1e8,
   376  	1e9,
   377  	1e10,
   378  	1e11,
   379  	1e12,
   380  	1e13,
   381  	1e14,
   382  	1e15,
   383  	1e16,
   384  	1e17,
   385  	// We only need to find the length of at most 17 digit numbers.
   386  }
   387  
   388  func decimalLen64(u uint64) int {
   389  	// http://graphics.stanford.edu/~seander/bithacks.html#IntegerLog10
   390  	log2 := 64 - bits.LeadingZeros64(u) - 1
   391  	t := (log2 + 1) * 1233 >> 12
   392  	return t - boolToInt(u < powersOf10[t]) + 1
   393  }
   394  
   395  func mulShift64(m uint64, mul uint128, shift int32) uint64 {
   396  	hihi, hilo := bits.Mul64(m, mul.hi)
   397  	lohi, _ := bits.Mul64(m, mul.lo)
   398  	sum := uint128{hi: hihi, lo: lohi + hilo}
   399  	if sum.lo < lohi {
   400  		sum.hi++ // overflow
   401  	}
   402  	return shiftRight128(sum, shift-64)
   403  }
   404  
   405  func shiftRight128(v uint128, shift int32) uint64 {
   406  	// The shift value is always modulo 64.
   407  	// In the current implementation of the 64-bit version
   408  	// of Ryu, the shift value is always < 64.
   409  	// (It is in the range [2, 59].)
   410  	// Check this here in case a future change requires larger shift
   411  	// values. In this case this function needs to be adjusted.
   412  	assert(shift < 64, "shift < 64")
   413  	return (v.hi << uint64(64-shift)) | (v.lo >> uint(shift))
   414  }
   415  
   416  func pow5Factor64(v uint64) uint32 {
   417  	for n := uint32(0); ; n++ {
   418  		q, r := v/5, v%5
   419  		if r != 0 {
   420  			return n
   421  		}
   422  		v = q
   423  	}
   424  }
   425  
   426  func multipleOfPowerOfFive64(v uint64, p uint32) bool {
   427  	return pow5Factor64(v) >= p
   428  }
   429  
   430  func multipleOfPowerOfTwo64(v uint64, p uint32) bool {
   431  	return uint32(bits.TrailingZeros64(v)) >= p
   432  }