// run

// Check conversion of constant to float32/float64 near min/max boundaries.

// Copyright 2014 The Go Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

package main

import (
	"fmt"
	"math"
)

// The largest exact float32 is f₁ = (1+(1-2⁻²³))×2¹²⁷ = (1-2⁻²⁴)×2¹²⁸ = 2¹²⁸ - 2¹⁰⁴.
// The next float32 would be f₂ = (1+1)×2¹²⁷ = 1×2¹²⁸, except that exponent is out of range.
// Float32 conversion rounds to the nearest float32, rounding to even mantissa:
// between f₁ and f₂, values closer to f₁ round to f₁ and values closer to f₂ are rejected as out of range.
// f₁ is an odd mantissa, so the halfway point (f₁+f₂)/2 rounds to f₂ and is rejected.
// The halfway point is (f₁+f₂)/2 = 2¹²⁸ - 2¹⁰³.
//
// The same is true of float64, with different constants: s/24/53/ and s/128/1024/.

// Powers of two and the derived float32/float64 boundary values.
// These are untyped constants, so the arithmetic below is carried out by the
// compiler at constant-evaluation precision rather than in float64.
const (
	two24   = 1.0 * (1 << 24)
	two53   = 1.0 * (1 << 53)
	two64   = 1.0 * (1 << 64)
	two128  = two64 * two64
	two256  = two128 * two128
	two512  = two256 * two256
	two768  = two512 * two256
	two1024 = two512 * two512

	// ulp32 is the gap between the two largest finite float32 values
	// (main cross-checks this against math.Float32frombits); max32 is the
	// largest finite float32.
	ulp32 = two128 / two24
	max32 = two128 - ulp32

	// ulp64 and max64 are the float64 counterparts.
	ulp64 = two1024 / two53
	max64 = two1024 - ulp64
)

// cvt is the table of conversions under test. For every row, converting the
// constant expression spelled out in text (stored already converted in approx)
// must yield exactly the value exact, whose IEEE 754 bit pattern is bits.
var cvt = []struct {
	bits   uint64 // keep us honest
	exact  interface{}
	approx interface{}
	text   string
}{
	// 0
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 - ulp32/2), "max32 - ulp32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32), "max32 - ulp32"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32/2), "max32 - ulp32/2"},
	{0x7f7ffffe, float32(max32 - ulp32), float32(max32 - ulp32 + ulp32/2), "max32 - ulp32 + ulp32/2"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + ulp32/two64), "max32 - ulp32 + ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + ulp32/two64), "max32 - ulp32/2 + ulp32/two64"},
	{0x7f7fffff, float32(max32), float32(max32), "max32"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - ulp32/two64), "max32 + ulp32/2 - ulp32/two64"},

	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 - ulp32/2)), "-(max32 - ulp32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32)), "-(max32 - ulp32)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32/2)), "-(max32 - ulp32/2)"},
	{0xff7ffffe, float32(-(max32 - ulp32)), float32(-(max32 - ulp32 + ulp32/2)), "-(max32 - ulp32 + ulp32/2)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + ulp32/two64)), "-(max32 - ulp32 + ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + ulp32/two64)), "-(max32 - ulp32/2 + ulp32/two64)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32)), "-(max32)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - ulp32/two64)), "-(max32 + ulp32/2 - ulp32/two64)"},

	// These are required to work: according to the Go spec, the internal float mantissa must be at least 256 bits,
	// and these expressions can be represented exactly with a 256-bit mantissa.
	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1), "max32 - ulp32 + ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1), "max32 - ulp32/2 + 1"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1), "max32 + ulp32/2 - 1"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1)), "-(max32 - ulp32 + ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1)), "-(max32 - ulp32/2 + 1)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1)), "-(max32 + ulp32/2 - 1)"},

	{0x7f7fffff, float32(max32), float32(max32 - ulp32 + ulp32/2 + 1/two128), "max32 - ulp32 + ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 - ulp32/2 + 1/two128), "max32 - ulp32/2 + 1/two128"},
	{0x7f7fffff, float32(max32), float32(max32 + ulp32/2 - 1/two128), "max32 + ulp32/2 - 1/two128"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32 + ulp32/2 + 1/two128)), "-(max32 - ulp32 + ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 - ulp32/2 + 1/two128)), "-(max32 - ulp32/2 + 1/two128)"},
	{0xff7fffff, float32(-(max32)), float32(-(max32 + ulp32/2 - 1/two128)), "-(max32 + ulp32/2 - 1/two128)"},

	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 - ulp64/2), "max64 - ulp64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64), "max64 - ulp64"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64/2), "max64 - ulp64/2"},
	{0x7feffffffffffffe, float64(max64 - ulp64), float64(max64 - ulp64 + ulp64/2), "max64 - ulp64 + ulp64/2"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + ulp64/two64), "max64 - ulp64 + ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + ulp64/two64), "max64 - ulp64/2 + ulp64/two64"},
	{0x7fefffffffffffff, float64(max64), float64(max64), "max64"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - ulp64/two64), "max64 + ulp64/2 - ulp64/two64"},

	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 - ulp64/2)), "-(max64 - ulp64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64)), "-(max64 - ulp64)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64/2)), "-(max64 - ulp64/2)"},
	{0xffeffffffffffffe, float64(-(max64 - ulp64)), float64(-(max64 - ulp64 + ulp64/2)), "-(max64 - ulp64 + ulp64/2)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + ulp64/two64)), "-(max64 - ulp64 + ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + ulp64/two64)), "-(max64 - ulp64/2 + ulp64/two64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64)), "-(max64)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - ulp64/two64)), "-(max64 + ulp64/2 - ulp64/two64)"},

	// These are required to work.
	// The mantissas are exactly 256 bits.
	// max64 is just below 2¹⁰²⁴ so the bottom bit we can use is 2⁷⁶⁸.
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64 + ulp64/2 + two768), "max64 - ulp64 + ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 - ulp64/2 + two768), "max64 - ulp64/2 + two768"},
	{0x7fefffffffffffff, float64(max64), float64(max64 + ulp64/2 - two768), "max64 + ulp64/2 - two768"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64 + ulp64/2 + two768)), "-(max64 - ulp64 + ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 - ulp64/2 + two768)), "-(max64 - ulp64/2 + two768)"},
	{0xffefffffffffffff, float64(-(max64)), float64(-(max64 + ulp64/2 - two768)), "-(max64 + ulp64/2 - two768)"},
}

// bugged records whether the "BUG" banner has already been printed.
var bugged = false

// bug prints "BUG" the first time it is called and nothing afterwards, so the
// banner appears once ahead of however many failure details follow.
func bug() {
	if !bugged {
		bugged = true
		fmt.Println("BUG")
	}
}

// main checks the ulp constants against the runtime bit patterns and then
// walks the cvt table, reporting every row whose conversion result differs
// from the expected value. It prints nothing when all checks pass.
func main() {
	u64 := math.Float64frombits(0x7fefffffffffffff) - math.Float64frombits(0x7feffffffffffffe)
	if ulp64 != u64 {
		bug()
		// Terminate the line so later diagnostics do not run into this one.
		fmt.Printf("ulp64=%g, want %g\n", ulp64, u64)
	}

	u32 := math.Float32frombits(0x7f7fffff) - math.Float32frombits(0x7f7ffffe)
	if ulp32 != u32 {
		bug()
		fmt.Printf("ulp32=%g, want %g\n", ulp32, u32)
	}

	for _, c := range cvt {
		// First make sure the row itself is self-consistent.
		if bits(c.exact) != c.bits {
			bug()
			fmt.Printf("%s: inconsistent table: bits=%#x (%g) but exact=%g (%#x)\n", c.text, c.bits, fromBits(c.bits, c.exact), c.exact, bits(c.exact))
		}
		// Then check the conversion under test, both by value and by bit pattern.
		if c.approx != c.exact || bits(c.approx) != c.bits {
			bug()
			fmt.Printf("%s: have %g (%#x) want %g (%#x)\n", c.text, c.approx, bits(c.approx), c.exact, c.bits)
		}
	}
}

// bits returns the IEEE 754 bit pattern of x as a uint64 when x is a float32
// or float64. For any other dynamic type it returns the int 0, which never
// compares equal to a uint64 held in an interface, so such a row is reported
// as a mismatch by main.
func bits(x interface{}) interface{} {
	switch x := x.(type) {
	case float32:
		return uint64(math.Float32bits(x))
	case float64:
		return math.Float64bits(x)
	}
	return 0
}

// fromBits interprets b as a floating-point bit pattern of the same type as x
// (only the dynamic type of x is used, not its value). It returns "?" when x
// is neither a float32 nor a float64.
func fromBits(b uint64, x interface{}) interface{} {
	switch x.(type) {
	case float32:
		return math.Float32frombits(uint32(b))
	case float64:
		return math.Float64frombits(b)
	}
	return "?"
}