github.com/llir/llvm@v0.3.6/ir/constant/const_int.go (about) 1 package constant 2 3 import ( 4 "fmt" 5 "math/big" 6 "strings" 7 8 "github.com/llir/llvm/ir/types" 9 "github.com/pkg/errors" 10 ) 11 12 // --- [ Integer constants ] --------------------------------------------------- 13 14 // Int is an LLVM IR integer constant. 15 type Int struct { 16 // Integer type. 17 Typ *types.IntType 18 // Integer constant. 19 X *big.Int 20 } 21 22 // NewInt returns a new integer constant based on the given integer type and 23 // 64-bit interger value. 24 func NewInt(typ *types.IntType, x int64) *Int { 25 return &Int{Typ: typ, X: big.NewInt(x)} 26 } 27 28 // NewBool returns a new boolean constant based on the given boolean value. 29 func NewBool(x bool) *Int { 30 if x { 31 return True 32 } 33 return False 34 } 35 36 // NewIntFromString returns a new integer constant based on the given integer 37 // type and string. 38 // 39 // The integer string may be expressed in one of the following forms. 40 // 41 // * boolean literal 42 // true | false 43 // * integer literal 44 // [-]?[0-9]+ 45 // * hexadecimal integer literal 46 // [us]0x[0-9A-Fa-f]+ 47 func NewIntFromString(typ *types.IntType, s string) (*Int, error) { 48 // Boolean literal. 49 switch s { 50 case "true": 51 if !typ.Equal(types.I1) { 52 return nil, errors.Errorf("invalid boolean type; expected i1, got %T", typ) 53 } 54 return True, nil 55 case "false": 56 if !typ.Equal(types.I1) { 57 return nil, errors.Errorf("invalid boolean type; expected i1, got %T", typ) 58 } 59 return False, nil 60 } 61 // Hexadecimal integer literal. 
62 switch { 63 // unsigned hexadecimal integer literal 64 case strings.HasPrefix(s, "u0x"): 65 s = s[len("u0x"):] 66 const base = 16 67 x, _ := (&big.Int{}).SetString(s, base) 68 if x == nil { 69 return nil, errors.Errorf("unable to parse integer constant %q", s) 70 } 71 return &Int{Typ: typ, X: x}, nil 72 // signed hexadecimal integer literal 73 case strings.HasPrefix(s, "s0x"): 74 // Parse signed hexadecimal integer literal in two's complement notation. 75 // First parse as unsigned hex, then check if sign bit is set. 76 s = s[len("s0x"):] 77 const base = 16 78 x, _ := (&big.Int{}).SetString(s, base) 79 if x == nil { 80 return nil, errors.Errorf("unable to parse integer constant %q", s) 81 } 82 // Check if signed. 83 if x.Bit(int(typ.BitSize)-1) == 1 { 84 // Compute actual negative value from two's complement. 85 // 86 // If x is 0xFFFF with type i16, then the actual negative value is 87 // `x - 0x10000`, in other words `x - 2^n`. 88 n := int64(typ.BitSize) 89 // n^2 90 maxPlus1 := new(big.Int).Exp(big.NewInt(2), big.NewInt(n), nil) 91 x = new(big.Int).Sub(x, maxPlus1) 92 93 } 94 return &Int{Typ: typ, X: x}, nil 95 } 96 // Integer literal. 97 x, _ := (&big.Int{}).SetString(s, 10) 98 if x == nil { 99 return nil, errors.Errorf("unable to parse integer constant %q", s) 100 } 101 return &Int{Typ: typ, X: x}, nil 102 } 103 104 // String returns the LLVM syntax representation of the constant as a type-value 105 // pair. 106 func (c *Int) String() string { 107 return fmt.Sprintf("%v %v", c.Type(), c.Ident()) 108 } 109 110 // Type returns the type of the constant. 111 func (c *Int) Type() types.Type { 112 return c.Typ 113 } 114 115 // Ident returns the identifier associated with the constant. 
116 func (c *Int) Ident() string { 117 // IntLit 118 if c.Typ.BitSize == 1 { 119 // "true" 120 // "false" 121 switch x := c.X.Int64(); x { 122 case 0: 123 return "false" 124 case 1: 125 return "true" 126 default: 127 panic(fmt.Errorf("invalid integer value of boolean type; expected 0 or 1, got %d", x)) 128 } 129 } 130 // Output x in hexadecimal notation if x is positive, greater than or equal 131 // to 0x1000 and has a significantly lower entropy than decimal notation. 132 133 // Minimum difference between entropy of decimal and hexadecimal notation to 134 // output x in hexadecimal notation. 135 const minEntropyDiff = 0.2 136 // Maximum allowed entropy of hexadecimal notation to output x in hexadecimal 137 // notation. 138 // 139 // This is useful as some hex values, while lower entropy than their decimal 140 // counter-part do not improve readability. 141 // 142 // For instance, the decimal entropy of 7240739780546808700 is 9/10 = 0.9 and 143 // the hexadecimal entropy of 0x647C4677A2884B7C is 8/16 = 0.5. As such the 144 // entropy difference is 0.9-0.5 = 0.4, but the hexadecimal notation does not 145 // improve readability. Thus we add an upper bound on the hexadecimal entropy, 146 // and if the entropy is above this bound, output in decimal notation 147 // instead. 148 hexLength := len(c.X.Text(16)) 149 maxHexEntropy := calcMaxHexEntropy(hexLength) 150 threshold := big.NewInt(0x1000) // 4096 151 // Check entropy if x >= 0x1000. 152 if c.X.Cmp(threshold) >= 0 { 153 hexentropy := hexEntropy(c.X) 154 decentropy := decimalEntropy(c.X) 155 if hexentropy <= maxHexEntropy+0.01 && decentropy >= hexentropy+minEntropyDiff { 156 return "u0x" + strings.ToUpper(c.X.Text(16)) 157 } 158 } 159 return c.X.String() 160 } 161 162 // ### [ Helper functions ] #################################################### 163 164 // calcMaxHexEntropy returns the maximum allowed hexadecimal entropy based on 165 // the length of x in hexadecimal notation. 
//
// The bounds below are rounded up to two decimals.
//
//	maxHexEntropy = 0.0    length < 4
//	maxHexEntropy = 0.5    length == 4    (2/4)
//	maxHexEntropy = 0.4    length == 5    (2/5)
//	maxHexEntropy = 0.34   length == 6    (2/6)
//	maxHexEntropy = 0.43   length == 7    (3/7)
//	maxHexEntropy = 0.38   length == 8    (3/8)
//	maxHexEntropy = 0.34   length == 9    (3/9)
//	maxHexEntropy = 0.3    length == 10   (3/10)
//	maxHexEntropy = 0.37   length == 11   (4/11)
//	maxHexEntropy = 0.34   length == 12   (4/12)
//	maxHexEntropy = 0.31   length == 13   (4/13)
//	maxHexEntropy = 0.29   length == 14   (4/14)
//	maxHexEntropy = 0.27   length == 15   (4/15)
//	maxHexEntropy = 0.25   length >= 16   (4/16)
func calcMaxHexEntropy(length int) float64 {
	// Lengths above 16 share the bound of length 16.
	if length > 16 {
		length = 16
	}
	switch {
	case length < 4:
		return 0
	case length <= 6:
		// 4 <= length <= 6
		return 2.0 / float64(length)
	case length <= 10:
		// 7 <= length <= 10
		return 3.0 / float64(length)
	default:
		// length >= 11
		return 4.0 / float64(length)
	}
}

// hexEntropy returns the entropy of x when encoded in hexadecimal notation. The
// entropy is in range (0.0, 1.0] and is determined by the number of unique hex
// digits required to represent x in hexadecimal notation divided by the total
// number of hex digits ignoring prefix (capped by base 16).
//
// For instance, the hexadecimal value 0x80000000 (2147483648 in decimal)
// requires two unique hex digits to be represented in hexadecimal notation,
// ignoring prefix; namely '0' and '8'.
//
// Hex digits of 0x80000000:
//
//	0 0 0 0 0 0 0
//	8
//
// The total number of hex digits in 0x80000000 is 8. Thus, the entropy of
// 0x80000000 in hexadecimal notation is
//
//	unique_digits/total_digits
//	= 2/8
//	= 0.25
func hexEntropy(x *big.Int) float64 {
	const base = 16
	return intEntropy(x, base)
}

// decimalEntropy returns the entropy of x when encoded in decimal notation. The
// entropy is in range (0.0, 1.0] and is determined by the number of unique
// decimal digits required to represent x in decimal notation divided by the
// total number of digits (capped by base 10).
//
// For instance, the decimal value 2147483648 (0x80000000 in hex) requires seven
// unique decimal digits to be represented in decimal notation; namely '1', '2',
// '3', '4', '6', '7' and '8'.
//
// Decimal digits of 2147483648:
//
//	1
//	2
//	3
//	4 4 4
//	6
//	7
//	8 8
//
// The total number of decimal digits in 2147483648 is 10. Thus, the entropy of
// 2147483648 in decimal notation is
//
//	unique_digits/total_digits
//	= 7/10
//	= 0.7
func decimalEntropy(x *big.Int) float64 {
	const base = 10
	return intEntropy(x, base)
}

// intEntropy returns the entropy of x when encoded in base notation. Base must
// be between 2 and 62, inclusive; otherwise intEntropy panics. The entropy is
// in range (0.0, 1.0] and is determined by the number of unique digits required
// to represent x in base notation divided by the total number of digits (capped
// by base). The sign of x is not a digit and does not affect the entropy.
func intEntropy(x *big.Int, base int) float64 {
	if base < 2 || base > 62 {
		panic(fmt.Errorf("invalid base; expected 2 <= base <= 62, got %d", base))
	}
	const maxBase = 62
	s := x.Text(base)
	// Strip the sign so that it is counted neither as a unique digit nor as part
	// of the total number of digits.
	if len(s) > 0 && s[0] == '-' {
		s = s[1:]
	}
	// Count unique digits.
	var seen [maxBase]bool
	uniqueDigits := 0
	for i := 0; i < len(s); i++ {
		d := digitValue(s[i])
		if !seen[d] {
			seen[d] = true
			uniqueDigits++
		}
	}
	// Cap the total at base; a number cannot use more than base unique digits,
	// and the cap keeps long numbers from being scored as arbitrarily low
	// entropy.
	length := len(s)
	if length > base {
		length = base
	}
	return float64(uniqueDigits) / float64(length)
}

// digitValue returns the integer value of the given digit byte. As defined by
// *big.Int.Text, the digit uses the lower-case letters 'a' to 'z' for digit
// values 10 to 35, and the upper-case letters 'A' to 'Z' for digit values 36 to
// 61. It panics if b is not one of [0-9a-zA-Z].
func digitValue(b byte) int {
	switch {
	case '0' <= b && b <= '9':
		return 0 + int(b-'0')
	case 'a' <= b && b <= 'z':
		return 10 + int(b-'a')
	case 'A' <= b && b <= 'Z':
		return 36 + int(b-'A')
	default:
		panic(fmt.Errorf("invalid digit byte; expected [0-9a-zA-Z], got %#U", b))
	}
}