github.com/llir/llvm@v0.3.6/ir/constant/const_int.go (about)

package constant

import (
	"fmt"
	"math/big"
	"strings"

	"github.com/llir/llvm/ir/types"
	"github.com/pkg/errors"
)

// --- [ Integer constants ] ---------------------------------------------------

// Int is an LLVM IR integer constant.
type Int struct {
	// Integer type.
	Typ *types.IntType
	// Integer constant.
	X *big.Int
}

// NewInt returns a new integer constant based on the given integer type and
// 64-bit integer value.
func NewInt(typ *types.IntType, x int64) *Int {
	return &Int{Typ: typ, X: big.NewInt(x)}
}

// NewBool returns a new boolean constant based on the given boolean value.
func NewBool(x bool) *Int {
	if x {
		return True
	}
	return False
}
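
// exampleIntConstants is an illustrative sketch (hypothetical helper, not part
// of the original API) showing typical use of NewInt and NewBool. The expected
// output noted in the comments follows the String method defined below.
func exampleIntConstants() {
	answer := NewInt(types.I32, 42)
	fmt.Println(answer) // i32 42
	flag := NewBool(true)
	fmt.Println(flag) // i1 true
}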

// NewIntFromString returns a new integer constant based on the given integer
// type and string.
//
// The integer string may be expressed in one of the following forms.
//
//    * boolean literal
//         true | false
//    * integer literal
//         [-]?[0-9]+
//    * hexadecimal integer literal
//         [us]0x[0-9A-Fa-f]+
func NewIntFromString(typ *types.IntType, s string) (*Int, error) {
	// Boolean literal.
	switch s {
	case "true":
		if !typ.Equal(types.I1) {
			return nil, errors.Errorf("invalid boolean type; expected i1, got %T", typ)
		}
		return True, nil
	case "false":
		if !typ.Equal(types.I1) {
			return nil, errors.Errorf("invalid boolean type; expected i1, got %T", typ)
		}
		return False, nil
	}
	// Hexadecimal integer literal.
	switch {
	// unsigned hexadecimal integer literal
	case strings.HasPrefix(s, "u0x"):
		s = s[len("u0x"):]
		const base = 16
		x, _ := (&big.Int{}).SetString(s, base)
		if x == nil {
			return nil, errors.Errorf("unable to parse integer constant %q", s)
		}
		return &Int{Typ: typ, X: x}, nil
	// signed hexadecimal integer literal
	case strings.HasPrefix(s, "s0x"):
		// Parse signed hexadecimal integer literal in two's complement notation.
		// First parse as unsigned hex, then check if sign bit is set.
		s = s[len("s0x"):]
		const base = 16
		x, _ := (&big.Int{}).SetString(s, base)
		if x == nil {
			return nil, errors.Errorf("unable to parse integer constant %q", s)
		}
		// Check if signed.
		if x.Bit(int(typ.BitSize)-1) == 1 {
			// Compute actual negative value from two's complement.
			//
			// If x is 0xFFFF with type i16, then the actual negative value is
			// `x - 0x10000`, in other words `x - 2^n`.
			n := int64(typ.BitSize)
			// 2^n
			maxPlus1 := new(big.Int).Exp(big.NewInt(2), big.NewInt(n), nil)
			x = new(big.Int).Sub(x, maxPlus1)

		}
		return &Int{Typ: typ, X: x}, nil
	}
	// Integer literal.
	x, _ := (&big.Int{}).SetString(s, 10)
	if x == nil {
		return nil, errors.Errorf("unable to parse integer constant %q", s)
	}
	return &Int{Typ: typ, X: x}, nil
}
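
// exampleNewIntFromString is an illustrative sketch (hypothetical helper, not
// part of the original API) exercising the literal forms accepted by
// NewIntFromString. Note that a signed hexadecimal literal with the sign bit
// set is interpreted in two's complement, so "s0xFFFF" of type i16 parses to
// -1, while "u0xFFFF" parses to 65535.
func exampleNewIntFromString() {
	inputs := []struct {
		typ *types.IntType
		s   string
	}{
		{types.I1, "true"},     // boolean literal
		{types.I32, "-37"},     // integer literal
		{types.I16, "u0xFFFF"}, // unsigned hexadecimal literal (65535)
		{types.I16, "s0xFFFF"}, // signed hexadecimal literal (-1)
	}
	for _, in := range inputs {
		c, err := NewIntFromString(in.typ, in.s)
		if err != nil {
			panic(err)
		}
		// Printed as rendered by String and Ident below:
		//   i1 true
		//   i32 -37
		//   i16 u0xFFFF
		//   i16 -1
		fmt.Println(c)
	}
}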

// String returns the LLVM syntax representation of the constant as a type-value
// pair.
func (c *Int) String() string {
	return fmt.Sprintf("%v %v", c.Type(), c.Ident())
}

// Type returns the type of the constant.
func (c *Int) Type() types.Type {
	return c.Typ
}

// Ident returns the identifier associated with the constant.
func (c *Int) Ident() string {
	// IntLit
	if c.Typ.BitSize == 1 {
		// "true"
		// "false"
		switch x := c.X.Int64(); x {
		case 0:
			return "false"
		case 1:
			return "true"
		default:
			panic(fmt.Errorf("invalid integer value of boolean type; expected 0 or 1, got %d", x))
		}
	}
	// Output x in hexadecimal notation if x is positive, greater than or equal
	// to 0x1000 and has a significantly lower entropy than decimal notation.

	// Minimum difference between entropy of decimal and hexadecimal notation to
	// output x in hexadecimal notation.
	const minEntropyDiff = 0.2
	// Maximum allowed entropy of hexadecimal notation to output x in hexadecimal
	// notation.
	//
	// This is useful as some hex values, while having lower entropy than their
	// decimal counterparts, do not improve readability.
	//
	// For instance, the decimal entropy of 7240739780546808700 is 9/10 = 0.9 and
	// the hexadecimal entropy of 0x647C4677A2884B7C is 8/16 = 0.5. As such the
	// entropy difference is 0.9-0.5 = 0.4, but the hexadecimal notation does not
	// improve readability. Thus we add an upper bound on the hexadecimal entropy,
	// and if the entropy is above this bound, output in decimal notation
	// instead.
	hexLength := len(c.X.Text(16))
	maxHexEntropy := calcMaxHexEntropy(hexLength)
	threshold := big.NewInt(0x1000) // 4096
	// Check entropy if x >= 0x1000.
	if c.X.Cmp(threshold) >= 0 {
		hexentropy := hexEntropy(c.X)
		decentropy := decimalEntropy(c.X)
		if hexentropy <= maxHexEntropy+0.01 && decentropy >= hexentropy+minEntropyDiff {
			return "u0x" + strings.ToUpper(c.X.Text(16))
		}
	}
	return c.X.String()
}
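
// exampleIdentNotation is an illustrative sketch (hypothetical helper, not part
// of the original API) of the entropy heuristic in Ident: large values with few
// distinct hex digits are printed in u0x form, while values that do not gain
// readability from hexadecimal stay in decimal.
func exampleIdentNotation() {
	// 0x80000000 uses only the hex digits '8' and '0', so hexadecimal wins.
	fmt.Println(NewInt(types.I64, 0x80000000).Ident()) // u0x80000000
	// 1234567 (0x12d687) has high entropy in both bases, so decimal is kept.
	fmt.Println(NewInt(types.I64, 1234567).Ident()) // 1234567
}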

// ### [ Helper functions ] ####################################################

// calcMaxHexEntropy returns the maximum allowed hexadecimal entropy based on
// the length of x in hexadecimal notation.
//
//    maxHexEntropy = 0.0    length < 4
//    maxHexEntropy = 0.5    length == 4 (2/4)
//    maxHexEntropy = 0.4    length == 5 (2/5)
//    maxHexEntropy = 0.34   length == 6 (2/6)
//    maxHexEntropy = 0.43   length == 7 (3/7)
//    maxHexEntropy = 0.38   length == 8 (3/8)
//    maxHexEntropy = 0.34   length == 9 (3/9)
//    maxHexEntropy = 0.3    length == 10 (3/10)
//    maxHexEntropy = 0.37   length == 11 (4/11)
//    maxHexEntropy = 0.34   length == 12 (4/12)
//    maxHexEntropy = 0.31   length == 13 (4/13)
//    maxHexEntropy = 0.29   length == 14 (4/14)
//    maxHexEntropy = 0.27   length == 15 (4/15)
//    maxHexEntropy = 0.25   length >= 16 (4/16)
func calcMaxHexEntropy(length int) float64 {
	if length > 16 {
		length = 16
	}
	switch {
	case length < 4:
		return 0
	case 4 <= length && length <= 6:
		return 2.0 / float64(length)
	case 7 <= length && length <= 10:
		return 3.0 / float64(length)
	// length >= 11
	default:
		return 4.0 / float64(length)
	}
}
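
// exampleMaxHexEntropy is an illustrative sketch (hypothetical helper, not part
// of the original API) evaluating a few of the thresholds tabulated above.
func exampleMaxHexEntropy() {
	fmt.Println(calcMaxHexEntropy(4))  // 0.5 (2/4)
	fmt.Println(calcMaxHexEntropy(8))  // 0.375 (3/8, listed as 0.38 above)
	fmt.Println(calcMaxHexEntropy(20)) // 0.25 (length is capped at 16)
}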

// hexEntropy returns the entropy of x when encoded in hexadecimal notation. The
// entropy is in range (0.0, 1.0] and is determined by the number of unique hex
// digits required to represent x in hexadecimal notation divided by the total
// number of hex digits ignoring prefix (capped by base 16).
//
// For instance, the hexadecimal value 0x80000000 (2147483648 in decimal)
// requires two unique hex digits to be represented in hexadecimal notation,
// ignoring prefix; namely '0' and '8'.
//
// Hex digits of 0x80000000:
//    0 0 0 0 0 0 0
//    8
//
// The total number of hex digits in 0x80000000 is 8. Thus, the entropy of
// 0x80000000 in hexadecimal notation is
//
//    unique_digits/total_digits
//    = 2/8
//    = 0.25
func hexEntropy(x *big.Int) float64 {
	const base = 16
	return intEntropy(x, base)
}

// decimalEntropy returns the entropy of x when encoded in decimal notation. The
// entropy is in range (0.0, 1.0] and is determined by the number of unique
// decimal digits required to represent x in decimal notation divided by the
// total number of digits (capped by base 10).
//
// For instance, the decimal value 2147483648 (0x80000000 in hex) requires seven
// unique decimal digits to be represented in decimal notation; namely '1', '2',
// '3', '4', '6', '7' and '8'.
//
// Decimal digits of 2147483648:
//    1
//    2
//    3
//    4 4 4
//    6
//    7
//    8 8
//
// The total number of decimal digits in 2147483648 is 10. Thus, the entropy of
// 2147483648 in decimal notation is
//
//    unique_digits/total_digits
//    = 7/10
//    = 0.7
func decimalEntropy(x *big.Int) float64 {
	const base = 10
	return intEntropy(x, base)
}

// intEntropy returns the entropy of x when encoded in base notation. Base must
// be between 2 and 62, inclusive. The entropy is in range (0.0, 1.0] and is
// determined by the number of unique digits required to represent x in base
// notation divided by the total number of digits (capped by base).
func intEntropy(x *big.Int, base int) float64 {
	if base < 2 || base > 62 {
		panic(fmt.Errorf("invalid base; expected 2 <= base <= 62, got %d", base))
	}
	const maxBase = 62
	var digits [maxBase]bool
	s := x.Text(base)
	// Locate unique digits.
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b == '-' {
			// skip sign.
			continue
		}
		d := digitValue(b)
		digits[d] = true
	}
	// Count unique digits.
	uniqueDigits := 0
	for i := 0; i < base; i++ {
		if digits[i] {
			uniqueDigits++
		}
	}
	length := len(s)
	if length > base {
		length = base
	}
	return float64(uniqueDigits) / float64(length)
}
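
// exampleEntropy is an illustrative sketch (hypothetical helper, not part of
// the original API) reproducing the worked examples from the hexEntropy and
// decimalEntropy documentation for the value 0x80000000.
func exampleEntropy() {
	x := big.NewInt(0x80000000)
	fmt.Println(hexEntropy(x))     // 0.25 (2 unique hex digits / 8 digits)
	fmt.Println(decimalEntropy(x)) // 0.7 (7 unique decimal digits / 10 digits)
}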

// digitValue returns the integer value of the given digit byte. As defined by
// *big.Int.Text, the digit uses the lower-case letters 'a' to 'z' for digit
// values 10 to 35, and the upper-case letters 'A' to 'Z' for digit values 36 to
// 61.
func digitValue(b byte) int {
	switch {
	case '0' <= b && b <= '9':
		return 0 + int(b-'0')
	case 'a' <= b && b <= 'z':
		return 10 + int(b-'a')
	case 'A' <= b && b <= 'Z':
		return 36 + int(b-'A')
	default:
		panic(fmt.Errorf("invalid digit byte; expected [0-9a-zA-Z], got %#U", b))
	}
}