github.com/llir/llvm@v0.3.6/internal/enc/enc.go

github.com/llir/llvm@v0.3.6/internal/enc/enc.go (about)

     1  // Package enc implements encoding of identifiers for LLVM IR assembly.
     2  package enc
     3  
     4  import (
     5  	"fmt"
     6  	"strconv"
     7  	"strings"
     8  )
     9  
    10  // GlobalName encodes a global name to its LLVM IR assembly representation.
    11  //
    12  // Examples:
    13  //    "foo" -> "@foo"
    14  //    "a b" -> `@"a b"`
    15  //    "世" -> `@"\E4\B8\96"`
    16  //    "2" -> `@"2"`
    17  //
    18  // References:
    19  //    http://www.llvm.org/docs/LangRef.html#identifiers
    20  func GlobalName(name string) string {
    21  	// Positive numeric global names are quoted to distinguish global names from
    22  	// global IDs; e.g.
    23  	//
    24  	//    @"2"
    25  	if _, err := strconv.ParseUint(name, 10, 64); err == nil {
    26  		return `@"` + name + `"`
    27  	}
    28  	return "@" + EscapeIdent(name)
    29  }
    30  
    31  // GlobalID encodes a global ID to its LLVM IR assembly representation.
    32  //
    33  // Examples:
    34  //    "42" -> "@42"
    35  //
    36  // References:
    37  //    http://www.llvm.org/docs/LangRef.html#identifiers
    38  func GlobalID(id int64) string {
    39  	if id < 0 {
    40  		panic(fmt.Errorf("negative global ID (%d); should be represented as global name", id))
    41  	}
    42  	return "@" + strconv.FormatInt(id, 10)
    43  }
    44  
    45  // LocalName encodes a local name to its LLVM IR assembly representation.
    46  //
    47  // Examples:
    48  //    "foo" -> "%foo"
    49  //    "a b" -> `%"a b"`
    50  //    "世" -> `%"\E4\B8\96"`
    51  //    "2" -> `%"2"`
    52  //
    53  // References:
    54  //    http://www.llvm.org/docs/LangRef.html#identifiers
    55  func LocalName(name string) string {
    56  	// Positive numeric local names are quoted to distinguish local names from
    57  	// local IDs; e.g.
    58  	//
    59  	//    %"2"
    60  	if _, err := strconv.ParseUint(name, 10, 64); err == nil {
    61  		return `%"` + name + `"`
    62  	}
    63  	return "%" + EscapeIdent(name)
    64  }
    65  
    66  // LocalID encodes a local ID to its LLVM IR assembly representation.
    67  //
    68  // Examples:
    69  //    "42" -> "%42"
    70  //
    71  // References:
    72  //    http://www.llvm.org/docs/LangRef.html#identifiers
    73  func LocalID(id int64) string {
    74  	if id < 0 {
    75  		panic(fmt.Errorf("negative local ID (%d); should be represented as local name", id))
    76  	}
    77  	return "%" + strconv.FormatInt(id, 10)
    78  }
    79  
    80  // LabelName encodes a label name to its LLVM IR assembly representation.
    81  //
    82  // Examples:
    83  //    "foo" -> "foo:"
    84  //    "a b" -> `"a b":`
    85  //    "世" -> `"\E4\B8\96":`
    86  //    "2" -> `"2":`
    87  //
    88  // References:
    89  //    http://www.llvm.org/docs/LangRef.html#identifiers
    90  func LabelName(name string) string {
    91  	// Positive numeric label names are quoted to distinguish label names from
    92  	// label IDs; e.g.
    93  	//
    94  	//    "2":
    95  	if _, err := strconv.ParseUint(name, 10, 64); err == nil {
    96  		return `"` + name + `":`
    97  	}
    98  	return EscapeIdent(name) + ":"
    99  }
   100  
   101  // LabelID encodes a label ID to its LLVM IR assembly representation.
   102  //
   103  // Examples:
   104  //    "42" -> 42:
   105  //
   106  // References:
   107  //    http://www.llvm.org/docs/LangRef.html#identifiers
   108  func LabelID(id int64) string {
   109  	if id < 0 {
   110  		panic(fmt.Errorf("negative label ID (%d); should be represented as label name", id))
   111  	}
   112  	return strconv.FormatInt(id, 10) + ":"
   113  }
   114  
   115  // TypeName encodes a type name to its LLVM IR assembly representation.
   116  //
   117  // Examples:
   118  //    "foo" -> "%foo"
   119  //    "a b" -> `%"a b"`
   120  //    "世" -> `%"\E4\B8\96"`
   121  //    "2" -> `%2`
   122  //
   123  // References:
   124  //    http://www.llvm.org/docs/LangRef.html#identifiers
   125  func TypeName(name string) string {
   126  	return "%" + EscapeIdent(name)
   127  }
   128  
   129  // AttrGroupID encodes a attribute group ID to its LLVM IR assembly
   130  // representation.
   131  //
   132  // Examples:
   133  //    "42" -> "#42"
   134  //
   135  // References:
   136  //    http://www.llvm.org/docs/LangRef.html#identifiers
   137  func AttrGroupID(id int64) string {
   138  	return "#" + strconv.FormatInt(id, 10)
   139  }
   140  
   141  // ComdatName encodes a comdat name to its LLVM IR assembly representation.
   142  //
   143  // Examples:
   144  //    "foo" -> $%foo"
   145  //    "a b" -> `$"a b"`
   146  //    "世" -> `$"\E4\B8\96"`
   147  //
   148  // References:
   149  //    http://www.llvm.org/docs/LangRef.html#identifiers
   150  func ComdatName(name string) string {
   151  	return "$" + EscapeIdent(name)
   152  }
   153  
   154  // MetadataName encodes a metadata name to its LLVM IR assembly representation.
   155  //
   156  // Examples:
   157  //    "foo" -> "!foo"
   158  //    "a b" -> `!a\20b`
   159  //    "世" -> `!\E4\B8\96`
   160  //
   161  // References:
   162  //    http://www.llvm.org/docs/LangRef.html#identifiers
   163  func MetadataName(name string) string {
   164  	valid := func(b byte) bool {
   165  		return strings.IndexByte(tail, b) != -1
   166  	}
   167  	if strings.ContainsRune(decimal, rune(name[0])) {
   168  		// Escape first character if digit, to distinguish named from unnamed
   169  		// metadata.
   170  		return "!" + `\3` + name[:1] + string(Escape([]byte(name[1:]), valid))
   171  	}
   172  	return "!" + string(Escape([]byte(name), valid))
   173  }
   174  
   175  // MetadataID encodes a metadata ID to its LLVM IR assembly representation.
   176  //
   177  // Examples:
   178  //    "42" -> "!42"
   179  //
   180  // References:
   181  //    http://www.llvm.org/docs/LangRef.html#identifiers
   182  func MetadataID(id int64) string {
   183  	return "!" + strconv.FormatInt(id, 10)
   184  }
   185  
   186  const (
   187  	// decimal specifies the decimal digit characters.
   188  	decimal = "0123456789"
   189  	// upper specifies the uppercase letters.
   190  	upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
   191  	// lower specifies the lowercase letters.
   192  	lower = "abcdefghijklmnopqrstuvwxyz"
   193  	// alpha specifies the alphabetic characters.
   194  	alpha = upper + lower
   195  	// head is the set of valid characters for the first character of an
   196  	// identifier.
   197  	head = alpha + "$-._"
   198  	// tail is the set of valid characters for the remaining characters of an
   199  	// identifier (i.e. all characters in the identifier except the first). All
   200  	// characters of a label may be from the tail set, even the first character.
   201  	tail = head + decimal
   202  	// quotedIdent is the set of valid characters in quoted identifiers, which
   203  	// excludes ASCII control characters, double quote, backslash and extended
   204  	// ASCII characters.
   205  	quotedIdent = " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"
   206  )
   207  
   208  // EscapeIdent replaces any characters which are not valid in identifiers with
   209  // corresponding hexadecimal escape sequence (\XX).
   210  func EscapeIdent(s string) string {
   211  	replace := false
   212  	extra := 0
   213  	for i := 0; i < len(s); i++ {
   214  		if strings.IndexByte(tail, s[i]) == -1 {
   215  			// Check if a replacement is required.
   216  			//
   217  			// Note, there are characters which are not valid in an identifier
   218  			// (e.g. '#') but are valid in a quoted identifier, and therefore
   219  			// require a replacement (i.e. quoted identifier), but no extra
   220  			// characters for the escape sequence.
   221  			replace = true
   222  		}
   223  		if strings.IndexByte(quotedIdent, s[i]) == -1 {
   224  			// Two extra bytes are required for each byte not valid in a quoted
   225  			// identifier; e.g.
   226  			//
   227  			//    "\t" -> `\09`
   228  			//    "世" -> `\E4\B8\96`
   229  			extra += 2
   230  		}
   231  	}
   232  	if !replace {
   233  		return s
   234  	}
   235  	// Replace invalid characters.
   236  	const hextable = "0123456789ABCDEF"
   237  	buf := make([]byte, len(s)+extra)
   238  	j := 0
   239  	for i := 0; i < len(s); i++ {
   240  		b := s[i]
   241  		if strings.IndexByte(quotedIdent, b) != -1 {
   242  			buf[j] = b
   243  			j++
   244  			continue
   245  		}
   246  		buf[j] = '\\'
   247  		buf[j+1] = hextable[b>>4]
   248  		buf[j+2] = hextable[b&0x0F]
   249  		j += 3
   250  	}
   251  	// Add surrounding quotes.
   252  	return `"` + string(buf) + `"`
   253  }
   254  
   255  // EscapeString replaces any characters in s categorized as invalid in string
   256  // literals with corresponding hexadecimal escape sequence (\XX).
   257  func EscapeString(s []byte) string {
   258  	valid := func(b byte) bool {
   259  		return ' ' <= b && b <= '~' && b != '"' && b != '\\'
   260  	}
   261  	return string(Escape(s, valid))
   262  }
   263  
   264  // Escape replaces any characters in s categorized as invalid by the valid
   265  // function with corresponding hexadecimal escape sequence (\XX).
   266  func Escape(s []byte, valid func(b byte) bool) string {
   267  	// Check if a replacement is required.
   268  	extra := 0
   269  	for i := 0; i < len(s); i++ {
   270  		if !valid(s[i]) {
   271  			// Two extra bytes are required for each invalid byte; e.g.
   272  			//    "#" -> `\23`
   273  			//    "世" -> `\E4\B8\96`
   274  			extra += 2
   275  		}
   276  	}
   277  	if extra == 0 {
   278  		return string(s)
   279  	}
   280  	// Replace invalid characters.
   281  	const hextable = "0123456789ABCDEF"
   282  	buf := make([]byte, len(s)+extra)
   283  	j := 0
   284  	for i := 0; i < len(s); i++ {
   285  		b := s[i]
   286  		if valid(b) {
   287  			buf[j] = b
   288  			j++
   289  			continue
   290  		}
   291  		buf[j] = '\\'
   292  		buf[j+1] = hextable[b>>4]
   293  		buf[j+2] = hextable[b&0x0F]
   294  		j += 3
   295  	}
   296  	return string(buf)
   297  }
   298  
   299  // Unescape replaces hexadecimal escape sequences (\xx) in s with their
   300  // corresponding characters.
   301  func Unescape(s string) []byte {
   302  	if !strings.ContainsRune(s, '\\') {
   303  		return []byte(s)
   304  	}
   305  	j := 0
   306  	buf := []byte(s)
   307  	for i := 0; i < len(s); i++ {
   308  		b := s[i]
   309  		if b == '\\' {
   310  			if len(s) > i+1 && s[i+1] == '\\' {
   311  				b = '\\'
   312  				i++
   313  			} else if len(s) > i+2 {
   314  				x1, ok := unhex(s[i+1])
   315  				if ok {
   316  					x2, ok := unhex(s[i+2])
   317  					if ok {
   318  						b = x1<<4 | x2
   319  						i += 2
   320  					}
   321  				}
   322  			}
   323  		}
   324  		if i != j {
   325  			buf[j] = b
   326  		}
   327  		j++
   328  	}
   329  	return buf[:j]
   330  }
   331  
   332  // Quote returns s as a double-quoted string literal.
   333  func Quote(s []byte) string {
   334  	return `"` + string(EscapeString(s)) + `"`
   335  }
   336  
   337  // Unquote interprets s as a double-quoted string literal, returning the string
   338  // value that s quotes.
   339  func Unquote(s string) []byte {
   340  	if len(s) < 2 {
   341  		panic(fmt.Errorf("invalid length of quoted string; expected >= 2, got %d", len(s)))
   342  	}
   343  	if !strings.HasPrefix(s, `"`) {
   344  		panic(fmt.Errorf("invalid quoted string `%s`; missing quote character prefix", s))
   345  	}
   346  	if !strings.HasSuffix(s, `"`) {
   347  		panic(fmt.Errorf("invalid quoted string `%s`; missing quote character suffix", s))
   348  	}
   349  	// Skip double-quotes.
   350  	s = s[1 : len(s)-1]
   351  	return Unescape(s)
   352  }
   353  
   354  // unhex returns the numeric value represented by the hexadecimal digit b. It
   355  // returns false if b is not a hexadecimal digit.
   356  func unhex(b byte) (v byte, ok bool) {
   357  	// This is an adapted copy of the unhex function from the strconv package,
   358  	// which is governed by a BSD-style license.
   359  	switch {
   360  	case '0' <= b && b <= '9':
   361  		return b - '0', true
   362  	case 'a' <= b && b <= 'f':
   363  		return b - 'a' + 10, true
   364  	case 'A' <= b && b <= 'F':
   365  		return b - 'A' + 10, true
   366  	}
   367  	return 0, false
   368  }