// GlobalName encodes a global name to its LLVM IR assembly representation.
//
// Names which cannot be written as a bare identifier are quoted. This includes
// every name starting with a decimal digit (and thus every purely numeric
// name, regardless of its length), as an unquoted leading digit would be lexed
// as a global ID.
//
// Examples:
//    "foo" -> "@foo"
//    "a b" -> `@"a b"`
//    "世" -> `@"\E4\B8\96"`
//    "2" -> `@"2"`
//    "2x" -> `@"2x"`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func GlobalName(name string) string {
	return "@" + escapeName(name)
}

// GlobalID encodes a global ID to its LLVM IR assembly representation. It
// panics if id is negative.
//
// Examples:
//    42 -> "@42"
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func GlobalID(id int64) string {
	if id < 0 {
		panic(fmt.Errorf("negative global ID (%d); should be represented as global name", id))
	}
	return "@" + strconv.FormatInt(id, 10)
}

// LocalName encodes a local name to its LLVM IR assembly representation.
//
// Names which cannot be written as a bare identifier are quoted. This includes
// every name starting with a decimal digit (and thus every purely numeric
// name, regardless of its length), as an unquoted leading digit would be lexed
// as a local ID.
//
// Examples:
//    "foo" -> "%foo"
//    "a b" -> `%"a b"`
//    "世" -> `%"\E4\B8\96"`
//    "2" -> `%"2"`
//    "2x" -> `%"2x"`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func LocalName(name string) string {
	return "%" + escapeName(name)
}

// LocalID encodes a local ID to its LLVM IR assembly representation. It panics
// if id is negative.
//
// Examples:
//    42 -> "%42"
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func LocalID(id int64) string {
	if id < 0 {
		panic(fmt.Errorf("negative local ID (%d); should be represented as local name", id))
	}
	return "%" + strconv.FormatInt(id, 10)
}

// LabelName encodes a label name to its LLVM IR assembly representation.
//
// Examples:
//    "foo" -> "foo:"
//    "a b" -> `"a b":`
//    "世" -> `"\E4\B8\96":`
//    "2" -> `"2":`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func LabelName(name string) string {
	// Purely numeric label names are quoted to distinguish label names from
	// label IDs; e.g.
	//
	//    "2":
	//
	// The check is done on the digits themselves rather than by parsing the
	// number, so that numeric names too large for 64 bits are quoted as well.
	if isNumeric(name) {
		return `"` + name + `":`
	}
	return EscapeIdent(name) + ":"
}

// LabelID encodes a label ID to its LLVM IR assembly representation. It panics
// if id is negative.
//
// Examples:
//    42 -> "42:"
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func LabelID(id int64) string {
	if id < 0 {
		panic(fmt.Errorf("negative label ID (%d); should be represented as label name", id))
	}
	return strconv.FormatInt(id, 10) + ":"
}

// TypeName encodes a type name to its LLVM IR assembly representation.
//
// Examples:
//    "foo" -> "%foo"
//    "a b" -> `%"a b"`
//    "世" -> `%"\E4\B8\96"`
//    "2" -> `%2`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func TypeName(name string) string {
	return "%" + EscapeIdent(name)
}

// AttrGroupID encodes an attribute group ID to its LLVM IR assembly
// representation.
//
// Examples:
//    42 -> "#42"
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func AttrGroupID(id int64) string {
	return "#" + strconv.FormatInt(id, 10)
}

// ComdatName encodes a comdat name to its LLVM IR assembly representation.
//
// Examples:
//    "foo" -> "$foo"
//    "a b" -> `$"a b"`
//    "世" -> `$"\E4\B8\96"`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func ComdatName(name string) string {
	return "$" + EscapeIdent(name)
}

// MetadataName encodes a metadata name to its LLVM IR assembly representation.
// Metadata names are never quoted; bytes outside of the tail set are replaced
// by \XX escape sequences instead. An empty name is encoded as "!".
//
// Examples:
//    "foo" -> "!foo"
//    "a b" -> `!a\20b`
//    "世" -> `!\E4\B8\96`
//    "2x" -> `!\32x`
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func MetadataName(name string) string {
	if len(name) == 0 {
		// Nothing to escape; also guards the first-byte inspection below.
		return "!"
	}
	if isDigit(name[0]) {
		// Escape first character if digit, to distinguish named from unnamed
		// metadata. The ASCII code of digit d is 0x3d, hence `\3` followed by
		// the digit itself.
		return "!" + `\3` + name[:1] + escape(name[1:], isTail)
	}
	return "!" + escape(name, isTail)
}

// MetadataID encodes a metadata ID to its LLVM IR assembly representation.
//
// Examples:
//    42 -> "!42"
//
// References:
//    http://www.llvm.org/docs/LangRef.html#identifiers
func MetadataID(id int64) string {
	return "!" + strconv.FormatInt(id, 10)
}

const (
	// decimal specifies the decimal digit characters.
	decimal = "0123456789"
	// upper specifies the uppercase letters.
	upper = "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
	// lower specifies the lowercase letters.
	lower = "abcdefghijklmnopqrstuvwxyz"
	// alpha specifies the alphabetic characters.
	alpha = upper + lower
	// head is the set of valid characters for the first character of an
	// identifier.
	head = alpha + "$-._"
	// tail is the set of valid characters for the remaining characters of an
	// identifier (i.e. all characters in the identifier except the first). All
	// characters of a label may be from the tail set, even the first character.
	tail = head + decimal
	// quotedIdent is the set of valid characters in quoted identifiers, which
	// excludes ASCII control characters, double quote, backslash and extended
	// ASCII characters.
	quotedIdent = " !#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWXYZ[]^_`abcdefghijklmnopqrstuvwxyz{|}~"
)

// isDigit reports whether b is an ASCII decimal digit.
func isDigit(b byte) bool {
	return '0' <= b && b <= '9'
}

// isNumeric reports whether s is a non-empty sequence of ASCII decimal digits.
func isNumeric(s string) bool {
	if len(s) == 0 {
		return false
	}
	for i := 0; i < len(s); i++ {
		if !isDigit(s[i]) {
			return false
		}
	}
	return true
}

// isTail reports whether b is valid in an unquoted identifier.
func isTail(b byte) bool {
	return strings.IndexByte(tail, b) != -1
}

// isQuotedIdent reports whether b may appear verbatim in a quoted identifier.
func isQuotedIdent(b byte) bool {
	return strings.IndexByte(quotedIdent, b) != -1
}

// escapeName escapes a global or local name (without its sigil). In addition
// to the quoting done by EscapeIdent, it quotes names whose first character is
// not in the head set (i.e. a decimal digit), since such a name would
// otherwise be read back as an ID; e.g.
//
//    "2" -> `"2"`
//    "2x" -> `"2x"`
func escapeName(name string) string {
	ident := EscapeIdent(name)
	// EscapeIdent returns its argument unchanged exactly when it did not quote
	// it, in which case every byte of name is in the tail set.
	if len(name) > 0 && ident == name && strings.IndexByte(head, name[0]) == -1 {
		return `"` + ident + `"`
	}
	return ident
}

// escape returns s with every byte rejected by valid replaced by the
// corresponding hexadecimal escape sequence (\XX, uppercase hex digits). If no
// byte is rejected, s is returned unchanged.
func escape(s string, valid func(b byte) bool) string {
	// Count the bytes to replace, both to detect the no-op case and to size
	// the output buffer exactly.
	invalid := 0
	for i := 0; i < len(s); i++ {
		if !valid(s[i]) {
			invalid++
		}
	}
	if invalid == 0 {
		return s
	}
	const hextable = "0123456789ABCDEF"
	var sb strings.Builder
	// Two extra bytes are required for each invalid byte; e.g.
	//    "#" -> `\23`
	//    "世" -> `\E4\B8\96`
	sb.Grow(len(s) + 2*invalid)
	for i := 0; i < len(s); i++ {
		b := s[i]
		if valid(b) {
			sb.WriteByte(b)
			continue
		}
		sb.WriteByte('\\')
		sb.WriteByte(hextable[b>>4])
		sb.WriteByte(hextable[b&0x0F])
	}
	return sb.String()
}

// EscapeIdent returns s unchanged if it consists solely of characters valid in
// an unquoted identifier. Otherwise it returns s surrounded by double quotes,
// with any characters which are not valid in quoted identifiers replaced by the
// corresponding hexadecimal escape sequence (\XX).
//
// Note, there are characters which are not valid in an identifier (e.g. '#')
// but are valid in a quoted identifier; those trigger quoting but are not
// escaped.
func EscapeIdent(s string) string {
	for i := 0; i < len(s); i++ {
		if !isTail(s[i]) {
			return `"` + escape(s, isQuotedIdent) + `"`
		}
	}
	return s
}

// EscapeString replaces any characters in s categorized as invalid in string
// literals (anything but printable ASCII, as well as double quote and
// backslash) with corresponding hexadecimal escape sequence (\XX).
func EscapeString(s []byte) string {
	valid := func(b byte) bool {
		return ' ' <= b && b <= '~' && b != '"' && b != '\\'
	}
	return escape(string(s), valid)
}

// Escape replaces any characters in s categorized as invalid by the valid
// function with corresponding hexadecimal escape sequence (\XX).
func Escape(s []byte, valid func(b byte) bool) string {
	return escape(string(s), valid)
}

// Unescape replaces hexadecimal escape sequences (\XX, either case) and
// escaped backslashes (\\) in s with their corresponding characters. A
// backslash which does not start a valid escape sequence is kept verbatim.
func Unescape(s string) []byte {
	if strings.IndexByte(s, '\\') == -1 {
		return []byte(s)
	}
	// The unescaped output is never longer than the input.
	buf := make([]byte, 0, len(s))
	for i := 0; i < len(s); i++ {
		b := s[i]
		if b == '\\' {
			switch {
			case i+1 < len(s) && s[i+1] == '\\':
				// Escaped backslash; skip the second one.
				i++
			case i+2 < len(s):
				hi, okHi := unhex(s[i+1])
				lo, okLo := unhex(s[i+2])
				if okHi && okLo {
					b = hi<<4 | lo
					i += 2
				}
			}
		}
		buf = append(buf, b)
	}
	return buf
}

// Quote returns s as a double-quoted string literal.
func Quote(s []byte) string {
	return `"` + EscapeString(s) + `"`
}

// Unquote interprets s as a double-quoted string literal, returning the string
// value that s quotes. It panics if s is not surrounded by double quotes.
func Unquote(s string) []byte {
	if len(s) < 2 {
		panic(fmt.Errorf("invalid length of quoted string; expected >= 2, got %d", len(s)))
	}
	if !strings.HasPrefix(s, `"`) {
		panic(fmt.Errorf("invalid quoted string `%s`; missing quote character prefix", s))
	}
	if !strings.HasSuffix(s, `"`) {
		panic(fmt.Errorf("invalid quoted string `%s`; missing quote character suffix", s))
	}
	// Skip double-quotes.
	return Unescape(s[1 : len(s)-1])
}

// unhex returns the numeric value represented by the hexadecimal digit b. It
// returns false if b is not a hexadecimal digit.
func unhex(b byte) (v byte, ok bool) {
	// This is an adapted copy of the unhex function from the strconv package,
	// which is governed by a BSD-style license.
	switch {
	case '0' <= b && b <= '9':
		return b - '0', true
	case 'a' <= b && b <= 'f':
		return b - 'a' + 10, true
	case 'A' <= b && b <= 'F':
		return b - 'A' + 10, true
	}
	return 0, false
}