github.com/evanw/esbuild@v0.21.4/internal/js_ast/js_ident.go (about) 1 package js_ast 2 3 import ( 4 "strings" 5 "unicode" 6 "unicode/utf8" 7 ) 8 9 func IsIdentifier(text string) bool { 10 if len(text) == 0 { 11 return false 12 } 13 for i, codePoint := range text { 14 if i == 0 { 15 if !IsIdentifierStart(codePoint) { 16 return false 17 } 18 } else { 19 if !IsIdentifierContinue(codePoint) { 20 return false 21 } 22 } 23 } 24 return true 25 } 26 27 func IsIdentifierES5AndESNext(text string) bool { 28 if len(text) == 0 { 29 return false 30 } 31 for i, codePoint := range text { 32 if i == 0 { 33 if !IsIdentifierStartES5AndESNext(codePoint) { 34 return false 35 } 36 } else { 37 if !IsIdentifierContinueES5AndESNext(codePoint) { 38 return false 39 } 40 } 41 } 42 return true 43 } 44 45 func ForceValidIdentifier(prefix string, text string) string { 46 sb := strings.Builder{} 47 48 // Private identifiers must be prefixed by "#" 49 if prefix != "" { 50 sb.WriteString(prefix) 51 } 52 53 // Identifier start 54 c, width := utf8.DecodeRuneInString(text) 55 text = text[width:] 56 if IsIdentifierStart(c) { 57 sb.WriteRune(c) 58 } else { 59 sb.WriteRune('_') 60 } 61 62 // Identifier continue 63 for text != "" { 64 c, width := utf8.DecodeRuneInString(text) 65 text = text[width:] 66 if IsIdentifierContinue(c) { 67 sb.WriteRune(c) 68 } else { 69 sb.WriteRune('_') 70 } 71 } 72 73 return sb.String() 74 } 75 76 // This does "IsIdentifier(UTF16ToString(text))" without any allocations 77 func IsIdentifierUTF16(text []uint16) bool { 78 n := len(text) 79 if n == 0 { 80 return false 81 } 82 for i := 0; i < n; i++ { 83 isStart := i == 0 84 r1 := rune(text[i]) 85 if r1 >= 0xD800 && r1 <= 0xDBFF && i+1 < n { 86 if r2 := rune(text[i+1]); r2 >= 0xDC00 && r2 <= 0xDFFF { 87 r1 = (r1 << 10) + r2 + (0x10000 - (0xD800 << 10) - 0xDC00) 88 i++ 89 } 90 } 91 if isStart { 92 if !IsIdentifierStart(r1) { 93 return false 94 } 95 } else { 96 if !IsIdentifierContinue(r1) { 97 return false 98 } 99 } 100 } 101 return true 102 } 103 104 // This does "IsIdentifierES5AndESNext(UTF16ToString(text))" without any allocations 105 func IsIdentifierES5AndESNextUTF16(text []uint16) bool { 106 n := len(text) 107 if n == 0 { 108 return false 109 } 110 for i := 0; i < n; i++ { 111 isStart := i == 0 112 r1 := rune(text[i]) 113 if r1 >= 0xD800 && r1 <= 0xDBFF && i+1 < n { 114 if r2 := rune(text[i+1]); r2 >= 0xDC00 && r2 <= 0xDFFF { 115 r1 = (r1 << 10) + r2 + (0x10000 - (0xD800 << 10) - 0xDC00) 116 i++ 117 } 118 } 119 if isStart { 120 if !IsIdentifierStartES5AndESNext(r1) { 121 return false 122 } 123 } else { 124 if !IsIdentifierContinueES5AndESNext(r1) { 125 return false 126 } 127 } 128 } 129 return true 130 } 131 132 func IsIdentifierStart(codePoint rune) bool { 133 switch codePoint { 134 case '_', '$', 135 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 136 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 137 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 138 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': 139 return true 140 } 141 142 // All ASCII identifier start code points are listed above 143 if codePoint < 0x7F { 144 return false 145 } 146 147 return unicode.Is(idStartES5OrESNext, codePoint) 148 } 149 150 func IsIdentifierContinue(codePoint rune) bool { 151 switch codePoint { 152 case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 153 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 154 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 155 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 156 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': 157 return true 158 } 159 160 // All ASCII identifier start code points are listed above 161 if codePoint < 0x7F { 162 return false 163 } 164 165 // ZWNJ and ZWJ are allowed in identifiers 166 if codePoint == 0x200C || codePoint == 0x200D { 167 return true 168 } 169 170 return unicode.Is(idContinueES5OrESNext, codePoint) 171 } 172 173 func IsIdentifierStartES5AndESNext(codePoint rune) bool { 174 switch codePoint { 175 case '_', '$', 176 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 177 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 178 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 179 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': 180 return true 181 } 182 183 // All ASCII identifier start code points are listed above 184 if codePoint < 0x7F { 185 return false 186 } 187 188 return unicode.Is(idStartES5AndESNext, codePoint) 189 } 190 191 func IsIdentifierContinueES5AndESNext(codePoint rune) bool { 192 switch codePoint { 193 case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 194 'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm', 195 'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z', 196 'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M', 197 'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z': 198 return true 199 } 200 201 // All ASCII identifier start code points are listed above 202 if codePoint < 0x7F { 203 return false 204 } 205 206 // ZWNJ and ZWJ are allowed in identifiers 207 if codePoint == 0x200C || codePoint == 0x200D { 208 return true 209 } 210 211 return unicode.Is(idContinueES5AndESNext, codePoint) 212 } 213 214 // See the "White Space Code Points" table in the ECMAScript standard 215 func IsWhitespace(codePoint rune) bool { 216 switch codePoint { 217 case 218 '\u0009', // character tabulation 219 '\u000B', // line tabulation 220 '\u000C', // form feed 221 '\u0020', // space 222 '\u00A0', // no-break space 223 224 // Unicode "Space_Separator" code points 225 '\u1680', // ogham space mark 226 '\u2000', // en quad 227 '\u2001', // em quad 228 '\u2002', // en space 229 '\u2003', // em space 230 '\u2004', // three-per-em space 231 '\u2005', // four-per-em space 232 '\u2006', // six-per-em space 233 '\u2007', // figure space 234 '\u2008', // punctuation space 235 '\u2009', // thin space 236 '\u200A', // hair space 237 '\u202F', // narrow no-break space 238 '\u205F', // medium mathematical space 239 '\u3000', // ideographic space 240 241 '\uFEFF': // zero width non-breaking space 242 return true 243 244 default: 245 return false 246 } 247 }