github.com/evanw/esbuild@v0.21.4/internal/js_ast/js_ident.go (about)

     1  package js_ast
     2  
     3  import (
     4  	"strings"
     5  	"unicode"
     6  	"unicode/utf8"
     7  )
     8  
     9  func IsIdentifier(text string) bool {
    10  	if len(text) == 0 {
    11  		return false
    12  	}
    13  	for i, codePoint := range text {
    14  		if i == 0 {
    15  			if !IsIdentifierStart(codePoint) {
    16  				return false
    17  			}
    18  		} else {
    19  			if !IsIdentifierContinue(codePoint) {
    20  				return false
    21  			}
    22  		}
    23  	}
    24  	return true
    25  }
    26  
    27  func IsIdentifierES5AndESNext(text string) bool {
    28  	if len(text) == 0 {
    29  		return false
    30  	}
    31  	for i, codePoint := range text {
    32  		if i == 0 {
    33  			if !IsIdentifierStartES5AndESNext(codePoint) {
    34  				return false
    35  			}
    36  		} else {
    37  			if !IsIdentifierContinueES5AndESNext(codePoint) {
    38  				return false
    39  			}
    40  		}
    41  	}
    42  	return true
    43  }
    44  
    45  func ForceValidIdentifier(prefix string, text string) string {
    46  	sb := strings.Builder{}
    47  
    48  	// Private identifiers must be prefixed by "#"
    49  	if prefix != "" {
    50  		sb.WriteString(prefix)
    51  	}
    52  
    53  	// Identifier start
    54  	c, width := utf8.DecodeRuneInString(text)
    55  	text = text[width:]
    56  	if IsIdentifierStart(c) {
    57  		sb.WriteRune(c)
    58  	} else {
    59  		sb.WriteRune('_')
    60  	}
    61  
    62  	// Identifier continue
    63  	for text != "" {
    64  		c, width := utf8.DecodeRuneInString(text)
    65  		text = text[width:]
    66  		if IsIdentifierContinue(c) {
    67  			sb.WriteRune(c)
    68  		} else {
    69  			sb.WriteRune('_')
    70  		}
    71  	}
    72  
    73  	return sb.String()
    74  }
    75  
    76  // This does "IsIdentifier(UTF16ToString(text))" without any allocations
    77  func IsIdentifierUTF16(text []uint16) bool {
    78  	n := len(text)
    79  	if n == 0 {
    80  		return false
    81  	}
    82  	for i := 0; i < n; i++ {
    83  		isStart := i == 0
    84  		r1 := rune(text[i])
    85  		if r1 >= 0xD800 && r1 <= 0xDBFF && i+1 < n {
    86  			if r2 := rune(text[i+1]); r2 >= 0xDC00 && r2 <= 0xDFFF {
    87  				r1 = (r1 << 10) + r2 + (0x10000 - (0xD800 << 10) - 0xDC00)
    88  				i++
    89  			}
    90  		}
    91  		if isStart {
    92  			if !IsIdentifierStart(r1) {
    93  				return false
    94  			}
    95  		} else {
    96  			if !IsIdentifierContinue(r1) {
    97  				return false
    98  			}
    99  		}
   100  	}
   101  	return true
   102  }
   103  
   104  // This does "IsIdentifierES5AndESNext(UTF16ToString(text))" without any allocations
   105  func IsIdentifierES5AndESNextUTF16(text []uint16) bool {
   106  	n := len(text)
   107  	if n == 0 {
   108  		return false
   109  	}
   110  	for i := 0; i < n; i++ {
   111  		isStart := i == 0
   112  		r1 := rune(text[i])
   113  		if r1 >= 0xD800 && r1 <= 0xDBFF && i+1 < n {
   114  			if r2 := rune(text[i+1]); r2 >= 0xDC00 && r2 <= 0xDFFF {
   115  				r1 = (r1 << 10) + r2 + (0x10000 - (0xD800 << 10) - 0xDC00)
   116  				i++
   117  			}
   118  		}
   119  		if isStart {
   120  			if !IsIdentifierStartES5AndESNext(r1) {
   121  				return false
   122  			}
   123  		} else {
   124  			if !IsIdentifierContinueES5AndESNext(r1) {
   125  				return false
   126  			}
   127  		}
   128  	}
   129  	return true
   130  }
   131  
   132  func IsIdentifierStart(codePoint rune) bool {
   133  	switch codePoint {
   134  	case '_', '$',
   135  		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
   136  		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
   137  		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
   138  		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
   139  		return true
   140  	}
   141  
   142  	// All ASCII identifier start code points are listed above
   143  	if codePoint < 0x7F {
   144  		return false
   145  	}
   146  
   147  	return unicode.Is(idStartES5OrESNext, codePoint)
   148  }
   149  
   150  func IsIdentifierContinue(codePoint rune) bool {
   151  	switch codePoint {
   152  	case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   153  		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
   154  		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
   155  		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
   156  		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
   157  		return true
   158  	}
   159  
   160  	// All ASCII identifier start code points are listed above
   161  	if codePoint < 0x7F {
   162  		return false
   163  	}
   164  
   165  	// ZWNJ and ZWJ are allowed in identifiers
   166  	if codePoint == 0x200C || codePoint == 0x200D {
   167  		return true
   168  	}
   169  
   170  	return unicode.Is(idContinueES5OrESNext, codePoint)
   171  }
   172  
   173  func IsIdentifierStartES5AndESNext(codePoint rune) bool {
   174  	switch codePoint {
   175  	case '_', '$',
   176  		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
   177  		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
   178  		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
   179  		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
   180  		return true
   181  	}
   182  
   183  	// All ASCII identifier start code points are listed above
   184  	if codePoint < 0x7F {
   185  		return false
   186  	}
   187  
   188  	return unicode.Is(idStartES5AndESNext, codePoint)
   189  }
   190  
   191  func IsIdentifierContinueES5AndESNext(codePoint rune) bool {
   192  	switch codePoint {
   193  	case '_', '$', '0', '1', '2', '3', '4', '5', '6', '7', '8', '9',
   194  		'a', 'b', 'c', 'd', 'e', 'f', 'g', 'h', 'i', 'j', 'k', 'l', 'm',
   195  		'n', 'o', 'p', 'q', 'r', 's', 't', 'u', 'v', 'w', 'x', 'y', 'z',
   196  		'A', 'B', 'C', 'D', 'E', 'F', 'G', 'H', 'I', 'J', 'K', 'L', 'M',
   197  		'N', 'O', 'P', 'Q', 'R', 'S', 'T', 'U', 'V', 'W', 'X', 'Y', 'Z':
   198  		return true
   199  	}
   200  
   201  	// All ASCII identifier start code points are listed above
   202  	if codePoint < 0x7F {
   203  		return false
   204  	}
   205  
   206  	// ZWNJ and ZWJ are allowed in identifiers
   207  	if codePoint == 0x200C || codePoint == 0x200D {
   208  		return true
   209  	}
   210  
   211  	return unicode.Is(idContinueES5AndESNext, codePoint)
   212  }
   213  
   214  // See the "White Space Code Points" table in the ECMAScript standard
   215  func IsWhitespace(codePoint rune) bool {
   216  	switch codePoint {
   217  	case
   218  		'\u0009', // character tabulation
   219  		'\u000B', // line tabulation
   220  		'\u000C', // form feed
   221  		'\u0020', // space
   222  		'\u00A0', // no-break space
   223  
   224  		// Unicode "Space_Separator" code points
   225  		'\u1680', // ogham space mark
   226  		'\u2000', // en quad
   227  		'\u2001', // em quad
   228  		'\u2002', // en space
   229  		'\u2003', // em space
   230  		'\u2004', // three-per-em space
   231  		'\u2005', // four-per-em space
   232  		'\u2006', // six-per-em space
   233  		'\u2007', // figure space
   234  		'\u2008', // punctuation space
   235  		'\u2009', // thin space
   236  		'\u200A', // hair space
   237  		'\u202F', // narrow no-break space
   238  		'\u205F', // medium mathematical space
   239  		'\u3000', // ideographic space
   240  
   241  		'\uFEFF': // zero width non-breaking space
   242  		return true
   243  
   244  	default:
   245  		return false
   246  	}
   247  }