github.com/expr-lang/expr@v1.16.9/parser/lexer/utils.go (about) 1 package lexer 2 3 import ( 4 "fmt" 5 "math" 6 "strings" 7 "unicode/utf8" 8 ) 9 10 var ( 11 newlineNormalizer = strings.NewReplacer("\r\n", "\n", "\r", "\n") 12 ) 13 14 // Unescape takes a quoted string, unquotes, and unescapes it. 15 func unescape(value string) (string, error) { 16 // All strings normalize newlines to the \n representation. 17 value = newlineNormalizer.Replace(value) 18 n := len(value) 19 20 // Nothing to unescape / decode. 21 if n < 2 { 22 return value, fmt.Errorf("unable to unescape string") 23 } 24 25 // Quoted string of some form, must have same first and last char. 26 if value[0] != value[n-1] || (value[0] != '"' && value[0] != '\'') { 27 return value, fmt.Errorf("unable to unescape string") 28 } 29 30 value = value[1 : n-1] 31 32 // The string contains escape characters. 33 // The following logic is adapted from `strconv/quote.go` 34 var runeTmp [utf8.UTFMax]byte 35 size := 3 * uint64(n) / 2 36 if size >= math.MaxInt { 37 return "", fmt.Errorf("too large string") 38 } 39 buf := make([]byte, 0, size) 40 for len(value) > 0 { 41 c, multibyte, rest, err := unescapeChar(value) 42 if err != nil { 43 return "", err 44 } 45 value = rest 46 if c < utf8.RuneSelf || !multibyte { 47 buf = append(buf, byte(c)) 48 } else { 49 n := utf8.EncodeRune(runeTmp[:], c) 50 buf = append(buf, runeTmp[:n]...) 51 } 52 } 53 return string(buf), nil 54 } 55 56 // unescapeChar takes a string input and returns the following info: 57 // 58 // value - the escaped unicode rune at the front of the string. 59 // multibyte - whether the rune value might require multiple bytes to represent. 60 // tail - the remainder of the input string. 61 // err - error value, if the character could not be unescaped. 62 // 63 // When multibyte is true the return value may still fit within a single byte, 64 // but a multibyte conversion is attempted which is more expensive than when the 65 // value is known to fit within one byte. 
func unescapeChar(s string) (value rune, multibyte bool, tail string, err error) {
	// An empty input has no character to decode; report that instead of
	// panicking on s[0] below.
	if len(s) == 0 {
		return 0, false, "", fmt.Errorf("unable to unescape string")
	}

	// 1. Character is not an escape sequence.
	switch c := s[0]; {
	case c >= utf8.RuneSelf:
		r, size := utf8.DecodeRuneInString(s)
		return r, true, s[size:], nil
	case c != '\\':
		return rune(c), false, s[1:], nil
	}

	// 2. Last character is the start of an escape sequence.
	if len(s) <= 1 {
		return 0, false, "", fmt.Errorf("unable to unescape string, found '\\' as last character")
	}

	c := s[1]
	s = s[2:]
	switch c {
	// 3. Common escape sequences shared with Google SQL
	case 'a':
		value = '\a'
	case 'b':
		value = '\b'
	case 'f':
		value = '\f'
	case 'n':
		value = '\n'
	case 'r':
		value = '\r'
	case 't':
		value = '\t'
	case 'v':
		value = '\v'
	case '\\':
		value = '\\'
	case '\'':
		value = '\''
	case '"':
		value = '"'
	case '`':
		value = '`'
	case '?':
		value = '?'

	// 4. Unicode escape sequences, reproduced from `strconv/quote.go`
	case 'x', 'X', 'u', 'U':
		// Number of hex digits that must follow the escape letter.
		var n int
		switch c {
		case 'x', 'X':
			n = 2
		case 'u':
			n = 4
		case 'U':
			n = 8
		}
		if len(s) < n {
			return 0, false, "", fmt.Errorf("unable to unescape string")
		}
		var v rune
		for j := 0; j < n; j++ {
			x, ok := unhex(s[j])
			if !ok {
				return 0, false, "", fmt.Errorf("unable to unescape string")
			}
			v = v<<4 | x
		}
		s = s[n:]
		// rune is a signed 32-bit integer, so eight hex digits with the top
		// bit set (e.g. \U80000041) wrap around to a negative value. Checking
		// only the upper bound would let such values through, and a negative
		// rune converted to a byte yields an arbitrary character.
		if v < 0 || v > utf8.MaxRune {
			return 0, false, "", fmt.Errorf("unable to unescape string")
		}
		value = v
		multibyte = true

	// 5. Octal escape sequences, must be three digits \[0-3][0-7][0-7]
	case '0', '1', '2', '3':
		if len(s) < 2 {
			return 0, false, "", fmt.Errorf("unable to unescape octal sequence in string")
		}
		v := rune(c - '0')
		for j := 0; j < 2; j++ {
			x := s[j]
			if x < '0' || x > '7' {
				return 0, false, "", fmt.Errorf("unable to unescape octal sequence in string")
			}
			v = v*8 + rune(x-'0')
		}
		// The largest possible value is \377 (255), so no range check
		// against utf8.MaxRune is needed here.
		value = v
		s = s[2:]
		multibyte = true

	// Unknown escape sequence. The remainder after the unrecognized
	// character is still reported as the tail.
	default:
		return 0, false, s, fmt.Errorf("unable to unescape string")
	}

	return value, multibyte, s, nil
}

// unhex converts a single ASCII hexadecimal digit (0-9, a-f, A-F) to its
// numeric value. The boolean result is false when b is not a hex digit.
func unhex(b byte) (rune, bool) {
	c := rune(b)
	switch {
	case '0' <= c && c <= '9':
		return c - '0', true
	case 'a' <= c && c <= 'f':
		return c - 'a' + 10, true
	case 'A' <= c && c <= 'F':
		return c - 'A' + 10, true
	}
	return 0, false
}