// Copyright 2017 The Bazel Authors. All rights reserved.
// Use of this source code is governed by a BSD-style
// license that can be found in the LICENSE file.

// exprcore quoted string utilities.

// unesc maps single-letter chars following \ to their actual values.
// Entries for bytes that are not one-character escapes are zero.
var unesc = [256]byte{
	'a':  '\a',
	'b':  '\b',
	'f':  '\f',
	'n':  '\n',
	'r':  '\r',
	't':  '\t',
	'v':  '\v',
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
}

// esc maps escape-worthy bytes to the char that should follow \.
// Entries for bytes that need no one-character escape are zero.
var esc = [256]byte{
	'\a': 'a',
	'\b': 'b',
	'\f': 'f',
	'\n': 'n',
	'\r': 'r',
	'\t': 't',
	'\v': 'v',
	'\\': '\\',
	'\'': '\'',
	'"':  '"',
}

// unquote unquotes the quoted string, returning the actual
// string value, whether the original was triple-quoted, and
// an error describing invalid input.
//
// The literal may carry an "r" prefix (raw string), in which case
// backslashes are kept verbatim. In both modes a carriage return,
// optionally followed by a line feed, is normalized to a single "\n".
// On error the returned string is empty.
func unquote(quoted string) (s string, triple bool, err error) {
	// Check for raw prefix: means don't interpret the inner \.
	raw := strings.HasPrefix(quoted, "r")
	if raw {
		quoted = quoted[1:]
	}

	if len(quoted) < 2 {
		return "", false, fmt.Errorf("string literal too short")
	}

	// The literal must start and end with the same kind of quotation mark.
	quote := quoted[0]
	if (quote != '"' && quote != '\'') || quote != quoted[len(quoted)-1] {
		return "", false, fmt.Errorf("string literal has invalid quotes")
	}

	// Check for triple quoted string.
	if len(quoted) >= 6 && quoted[1] == quote && quoted[2] == quote && quoted[:3] == quoted[len(quoted)-3:] {
		triple = true
		quoted = quoted[3 : len(quoted)-3]
	} else {
		quoted = quoted[1 : len(quoted)-1]
	}

	// Now quoted is the quoted data, but no quotes.
	// If we're in raw mode or there are no escapes or
	// carriage returns, we're done.
	unquoteChars := "\\\r"
	if raw {
		unquoteChars = "\r"
	}
	if !strings.ContainsAny(quoted, unquoteChars) {
		return quoted, triple, nil
	}

	// Otherwise process quoted string.
	// Each iteration processes one escape sequence (or carriage return)
	// along with the plain text leading up to it.
	var buf strings.Builder
	buf.Grow(len(quoted)) // the result is never longer than the input
	for {
		// Remove prefix before escape sequence.
		i := strings.IndexAny(quoted, unquoteChars)
		if i < 0 {
			i = len(quoted)
		}
		buf.WriteString(quoted[:i])
		quoted = quoted[i:]

		if len(quoted) == 0 {
			break
		}

		// Process carriage return: CR and CRLF both become "\n".
		if quoted[0] == '\r' {
			buf.WriteByte('\n')
			if strings.HasPrefix(quoted, "\r\n") {
				quoted = quoted[2:]
			} else {
				quoted = quoted[1:]
			}
			continue
		}

		// Process escape sequence; quoted[0] is a backslash here.
		if len(quoted) == 1 {
			return "", triple, fmt.Errorf(`truncated escape sequence \`)
		}

		switch quoted[1] {
		default:
			// In exprcore, like Go, a backslash must escape something.
			// (Python still treats unnecessary backslashes literally,
			// but since 3.6 has emitted a deprecation warning.)
			return "", triple, fmt.Errorf("invalid escape sequence \\%c", quoted[1])

		case '\n':
			// Ignore the escape and the line break.
			quoted = quoted[2:]

		case '\r':
			// Line continuation written with a CR or CRLF line ending.
			// Carriage returns are treated as newlines everywhere else in
			// this function, so the escaped line break is dropped here too.
			if strings.HasPrefix(quoted[2:], "\n") {
				quoted = quoted[3:]
			} else {
				quoted = quoted[2:]
			}

		case 'a', 'b', 'f', 'n', 'r', 't', 'v', '\\', quote:
			// One-char escape.
			// We escape only the kind of quotation mark in use.
			buf.WriteByte(unesc[quoted[1]])
			quoted = quoted[2:]

		case '0', '1', '2', '3', '4', '5', '6', '7':
			// Octal escape, up to 3 digits.
			n := int(quoted[1] - '0')
			quoted = quoted[2:]
			for digits := 1; digits < 3 && len(quoted) > 0 && '0' <= quoted[0] && quoted[0] <= '7'; digits++ {
				n = n*8 + int(quoted[0]-'0')
				quoted = quoted[1:]
			}
			if n >= 256 {
				// NOTE: Python silently discards the high bit,
				// so that '\541' == '\141' == 'a'.
				// Let's see if we can avoid doing that in BUILD files.
				return "", triple, fmt.Errorf(`invalid escape sequence \%03o`, n)
			}
			buf.WriteByte(byte(n))

		case 'x':
			// Hexadecimal escape, exactly 2 digits.
			if len(quoted) < 4 {
				return "", triple, fmt.Errorf(`truncated escape sequence %s`, quoted)
			}
			n, err1 := strconv.ParseUint(quoted[2:4], 16, 0)
			if err1 != nil {
				return "", triple, fmt.Errorf(`invalid escape sequence %s`, quoted[:4])
			}
			buf.WriteByte(byte(n))
			quoted = quoted[4:]
		}
	}

	return buf.String(), triple, nil
}

// indexByte returns the index of the first instance of b in s, or else -1.
func indexByte(s string, b byte) int {
	return strings.IndexByte(s, b)
}

// hex is a list of the hexadecimal digits, for use in quoting.
// We always print lower-case hexadecimal.
const hex = "0123456789abcdef"

// quote returns the quoted form of the string value "x".
// If triple is true, quote uses the triple-quoted form """x""".
//
// The result always uses double quotes. Bytes with a one-character
// escape are written as such; any other control byte (< 0x20) or high
// byte (>= 0x80) is written as a three-digit octal escape.
func quote(unquoted string, triple bool) string {
	q := `"`
	if triple {
		q = `"""`
	}

	var buf strings.Builder
	buf.Grow(len(unquoted) + 2*len(q)) // lower bound; escapes may need more
	buf.WriteString(q)

	for i := 0; i < len(unquoted); i++ {
		c := unquoted[i]
		switch {
		case c == '"' && triple && (i+1 < len(unquoted) && unquoted[i+1] != '"' || i+2 < len(unquoted) && unquoted[i+2] != '"'):
			// Can pass up to two quotes through, because they are followed by a non-quote byte.
			buf.WriteByte(c)
			if i+1 < len(unquoted) && unquoted[i+1] == '"' {
				buf.WriteByte(c)
				i++ // the second quote has been consumed as well
			}

		case triple && c == '\n':
			// Can allow newline in triple-quoted string.
			buf.WriteByte(c)

		case c == '\'':
			// Can allow ' since we always use ".
			buf.WriteByte(c)

		case esc[c] != 0:
			buf.WriteByte('\\')
			buf.WriteByte(esc[c])

		case c < 0x20 || c >= 0x80:
			// BUILD files are supposed to be Latin-1, so escape all control and high bytes.
			// I'd prefer to use \x here (i.e. '\\', 'x', hex[c>>4], hex[c&0xF]),
			// but Blaze does not implement \x in quoted strings (b/7272572).
			buf.WriteByte('\\')
			buf.WriteByte(hex[c>>6]) // actually octal but reusing hex digits 0-7.
			buf.WriteByte(hex[(c>>3)&7])
			buf.WriteByte(hex[c&7])

		default:
			buf.WriteByte(c)
		}
	}

	buf.WriteString(q)
	return buf.String()
}