github.com/joomcode/cue@v0.4.4-0.20221111115225-539fe3512047/cue/literal/quote.go (about) 1 // Copyright 2020 CUE Authors 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package literal 16 17 import ( 18 "strconv" 19 "strings" 20 "unicode/utf8" 21 ) 22 23 // Form defines how to quote a string or bytes literal. 24 type Form struct { 25 hashCount int 26 quote byte 27 multiline bool 28 auto bool 29 exact bool 30 asciiOnly bool 31 graphicOnly bool 32 indent string 33 tripleQuote string 34 } 35 36 // TODO: 37 // - Fixed or max level of escape modifiers (#""#). 38 // - Option to fall back to bytes if value cannot be represented as string. 39 // E.g. ExactString. 40 // - QuoteExact that fails with an error if a string cannot be represented 41 // without loss. 42 // - Handle auto-breaking for long lines (Swift-style, \-terminated lines). 43 // This is not supported yet in CUE, but may, and should be considered as 44 // a possibility in API design. 45 // - Other possible convenience forms: Blob (auto-break bytes), String (bytes 46 // or string), Label. 47 48 // WithTabIndent returns a new Form with indentation set to the given number 49 // of tabs. The result will be a multiline string. 
50 func (f Form) WithTabIndent(n int) Form { 51 f.indent = tabs(n) 52 f.multiline = true 53 return f 54 } 55 56 const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t" 57 58 func tabs(n int) string { 59 if n < len(tabIndent) { 60 return tabIndent[:n] 61 } 62 return strings.Repeat("\t", n) 63 } 64 65 // WithOptionalIndent is like WithTabIndent, but only returns a multiline 66 // strings if it doesn't contain any newline characters. 67 func (f Form) WithOptionalTabIndent(tabs int) Form { 68 if tabs < len(tabIndent) { 69 f.indent = tabIndent[:tabs] 70 } else { 71 f.indent = strings.Repeat("\t", tabs) 72 } 73 f.auto = true 74 return f 75 } 76 77 // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII 78 // characters. 79 func (f Form) WithASCIIOnly() Form { 80 f.asciiOnly = true 81 return f 82 } 83 84 // WithGraphicOnly ensures the quoted strings consists solely of printable 85 // characters. 86 func (f Form) WithGraphicOnly() Form { 87 f.graphicOnly = true 88 return f 89 } 90 91 var ( 92 // String defines the format of a CUE string. Conversions may be lossy. 93 String Form = stringForm 94 95 // TODO: ExactString: quotes to bytes type if the string cannot be 96 // represented without loss of accuracy. 97 98 // Label is like Text, but optimized for labels. 99 Label Form = stringForm 100 101 // Bytes defines the format of bytes literal. 102 Bytes Form = bytesForm 103 104 stringForm = Form{ 105 quote: '"', 106 tripleQuote: `"""`, 107 } 108 bytesForm = Form{ 109 quote: '\'', 110 tripleQuote: `'''`, 111 exact: true, 112 } 113 ) 114 115 // Quote returns CUE string literal representing s. The returned string uses CUE 116 // escape sequences (\t, \n, \u00FF, \u0100) for control characters and 117 // non-printable characters as defined by strconv.IsPrint. 118 // 119 // It reports an error if the string cannot be converted to the desired form. 
120 func (f Form) Quote(s string) string { 121 return string(f.Append(make([]byte, 0, 3*len(s)/2), s)) 122 } 123 124 const ( 125 lowerhex = "0123456789abcdef" 126 ) 127 128 // Append appends a CUE string literal representing s, as generated by Quote, to 129 // buf and returns the extended buffer. 130 func (f Form) Append(buf []byte, s string) []byte { 131 if f.auto && strings.ContainsRune(s, '\n') { 132 f.multiline = true 133 } 134 if f.multiline { 135 f.hashCount = f.requiredHashCount(s) 136 } 137 138 // Often called with big strings, so preallocate. If there's quoting, 139 // this is conservative but still helps a lot. 140 if cap(buf)-len(buf) < len(s) { 141 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 142 copy(nBuf, buf) 143 buf = nBuf 144 } 145 for i := 0; i < f.hashCount; i++ { 146 buf = append(buf, '#') 147 } 148 if f.multiline { 149 buf = append(buf, f.quote, f.quote, f.quote, '\n') 150 if s == "" { 151 buf = append(buf, f.indent...) 152 buf = append(buf, f.quote, f.quote, f.quote) 153 return buf 154 } 155 if len(s) > 0 && s[0] != '\n' { 156 buf = append(buf, f.indent...) 157 } 158 } else { 159 buf = append(buf, f.quote) 160 } 161 162 buf = f.appendEscaped(buf, s) 163 164 if f.multiline { 165 buf = append(buf, '\n') 166 buf = append(buf, f.indent...) 167 buf = append(buf, f.quote, f.quote, f.quote) 168 } else { 169 buf = append(buf, f.quote) 170 } 171 for i := 0; i < f.hashCount; i++ { 172 buf = append(buf, '#') 173 } 174 175 return buf 176 } 177 178 // AppendEscaped appends a CUE string literal representing s, as generated by 179 // Quote but without the quotes, to buf and returns the extended buffer. 180 // 181 // It does not include the last indentation. 182 func (f Form) AppendEscaped(buf []byte, s string) []byte { 183 if f.auto && strings.ContainsRune(s, '\n') { 184 f.multiline = true 185 } 186 187 // Often called with big strings, so preallocate. If there's quoting, 188 // this is conservative but still helps a lot. 
189 if cap(buf)-len(buf) < len(s) { 190 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 191 copy(nBuf, buf) 192 buf = nBuf 193 } 194 195 buf = f.appendEscaped(buf, s) 196 197 return buf 198 } 199 200 func (f Form) appendEscaped(buf []byte, s string) []byte { 201 for width := 0; len(s) > 0; s = s[width:] { 202 r := rune(s[0]) 203 width = 1 204 if r >= utf8.RuneSelf { 205 r, width = utf8.DecodeRuneInString(s) 206 } 207 if f.exact && width == 1 && r == utf8.RuneError { 208 buf = append(buf, `\x`...) 209 buf = append(buf, lowerhex[s[0]>>4]) 210 buf = append(buf, lowerhex[s[0]&0xF]) 211 continue 212 } 213 if f.multiline && r == '\n' { 214 buf = append(buf, '\n') 215 if len(s) > 1 && s[1] != '\n' { 216 buf = append(buf, f.indent...) 217 } 218 continue 219 } 220 buf = f.appendEscapedRune(buf, r) 221 } 222 return buf 223 } 224 225 func (f *Form) appendEscapedRune(buf []byte, r rune) []byte { 226 var runeTmp [utf8.UTFMax]byte 227 if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed 228 buf = f.appendEscape(buf) 229 buf = append(buf, byte(r)) 230 return buf 231 } 232 if f.asciiOnly { 233 if r < utf8.RuneSelf && strconv.IsPrint(r) { 234 buf = append(buf, byte(r)) 235 return buf 236 } 237 } else if strconv.IsPrint(r) || f.graphicOnly && isInGraphicList(r) { 238 n := utf8.EncodeRune(runeTmp[:], r) 239 buf = append(buf, runeTmp[:n]...) 
240 return buf 241 } 242 buf = f.appendEscape(buf) 243 switch r { 244 case '\a': 245 buf = append(buf, 'a') 246 case '\b': 247 buf = append(buf, 'b') 248 case '\f': 249 buf = append(buf, 'f') 250 case '\n': 251 buf = append(buf, 'n') 252 case '\r': 253 buf = append(buf, 'r') 254 case '\t': 255 buf = append(buf, 't') 256 case '\v': 257 buf = append(buf, 'v') 258 default: 259 switch { 260 case r < ' ' && f.exact: 261 buf = append(buf, 'x') 262 buf = append(buf, lowerhex[byte(r)>>4]) 263 buf = append(buf, lowerhex[byte(r)&0xF]) 264 case r > utf8.MaxRune: 265 r = 0xFFFD 266 fallthrough 267 case r < 0x10000: 268 buf = append(buf, 'u') 269 for s := 12; s >= 0; s -= 4 { 270 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 271 } 272 default: 273 buf = append(buf, 'U') 274 for s := 28; s >= 0; s -= 4 { 275 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 276 } 277 } 278 } 279 return buf 280 } 281 282 func (f *Form) appendEscape(buf []byte) []byte { 283 buf = append(buf, '\\') 284 for i := 0; i < f.hashCount; i++ { 285 buf = append(buf, '#') 286 } 287 return buf 288 } 289 290 // requiredHashCount returns the number of # characters 291 // that are required to quote the multiline string s. 292 func (f *Form) requiredHashCount(s string) int { 293 hashCount := 0 294 i := 0 295 // Find all occurrences of the triple-quote and count 296 // the maximum number of succeeding # characters. 297 for { 298 j := strings.Index(s[i:], f.tripleQuote) 299 if j == -1 { 300 break 301 } 302 i += j + 3 303 // Absorb all extra quotes, so we 304 // get to the end of the sequence. 305 for ; i < len(s); i++ { 306 if s[i] != f.quote { 307 break 308 } 309 } 310 e := i - 1 311 // Count succeeding # characters. 312 for ; i < len(s); i++ { 313 if s[i] != '#' { 314 break 315 } 316 } 317 if nhash := i - e; nhash > hashCount { 318 hashCount = nhash 319 } 320 } 321 return hashCount 322 } 323 324 // isInGraphicList reports whether the rune is in the isGraphic list. 
// This separation from IsGraphic allows quoteWith to avoid two calls to IsPrint.
// Should be called only if IsPrint fails.
func isInGraphicList(r rune) bool {
	// We know r must fit in 16 bits - see makeisprint.go.
	// Negative values are rejected as well: the uint16 conversion below
	// would otherwise wrap them around onto entries of the table.
	if r < 0 || r > 0xFFFF {
		return false
	}
	rr := uint16(r)
	i := bsearch16(isGraphic, rr)
	return i < len(isGraphic) && rr == isGraphic[i]
}

// bsearch16 returns the smallest i such that a[i] >= x.
// If there is no such i, bsearch16 returns len(a).
// The slice a must be sorted in increasing order.
func bsearch16(a []uint16, x uint16) int {
	i, j := 0, len(a)
	for i < j {
		// Written as i + (j-i)/2 rather than (i+j)/2 to avoid overflow.
		h := i + (j-i)/2
		if a[h] < x {
			i = h + 1
		} else {
			j = h
		}
	}
	return i
}

// isGraphic lists the graphic runes not matched by IsPrint.
// It is kept sorted so that bsearch16 can be used on it.
var isGraphic = []uint16{
	0x00a0,
	0x1680,
	0x2000,
	0x2001,
	0x2002,
	0x2003,
	0x2004,
	0x2005,
	0x2006,
	0x2007,
	0x2008,
	0x2009,
	0x200a,
	0x202f,
	0x205f,
	0x3000,
}