// cuelang.org/go@v0.10.1/cue/literal/quote.go

// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package literal

import (
	"strconv"
	"strings"
	"unicode/utf8"
)

// Form defines how to quote a string or bytes literal.
type Form struct {
	// hashCount is the number of '#' characters written before the opening
	// quotes, after the closing quotes, and after the backslash of every
	// escape sequence. Append computes it for multiline literals.
	hashCount int

	// quote is the quote character: '"' for strings and '\'' for bytes.
	quote byte

	// multiline selects the triple-quoted, multiline literal syntax.
	multiline bool

	// auto switches to the multiline syntax when the input contains a
	// newline.
	auto bool

	// exact writes invalid UTF-8 bytes, and control characters below space
	// that have no short escape, as \x escapes. It is set for Bytes.
	exact bool

	// asciiOnly escapes all runes outside of printable ASCII.
	asciiOnly bool

	// graphicOnly additionally leaves runes unescaped for which
	// strconv.IsGraphic reports true.
	graphicOnly bool

	// indent is written before each non-empty line of a multiline literal
	// and before its closing quotes.
	indent string

	// tripleQuote is the triple-quote sequence of this form, which
	// requiredHashCount searches for in the input.
	tripleQuote string
}

// TODO:
// - Fixed or max level of escape modifiers (#""#).
// - Option to fall back to bytes if value cannot be represented as string.
//   E.g. ExactString.
// - QuoteExact that fails with an error if a string cannot be represented
//   without loss.
// - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
//   This is not supported yet in CUE, but may, and should be considered as
//   a possibility in API design.
// - Other possible convenience forms: Blob (auto-break bytes), String (bytes
//   or string), Label.

// WithTabIndent returns a new Form with indentation set to the given number
// of tabs. The result will be a multiline string.
50 func (f Form) WithTabIndent(n int) Form { 51 f.indent = tabs(n) 52 f.multiline = true 53 return f 54 } 55 56 const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t" 57 58 func tabs(n int) string { 59 if n < len(tabIndent) { 60 return tabIndent[:n] 61 } 62 return strings.Repeat("\t", n) 63 } 64 65 // WithOptionalIndent is like WithTabIndent, but only returns a multiline 66 // strings if it doesn't contain any newline characters. 67 func (f Form) WithOptionalTabIndent(tabs int) Form { 68 // TODO(mvdan): remove this optimization once Go 1.23 lands with https://go.dev/cl/536615 69 if tabs < len(tabIndent) { 70 f.indent = tabIndent[:tabs] 71 } else { 72 f.indent = strings.Repeat("\t", tabs) 73 } 74 f.auto = true 75 return f 76 } 77 78 // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII 79 // characters. 80 func (f Form) WithASCIIOnly() Form { 81 f.asciiOnly = true 82 return f 83 } 84 85 // WithGraphicOnly ensures the quoted strings consists solely of printable 86 // characters. 87 func (f Form) WithGraphicOnly() Form { 88 f.graphicOnly = true 89 return f 90 } 91 92 var ( 93 // String defines the format of a CUE string. Conversions may be lossy. 94 String Form = stringForm 95 96 // TODO: ExactString: quotes to bytes type if the string cannot be 97 // represented without loss of accuracy. 98 99 // Label is like String, but optimized for labels. 100 Label Form = stringForm 101 102 // Bytes defines the format of bytes literal. 103 Bytes Form = bytesForm 104 105 stringForm = Form{ 106 quote: '"', 107 tripleQuote: `"""`, 108 } 109 bytesForm = Form{ 110 quote: '\'', 111 tripleQuote: `'''`, 112 exact: true, 113 } 114 ) 115 116 // Quote returns CUE string literal representing s. The returned string uses CUE 117 // escape sequences (\t, \n, \u00FF, \u0100) for control characters and 118 // non-printable characters as defined by strconv.IsPrint. 119 // 120 // It reports an error if the string cannot be converted to the desired form. 
121 func (f Form) Quote(s string) string { 122 return string(f.Append(make([]byte, 0, 3*len(s)/2), s)) 123 } 124 125 const ( 126 lowerhex = "0123456789abcdef" 127 ) 128 129 // Append appends a CUE string literal representing s, as generated by Quote, to 130 // buf and returns the extended buffer. 131 func (f Form) Append(buf []byte, s string) []byte { 132 if f.auto && strings.ContainsRune(s, '\n') { 133 f.multiline = true 134 } 135 if f.multiline { 136 f.hashCount = f.requiredHashCount(s) 137 } 138 139 // Often called with big strings, so preallocate. If there's quoting, 140 // this is conservative but still helps a lot. 141 if cap(buf)-len(buf) < len(s) { 142 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 143 copy(nBuf, buf) 144 buf = nBuf 145 } 146 for range f.hashCount { 147 buf = append(buf, '#') 148 } 149 if f.multiline { 150 buf = append(buf, f.quote, f.quote, f.quote, '\n') 151 if s == "" { 152 buf = append(buf, f.indent...) 153 buf = append(buf, f.quote, f.quote, f.quote) 154 return buf 155 } 156 if len(s) > 0 && s[0] != '\n' { 157 buf = append(buf, f.indent...) 158 } 159 } else { 160 buf = append(buf, f.quote) 161 } 162 163 buf = f.appendEscaped(buf, s) 164 165 if f.multiline { 166 buf = append(buf, '\n') 167 buf = append(buf, f.indent...) 168 buf = append(buf, f.quote, f.quote, f.quote) 169 } else { 170 buf = append(buf, f.quote) 171 } 172 for range f.hashCount { 173 buf = append(buf, '#') 174 } 175 176 return buf 177 } 178 179 // AppendEscaped appends a CUE string literal representing s, as generated by 180 // Quote but without the quotes, to buf and returns the extended buffer. 181 // 182 // It does not include the last indentation. 183 func (f Form) AppendEscaped(buf []byte, s string) []byte { 184 if f.auto && strings.ContainsRune(s, '\n') { 185 f.multiline = true 186 } 187 188 // Often called with big strings, so preallocate. If there's quoting, 189 // this is conservative but still helps a lot. 
190 if cap(buf)-len(buf) < len(s) { 191 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 192 copy(nBuf, buf) 193 buf = nBuf 194 } 195 196 buf = f.appendEscaped(buf, s) 197 198 return buf 199 } 200 201 func (f Form) appendEscaped(buf []byte, s string) []byte { 202 for width := 0; len(s) > 0; s = s[width:] { 203 r := rune(s[0]) 204 width = 1 205 if r >= utf8.RuneSelf { 206 r, width = utf8.DecodeRuneInString(s) 207 } 208 if f.exact && width == 1 && r == utf8.RuneError { 209 buf = append(buf, `\x`...) 210 buf = append(buf, lowerhex[s[0]>>4]) 211 buf = append(buf, lowerhex[s[0]&0xF]) 212 continue 213 } 214 if f.multiline && r == '\n' { 215 buf = append(buf, '\n') 216 if len(s) > 1 && s[1] != '\n' { 217 buf = append(buf, f.indent...) 218 } 219 continue 220 } 221 buf = f.appendEscapedRune(buf, r) 222 } 223 return buf 224 } 225 226 func (f *Form) appendEscapedRune(buf []byte, r rune) []byte { 227 if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed 228 buf = f.appendEscape(buf) 229 buf = append(buf, byte(r)) 230 return buf 231 } 232 if f.asciiOnly { 233 if r < utf8.RuneSelf && strconv.IsPrint(r) { 234 buf = append(buf, byte(r)) 235 return buf 236 } 237 } else if strconv.IsPrint(r) || (f.graphicOnly && strconv.IsGraphic(r)) { 238 buf = utf8.AppendRune(buf, r) 239 return buf 240 } 241 buf = f.appendEscape(buf) 242 switch r { 243 case '\a': 244 buf = append(buf, 'a') 245 case '\b': 246 buf = append(buf, 'b') 247 case '\f': 248 buf = append(buf, 'f') 249 case '\n': 250 buf = append(buf, 'n') 251 case '\r': 252 buf = append(buf, 'r') 253 case '\t': 254 buf = append(buf, 't') 255 case '\v': 256 buf = append(buf, 'v') 257 default: 258 switch { 259 case r < ' ' && f.exact: 260 buf = append(buf, 'x') 261 buf = append(buf, lowerhex[byte(r)>>4]) 262 buf = append(buf, lowerhex[byte(r)&0xF]) 263 case r > utf8.MaxRune: 264 r = 0xFFFD 265 fallthrough 266 case r < 0x10000: 267 buf = append(buf, 'u') 268 for s := 12; s >= 0; s -= 4 { 269 buf = append(buf, 
lowerhex[r>>uint(s)&0xF]) 270 } 271 default: 272 buf = append(buf, 'U') 273 for s := 28; s >= 0; s -= 4 { 274 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 275 } 276 } 277 } 278 return buf 279 } 280 281 func (f *Form) appendEscape(buf []byte) []byte { 282 buf = append(buf, '\\') 283 for range f.hashCount { 284 buf = append(buf, '#') 285 } 286 return buf 287 } 288 289 // requiredHashCount returns the number of # characters 290 // that are required to quote the multiline string s. 291 func (f *Form) requiredHashCount(s string) int { 292 hashCount := 0 293 i := 0 294 // Find all occurrences of the triple-quote and count 295 // the maximum number of succeeding # characters. 296 for { 297 j := strings.Index(s[i:], f.tripleQuote) 298 if j == -1 { 299 break 300 } 301 i += j + 3 302 // Absorb all extra quotes, so we 303 // get to the end of the sequence. 304 for ; i < len(s); i++ { 305 if s[i] != f.quote { 306 break 307 } 308 } 309 e := i - 1 310 // Count succeeding # characters. 311 for ; i < len(s); i++ { 312 if s[i] != '#' { 313 break 314 } 315 } 316 if nhash := i - e; nhash > hashCount { 317 hashCount = nhash 318 } 319 } 320 return hashCount 321 }