// Source: cuelang.org/go@v0.13.0/cue/literal/quote.go

// Copyright 2020 CUE Authors
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package literal

import (
	"strconv"
	"strings"
	"unicode/utf8"
)

// Form defines how to quote a string or bytes literal.
//
// A Form is a plain value: the With* methods return a modified copy and
// leave the receiver untouched.
type Form struct {
	// hashCount is the number of '#' characters written before the opening
	// quote, after the closing quote, and after the backslash of every
	// escape sequence.
	hashCount int
	// quote is the quote byte: '"' for strings, '\'' for bytes.
	quote byte
	// multiline selects the triple-quoted, line-broken layout.
	multiline bool
	// auto switches to the multiline layout when the quoted value
	// contains a newline.
	auto bool
	// exact writes invalid UTF-8 bytes and control characters below
	// space as \xNN escapes.
	exact bool
	// asciiOnly writes only printable ASCII literally; everything else is
	// escaped.
	asciiOnly bool
	// graphicOnly additionally writes runes accepted by strconv.IsGraphic
	// literally (ignored when asciiOnly is set).
	graphicOnly bool
	// indent is written in front of every non-empty line, and in front of
	// the closing quotes, of a multiline literal.
	indent string
	// tripleQuote is the three-quote sequence searched for when computing
	// the number of hashes a multiline literal needs.
	tripleQuote string
}

// TODO:
// - Fixed or max level of escape modifiers (#""#).
// - Option to fall back to bytes if value cannot be represented as string.
//   E.g. ExactString.
// - QuoteExact that fails with an error if a string cannot be represented
//   without loss.
// - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
//   This is not supported yet in CUE, but may, and should be considered as
//   a possibility in API design.
// - Other possible convenience forms: Blob (auto-break bytes), String (bytes
//   or string), Label.

// WithTabIndent returns a new Form with indentation set to the given number
// of tabs. The result will be a multiline string.
func (f Form) WithTabIndent(n int) Form {
	f.indent = strings.Repeat("\t", n)
	f.multiline = true
	return f
}

// WithOptionalTabIndent is like WithTabIndent, but the result is only a
// multiline string if the quoted value contains a newline character.
58 func (f Form) WithOptionalTabIndent(tabs int) Form { 59 f.indent = strings.Repeat("\t", tabs) 60 f.auto = true 61 return f 62 } 63 64 // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII 65 // characters. 66 func (f Form) WithASCIIOnly() Form { 67 f.asciiOnly = true 68 return f 69 } 70 71 // WithGraphicOnly ensures the quoted strings consists solely of printable 72 // characters. 73 func (f Form) WithGraphicOnly() Form { 74 f.graphicOnly = true 75 return f 76 } 77 78 var ( 79 // String defines the format of a CUE string. Conversions may be lossy. 80 String Form = stringForm 81 82 // TODO: ExactString: quotes to bytes type if the string cannot be 83 // represented without loss of accuracy. 84 85 // Label is like String, but optimized for labels. 86 Label Form = stringForm 87 88 // Bytes defines the format of bytes literal. 89 Bytes Form = bytesForm 90 91 stringForm = Form{ 92 quote: '"', 93 tripleQuote: `"""`, 94 } 95 bytesForm = Form{ 96 quote: '\'', 97 tripleQuote: `'''`, 98 exact: true, 99 } 100 ) 101 102 // Quote returns CUE string literal representing s. The returned string uses CUE 103 // escape sequences (\t, \n, \u00FF, \u0100) for control characters and 104 // non-printable characters as defined by strconv.IsPrint. 105 // 106 // It reports an error if the string cannot be converted to the desired form. 107 func (f Form) Quote(s string) string { 108 return string(f.Append(make([]byte, 0, 3*len(s)/2), s)) 109 } 110 111 const ( 112 lowerhex = "0123456789abcdef" 113 ) 114 115 // Append appends a CUE string literal representing s, as generated by Quote, to 116 // buf and returns the extended buffer. 117 func (f Form) Append(buf []byte, s string) []byte { 118 if f.auto && strings.ContainsRune(s, '\n') { 119 f.multiline = true 120 } 121 if f.multiline { 122 f.hashCount = f.requiredHashCount(s) 123 } 124 125 // Often called with big strings, so preallocate. If there's quoting, 126 // this is conservative but still helps a lot. 
127 if cap(buf)-len(buf) < len(s) { 128 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 129 copy(nBuf, buf) 130 buf = nBuf 131 } 132 for range f.hashCount { 133 buf = append(buf, '#') 134 } 135 if f.multiline { 136 buf = append(buf, f.quote, f.quote, f.quote, '\n') 137 if s == "" { 138 buf = append(buf, f.indent...) 139 buf = append(buf, f.quote, f.quote, f.quote) 140 return buf 141 } 142 if len(s) > 0 && s[0] != '\n' { 143 buf = append(buf, f.indent...) 144 } 145 } else { 146 buf = append(buf, f.quote) 147 } 148 149 buf = f.appendEscaped(buf, s) 150 151 if f.multiline { 152 buf = append(buf, '\n') 153 buf = append(buf, f.indent...) 154 buf = append(buf, f.quote, f.quote, f.quote) 155 } else { 156 buf = append(buf, f.quote) 157 } 158 for range f.hashCount { 159 buf = append(buf, '#') 160 } 161 162 return buf 163 } 164 165 // AppendEscaped appends a CUE string literal representing s, as generated by 166 // Quote but without the quotes, to buf and returns the extended buffer. 167 // 168 // It does not include the last indentation. 169 func (f Form) AppendEscaped(buf []byte, s string) []byte { 170 if f.auto && strings.ContainsRune(s, '\n') { 171 f.multiline = true 172 } 173 174 // Often called with big strings, so preallocate. If there's quoting, 175 // this is conservative but still helps a lot. 176 if cap(buf)-len(buf) < len(s) { 177 nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1) 178 copy(nBuf, buf) 179 buf = nBuf 180 } 181 182 buf = f.appendEscaped(buf, s) 183 184 return buf 185 } 186 187 func (f Form) appendEscaped(buf []byte, s string) []byte { 188 for width := 0; len(s) > 0; s = s[width:] { 189 r := rune(s[0]) 190 width = 1 191 if r >= utf8.RuneSelf { 192 r, width = utf8.DecodeRuneInString(s) 193 } 194 if f.exact && width == 1 && r == utf8.RuneError { 195 buf = append(buf, `\x`...) 
196 buf = append(buf, lowerhex[s[0]>>4]) 197 buf = append(buf, lowerhex[s[0]&0xF]) 198 continue 199 } 200 if f.multiline && r == '\n' { 201 buf = append(buf, '\n') 202 if len(s) > 1 && s[1] != '\n' { 203 buf = append(buf, f.indent...) 204 } 205 continue 206 } 207 buf = f.appendEscapedRune(buf, r) 208 } 209 return buf 210 } 211 212 func (f *Form) appendEscapedRune(buf []byte, r rune) []byte { 213 if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed 214 buf = f.appendEscape(buf) 215 buf = append(buf, byte(r)) 216 return buf 217 } 218 if f.asciiOnly { 219 if r < utf8.RuneSelf && strconv.IsPrint(r) { 220 buf = append(buf, byte(r)) 221 return buf 222 } 223 } else if strconv.IsPrint(r) || (f.graphicOnly && strconv.IsGraphic(r)) { 224 buf = utf8.AppendRune(buf, r) 225 return buf 226 } 227 buf = f.appendEscape(buf) 228 switch r { 229 case '\a': 230 buf = append(buf, 'a') 231 case '\b': 232 buf = append(buf, 'b') 233 case '\f': 234 buf = append(buf, 'f') 235 case '\n': 236 buf = append(buf, 'n') 237 case '\r': 238 buf = append(buf, 'r') 239 case '\t': 240 buf = append(buf, 't') 241 case '\v': 242 buf = append(buf, 'v') 243 default: 244 switch { 245 case r < ' ' && f.exact: 246 buf = append(buf, 'x') 247 buf = append(buf, lowerhex[byte(r)>>4]) 248 buf = append(buf, lowerhex[byte(r)&0xF]) 249 case r > utf8.MaxRune: 250 r = 0xFFFD 251 fallthrough 252 case r < 0x10000: 253 buf = append(buf, 'u') 254 for s := 12; s >= 0; s -= 4 { 255 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 256 } 257 default: 258 buf = append(buf, 'U') 259 for s := 28; s >= 0; s -= 4 { 260 buf = append(buf, lowerhex[r>>uint(s)&0xF]) 261 } 262 } 263 } 264 return buf 265 } 266 267 func (f *Form) appendEscape(buf []byte) []byte { 268 buf = append(buf, '\\') 269 for range f.hashCount { 270 buf = append(buf, '#') 271 } 272 return buf 273 } 274 275 // requiredHashCount returns the number of # characters 276 // that are required to quote the multiline string s. 
277 func (f *Form) requiredHashCount(s string) int { 278 hashCount := 0 279 i := 0 280 // Find all occurrences of the triple-quote and count 281 // the maximum number of succeeding # characters. 282 for { 283 j := strings.Index(s[i:], f.tripleQuote) 284 if j == -1 { 285 break 286 } 287 i += j + 3 288 // Absorb all extra quotes, so we 289 // get to the end of the sequence. 290 for ; i < len(s); i++ { 291 if s[i] != f.quote { 292 break 293 } 294 } 295 e := i - 1 296 // Count succeeding # characters. 297 for ; i < len(s); i++ { 298 if s[i] != '#' { 299 break 300 } 301 } 302 if nhash := i - e; nhash > hashCount { 303 hashCount = nhash 304 } 305 } 306 return hashCount 307 }