cuelang.org/go@v0.10.1/cue/literal/quote.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package literal
    16  
    17  import (
    18  	"strconv"
    19  	"strings"
    20  	"unicode/utf8"
    21  )
    22  
// Form defines how to quote a string or bytes literal.
//
// Form is used as a value: the With* methods return a modified copy and
// leave the receiver untouched.
type Form struct {
	// hashCount is the number of '#' characters written around the literal
	// and after the backslash of every escape sequence.
	hashCount int
	// quote is the quote character: '"' for strings, '\'' for bytes.
	quote byte
	// multiline selects the triple-quoted, multi-line layout.
	multiline bool
	// auto switches to the multiline layout when the value being quoted
	// contains a newline.
	auto bool
	// exact makes bytes that are not valid UTF-8 come out as \xNN escapes
	// and makes control characters below ' ' without a short escape use \xNN.
	exact bool
	// asciiOnly escapes every rune that is not printable ASCII.
	asciiOnly bool
	// graphicOnly additionally leaves runes unescaped when
	// strconv.IsGraphic reports true for them.
	graphicOnly bool
	// indent is written at the start of each non-empty line, and before the
	// closing quotes, of a multiline literal.
	indent string
	// tripleQuote is the three-quote sequence searched for when computing
	// how many '#' characters a multiline literal needs.
	tripleQuote string
}
    35  
    36  // TODO:
    37  // - Fixed or max level of escape modifiers (#""#).
    38  // - Option to fall back to bytes if value cannot be represented as string.
    39  //   E.g. ExactString.
    40  // - QuoteExact that fails with an error if a string cannot be represented
    41  //   without loss.
// - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
//   This is not supported yet in CUE, but may be in the future, and should
//   be considered as a possibility in API design.
    45  // - Other possible convenience forms: Blob (auto-break bytes), String (bytes
    46  //   or string), Label.
    47  
    48  // WithTabIndent returns a new Form with indentation set to the given number
    49  // of tabs. The result will be a multiline string.
    50  func (f Form) WithTabIndent(n int) Form {
    51  	f.indent = tabs(n)
    52  	f.multiline = true
    53  	return f
    54  }
    55  
    56  const tabIndent = "\t\t\t\t\t\t\t\t\t\t\t\t"
    57  
    58  func tabs(n int) string {
    59  	if n < len(tabIndent) {
    60  		return tabIndent[:n]
    61  	}
    62  	return strings.Repeat("\t", n)
    63  }
    64  
    65  // WithOptionalIndent is like WithTabIndent, but only returns a multiline
    66  // strings if it doesn't contain any newline characters.
    67  func (f Form) WithOptionalTabIndent(tabs int) Form {
    68  	// TODO(mvdan): remove this optimization once Go 1.23 lands with https://go.dev/cl/536615
    69  	if tabs < len(tabIndent) {
    70  		f.indent = tabIndent[:tabs]
    71  	} else {
    72  		f.indent = strings.Repeat("\t", tabs)
    73  	}
    74  	f.auto = true
    75  	return f
    76  }
    77  
    78  // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII
    79  // characters.
    80  func (f Form) WithASCIIOnly() Form {
    81  	f.asciiOnly = true
    82  	return f
    83  }
    84  
    85  // WithGraphicOnly ensures the quoted strings consists solely of printable
    86  // characters.
    87  func (f Form) WithGraphicOnly() Form {
    88  	f.graphicOnly = true
    89  	return f
    90  }
    91  
var (
	// String defines the format of a CUE string. Conversions may be lossy.
	String Form = stringForm

	// TODO: ExactString: quotes to bytes type if the string cannot be
	// represented without loss of accuracy.

	// Label is like String, but optimized for labels.
	// It is currently initialized from the same settings as String.
	Label Form = stringForm

	// Bytes defines the format of bytes literal. It is an exact form: bytes
	// that are not valid UTF-8 are escaped individually instead of being
	// replaced.
	Bytes Form = bytesForm

	// stringForm is the base configuration for string literals: double
	// quotes, single-line unless changed by a With* method.
	stringForm = Form{
		quote:       '"',
		tripleQuote: `"""`,
	}
	// bytesForm is the base configuration for bytes literals: single quotes
	// with the exact flag set.
	bytesForm = Form{
		quote:       '\'',
		tripleQuote: `'''`,
		exact:       true,
	}
)
   115  
   116  // Quote returns CUE string literal representing s. The returned string uses CUE
   117  // escape sequences (\t, \n, \u00FF, \u0100) for control characters and
   118  // non-printable characters as defined by strconv.IsPrint.
   119  //
   120  // It reports an error if the string cannot be converted to the desired form.
   121  func (f Form) Quote(s string) string {
   122  	return string(f.Append(make([]byte, 0, 3*len(s)/2), s))
   123  }
   124  
const (
	// lowerhex holds the lowercase hexadecimal digits, indexed by nibble
	// value. It is used to format the digits of \x, \u and \U escapes.
	lowerhex = "0123456789abcdef"
)
   128  
   129  // Append appends a CUE string literal representing s, as generated by Quote, to
   130  // buf and returns the extended buffer.
   131  func (f Form) Append(buf []byte, s string) []byte {
   132  	if f.auto && strings.ContainsRune(s, '\n') {
   133  		f.multiline = true
   134  	}
   135  	if f.multiline {
   136  		f.hashCount = f.requiredHashCount(s)
   137  	}
   138  
   139  	// Often called with big strings, so preallocate. If there's quoting,
   140  	// this is conservative but still helps a lot.
   141  	if cap(buf)-len(buf) < len(s) {
   142  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   143  		copy(nBuf, buf)
   144  		buf = nBuf
   145  	}
   146  	for range f.hashCount {
   147  		buf = append(buf, '#')
   148  	}
   149  	if f.multiline {
   150  		buf = append(buf, f.quote, f.quote, f.quote, '\n')
   151  		if s == "" {
   152  			buf = append(buf, f.indent...)
   153  			buf = append(buf, f.quote, f.quote, f.quote)
   154  			return buf
   155  		}
   156  		if len(s) > 0 && s[0] != '\n' {
   157  			buf = append(buf, f.indent...)
   158  		}
   159  	} else {
   160  		buf = append(buf, f.quote)
   161  	}
   162  
   163  	buf = f.appendEscaped(buf, s)
   164  
   165  	if f.multiline {
   166  		buf = append(buf, '\n')
   167  		buf = append(buf, f.indent...)
   168  		buf = append(buf, f.quote, f.quote, f.quote)
   169  	} else {
   170  		buf = append(buf, f.quote)
   171  	}
   172  	for range f.hashCount {
   173  		buf = append(buf, '#')
   174  	}
   175  
   176  	return buf
   177  }
   178  
   179  // AppendEscaped appends a CUE string literal representing s, as generated by
   180  // Quote but without the quotes, to buf and returns the extended buffer.
   181  //
   182  // It does not include the last indentation.
   183  func (f Form) AppendEscaped(buf []byte, s string) []byte {
   184  	if f.auto && strings.ContainsRune(s, '\n') {
   185  		f.multiline = true
   186  	}
   187  
   188  	// Often called with big strings, so preallocate. If there's quoting,
   189  	// this is conservative but still helps a lot.
   190  	if cap(buf)-len(buf) < len(s) {
   191  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   192  		copy(nBuf, buf)
   193  		buf = nBuf
   194  	}
   195  
   196  	buf = f.appendEscaped(buf, s)
   197  
   198  	return buf
   199  }
   200  
   201  func (f Form) appendEscaped(buf []byte, s string) []byte {
   202  	for width := 0; len(s) > 0; s = s[width:] {
   203  		r := rune(s[0])
   204  		width = 1
   205  		if r >= utf8.RuneSelf {
   206  			r, width = utf8.DecodeRuneInString(s)
   207  		}
   208  		if f.exact && width == 1 && r == utf8.RuneError {
   209  			buf = append(buf, `\x`...)
   210  			buf = append(buf, lowerhex[s[0]>>4])
   211  			buf = append(buf, lowerhex[s[0]&0xF])
   212  			continue
   213  		}
   214  		if f.multiline && r == '\n' {
   215  			buf = append(buf, '\n')
   216  			if len(s) > 1 && s[1] != '\n' {
   217  				buf = append(buf, f.indent...)
   218  			}
   219  			continue
   220  		}
   221  		buf = f.appendEscapedRune(buf, r)
   222  	}
   223  	return buf
   224  }
   225  
   226  func (f *Form) appendEscapedRune(buf []byte, r rune) []byte {
   227  	if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed
   228  		buf = f.appendEscape(buf)
   229  		buf = append(buf, byte(r))
   230  		return buf
   231  	}
   232  	if f.asciiOnly {
   233  		if r < utf8.RuneSelf && strconv.IsPrint(r) {
   234  			buf = append(buf, byte(r))
   235  			return buf
   236  		}
   237  	} else if strconv.IsPrint(r) || (f.graphicOnly && strconv.IsGraphic(r)) {
   238  		buf = utf8.AppendRune(buf, r)
   239  		return buf
   240  	}
   241  	buf = f.appendEscape(buf)
   242  	switch r {
   243  	case '\a':
   244  		buf = append(buf, 'a')
   245  	case '\b':
   246  		buf = append(buf, 'b')
   247  	case '\f':
   248  		buf = append(buf, 'f')
   249  	case '\n':
   250  		buf = append(buf, 'n')
   251  	case '\r':
   252  		buf = append(buf, 'r')
   253  	case '\t':
   254  		buf = append(buf, 't')
   255  	case '\v':
   256  		buf = append(buf, 'v')
   257  	default:
   258  		switch {
   259  		case r < ' ' && f.exact:
   260  			buf = append(buf, 'x')
   261  			buf = append(buf, lowerhex[byte(r)>>4])
   262  			buf = append(buf, lowerhex[byte(r)&0xF])
   263  		case r > utf8.MaxRune:
   264  			r = 0xFFFD
   265  			fallthrough
   266  		case r < 0x10000:
   267  			buf = append(buf, 'u')
   268  			for s := 12; s >= 0; s -= 4 {
   269  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   270  			}
   271  		default:
   272  			buf = append(buf, 'U')
   273  			for s := 28; s >= 0; s -= 4 {
   274  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   275  			}
   276  		}
   277  	}
   278  	return buf
   279  }
   280  
   281  func (f *Form) appendEscape(buf []byte) []byte {
   282  	buf = append(buf, '\\')
   283  	for range f.hashCount {
   284  		buf = append(buf, '#')
   285  	}
   286  	return buf
   287  }
   288  
   289  // requiredHashCount returns the number of # characters
   290  // that are required to quote the multiline string s.
   291  func (f *Form) requiredHashCount(s string) int {
   292  	hashCount := 0
   293  	i := 0
   294  	// Find all occurrences of the triple-quote and count
   295  	// the maximum number of succeeding # characters.
   296  	for {
   297  		j := strings.Index(s[i:], f.tripleQuote)
   298  		if j == -1 {
   299  			break
   300  		}
   301  		i += j + 3
   302  		// Absorb all extra quotes, so we
   303  		// get to the end of the sequence.
   304  		for ; i < len(s); i++ {
   305  			if s[i] != f.quote {
   306  				break
   307  			}
   308  		}
   309  		e := i - 1
   310  		// Count succeeding # characters.
   311  		for ; i < len(s); i++ {
   312  			if s[i] != '#' {
   313  				break
   314  			}
   315  		}
   316  		if nhash := i - e; nhash > hashCount {
   317  			hashCount = nhash
   318  		}
   319  	}
   320  	return hashCount
   321  }