cuelang.org/go@v0.13.0/cue/literal/quote.go (about)

     1  // Copyright 2020 CUE Authors
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package literal
    16  
    17  import (
    18  	"strconv"
    19  	"strings"
    20  	"unicode/utf8"
    21  )
    22  
    23  // Form defines how to quote a string or bytes literal.
    24  type Form struct {
    25  	hashCount   int
    26  	quote       byte
    27  	multiline   bool
    28  	auto        bool
    29  	exact       bool
    30  	asciiOnly   bool
    31  	graphicOnly bool
    32  	indent      string
    33  	tripleQuote string
    34  }
    35  
    36  // TODO:
    37  // - Fixed or max level of escape modifiers (#""#).
    38  // - Option to fall back to bytes if value cannot be represented as string.
    39  //   E.g. ExactString.
    40  // - QuoteExact that fails with an error if a string cannot be represented
    41  //   without loss.
    42  // - Handle auto-breaking for long lines (Swift-style, \-terminated lines).
    43  //   This is not supported yet in CUE, but may, and should be considered as
    44  //   a possibility in API design.
    45  // - Other possible convenience forms: Blob (auto-break bytes), String (bytes
    46  //   or string), Label.
    47  
    48  // WithTabIndent returns a new Form with indentation set to the given number
    49  // of tabs. The result will be a multiline string.
    50  func (f Form) WithTabIndent(n int) Form {
    51  	f.indent = strings.Repeat("\t", n)
    52  	f.multiline = true
    53  	return f
    54  }
    55  
    56  // WithOptionalIndent is like WithTabIndent, but only returns a multiline
    57  // strings if it doesn't contain any newline characters.
    58  func (f Form) WithOptionalTabIndent(tabs int) Form {
    59  	f.indent = strings.Repeat("\t", tabs)
    60  	f.auto = true
    61  	return f
    62  }
    63  
    64  // WithASCIIOnly ensures the quoted strings consists solely of valid ASCII
    65  // characters.
    66  func (f Form) WithASCIIOnly() Form {
    67  	f.asciiOnly = true
    68  	return f
    69  }
    70  
    71  // WithGraphicOnly ensures the quoted strings consists solely of printable
    72  // characters.
    73  func (f Form) WithGraphicOnly() Form {
    74  	f.graphicOnly = true
    75  	return f
    76  }
    77  
    78  var (
    79  	// String defines the format of a CUE string. Conversions may be lossy.
    80  	String Form = stringForm
    81  
    82  	// TODO: ExactString: quotes to bytes type if the string cannot be
    83  	// represented without loss of accuracy.
    84  
    85  	// Label is like String, but optimized for labels.
    86  	Label Form = stringForm
    87  
    88  	// Bytes defines the format of bytes literal.
    89  	Bytes Form = bytesForm
    90  
    91  	stringForm = Form{
    92  		quote:       '"',
    93  		tripleQuote: `"""`,
    94  	}
    95  	bytesForm = Form{
    96  		quote:       '\'',
    97  		tripleQuote: `'''`,
    98  		exact:       true,
    99  	}
   100  )
   101  
   102  // Quote returns CUE string literal representing s. The returned string uses CUE
   103  // escape sequences (\t, \n, \u00FF, \u0100) for control characters and
   104  // non-printable characters as defined by strconv.IsPrint.
   105  //
   106  // It reports an error if the string cannot be converted to the desired form.
   107  func (f Form) Quote(s string) string {
   108  	return string(f.Append(make([]byte, 0, 3*len(s)/2), s))
   109  }
   110  
   111  const (
   112  	lowerhex = "0123456789abcdef"
   113  )
   114  
   115  // Append appends a CUE string literal representing s, as generated by Quote, to
   116  // buf and returns the extended buffer.
   117  func (f Form) Append(buf []byte, s string) []byte {
   118  	if f.auto && strings.ContainsRune(s, '\n') {
   119  		f.multiline = true
   120  	}
   121  	if f.multiline {
   122  		f.hashCount = f.requiredHashCount(s)
   123  	}
   124  
   125  	// Often called with big strings, so preallocate. If there's quoting,
   126  	// this is conservative but still helps a lot.
   127  	if cap(buf)-len(buf) < len(s) {
   128  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   129  		copy(nBuf, buf)
   130  		buf = nBuf
   131  	}
   132  	for range f.hashCount {
   133  		buf = append(buf, '#')
   134  	}
   135  	if f.multiline {
   136  		buf = append(buf, f.quote, f.quote, f.quote, '\n')
   137  		if s == "" {
   138  			buf = append(buf, f.indent...)
   139  			buf = append(buf, f.quote, f.quote, f.quote)
   140  			return buf
   141  		}
   142  		if len(s) > 0 && s[0] != '\n' {
   143  			buf = append(buf, f.indent...)
   144  		}
   145  	} else {
   146  		buf = append(buf, f.quote)
   147  	}
   148  
   149  	buf = f.appendEscaped(buf, s)
   150  
   151  	if f.multiline {
   152  		buf = append(buf, '\n')
   153  		buf = append(buf, f.indent...)
   154  		buf = append(buf, f.quote, f.quote, f.quote)
   155  	} else {
   156  		buf = append(buf, f.quote)
   157  	}
   158  	for range f.hashCount {
   159  		buf = append(buf, '#')
   160  	}
   161  
   162  	return buf
   163  }
   164  
   165  // AppendEscaped appends a CUE string literal representing s, as generated by
   166  // Quote but without the quotes, to buf and returns the extended buffer.
   167  //
   168  // It does not include the last indentation.
   169  func (f Form) AppendEscaped(buf []byte, s string) []byte {
   170  	if f.auto && strings.ContainsRune(s, '\n') {
   171  		f.multiline = true
   172  	}
   173  
   174  	// Often called with big strings, so preallocate. If there's quoting,
   175  	// this is conservative but still helps a lot.
   176  	if cap(buf)-len(buf) < len(s) {
   177  		nBuf := make([]byte, len(buf), len(buf)+1+len(s)+1)
   178  		copy(nBuf, buf)
   179  		buf = nBuf
   180  	}
   181  
   182  	buf = f.appendEscaped(buf, s)
   183  
   184  	return buf
   185  }
   186  
   187  func (f Form) appendEscaped(buf []byte, s string) []byte {
   188  	for width := 0; len(s) > 0; s = s[width:] {
   189  		r := rune(s[0])
   190  		width = 1
   191  		if r >= utf8.RuneSelf {
   192  			r, width = utf8.DecodeRuneInString(s)
   193  		}
   194  		if f.exact && width == 1 && r == utf8.RuneError {
   195  			buf = append(buf, `\x`...)
   196  			buf = append(buf, lowerhex[s[0]>>4])
   197  			buf = append(buf, lowerhex[s[0]&0xF])
   198  			continue
   199  		}
   200  		if f.multiline && r == '\n' {
   201  			buf = append(buf, '\n')
   202  			if len(s) > 1 && s[1] != '\n' {
   203  				buf = append(buf, f.indent...)
   204  			}
   205  			continue
   206  		}
   207  		buf = f.appendEscapedRune(buf, r)
   208  	}
   209  	return buf
   210  }
   211  
   212  func (f *Form) appendEscapedRune(buf []byte, r rune) []byte {
   213  	if (!f.multiline && r == rune(f.quote)) || r == '\\' { // always backslashed
   214  		buf = f.appendEscape(buf)
   215  		buf = append(buf, byte(r))
   216  		return buf
   217  	}
   218  	if f.asciiOnly {
   219  		if r < utf8.RuneSelf && strconv.IsPrint(r) {
   220  			buf = append(buf, byte(r))
   221  			return buf
   222  		}
   223  	} else if strconv.IsPrint(r) || (f.graphicOnly && strconv.IsGraphic(r)) {
   224  		buf = utf8.AppendRune(buf, r)
   225  		return buf
   226  	}
   227  	buf = f.appendEscape(buf)
   228  	switch r {
   229  	case '\a':
   230  		buf = append(buf, 'a')
   231  	case '\b':
   232  		buf = append(buf, 'b')
   233  	case '\f':
   234  		buf = append(buf, 'f')
   235  	case '\n':
   236  		buf = append(buf, 'n')
   237  	case '\r':
   238  		buf = append(buf, 'r')
   239  	case '\t':
   240  		buf = append(buf, 't')
   241  	case '\v':
   242  		buf = append(buf, 'v')
   243  	default:
   244  		switch {
   245  		case r < ' ' && f.exact:
   246  			buf = append(buf, 'x')
   247  			buf = append(buf, lowerhex[byte(r)>>4])
   248  			buf = append(buf, lowerhex[byte(r)&0xF])
   249  		case r > utf8.MaxRune:
   250  			r = 0xFFFD
   251  			fallthrough
   252  		case r < 0x10000:
   253  			buf = append(buf, 'u')
   254  			for s := 12; s >= 0; s -= 4 {
   255  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   256  			}
   257  		default:
   258  			buf = append(buf, 'U')
   259  			for s := 28; s >= 0; s -= 4 {
   260  				buf = append(buf, lowerhex[r>>uint(s)&0xF])
   261  			}
   262  		}
   263  	}
   264  	return buf
   265  }
   266  
   267  func (f *Form) appendEscape(buf []byte) []byte {
   268  	buf = append(buf, '\\')
   269  	for range f.hashCount {
   270  		buf = append(buf, '#')
   271  	}
   272  	return buf
   273  }
   274  
   275  // requiredHashCount returns the number of # characters
   276  // that are required to quote the multiline string s.
   277  func (f *Form) requiredHashCount(s string) int {
   278  	hashCount := 0
   279  	i := 0
   280  	// Find all occurrences of the triple-quote and count
   281  	// the maximum number of succeeding # characters.
   282  	for {
   283  		j := strings.Index(s[i:], f.tripleQuote)
   284  		if j == -1 {
   285  			break
   286  		}
   287  		i += j + 3
   288  		// Absorb all extra quotes, so we
   289  		// get to the end of the sequence.
   290  		for ; i < len(s); i++ {
   291  			if s[i] != f.quote {
   292  				break
   293  			}
   294  		}
   295  		e := i - 1
   296  		// Count succeeding # characters.
   297  		for ; i < len(s); i++ {
   298  			if s[i] != '#' {
   299  				break
   300  			}
   301  		}
   302  		if nhash := i - e; nhash > hashCount {
   303  			hashCount = nhash
   304  		}
   305  	}
   306  	return hashCount
   307  }