github.com/m3db/m3@v1.5.0/src/query/models/strconv/quote.go (about)

     1  // Copyright (c) 2019 Uber Technologies, Inc.
     2  //
     3  // Permission is hereby granted, free of charge, to any person obtaining a copy
     4  // of this software and associated documentation files (the "Software"), to deal
     5  // in the Software without restriction, including without limitation the rights
     6  // to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
     7  // copies of the Software, and to permit persons to whom the Software is
     8  // furnished to do so, subject to the following conditions:
     9  //
    10  // The above copyright notice and this permission notice shall be included in
    11  // all copies or substantial portions of the Software.
    12  //
    13  // THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
    14  // IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
    15  // FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
    16  // AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
    17  // LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
    18  // OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
    19  // THE SOFTWARE.
    20  
    21  package strconv
    22  
    23  import (
    24  	"strconv"
    25  	"unicode/utf8"
    26  )
    27  
    28  // NB: predefined strconv constants
    29  const (
    30  	tx = 0x80 // 1000 0000
    31  	t2 = 0xC0 // 1100 0000
    32  	t3 = 0xE0 // 1110 0000
    33  	t4 = 0xF0 // 1111 0000
    34  
    35  	maskx = 0x3F // 0011 1111
    36  
    37  	rune1Max = 1<<7 - 1
    38  	rune2Max = 1<<11 - 1
    39  	rune3Max = 1<<16 - 1
    40  
    41  	runeError = '\uFFFD'     // the "error" Rune or "Unicode replacement character"
    42  	maxRune   = '\U0010FFFF' // Maximum valid Unicode code point.
    43  
    44  	// NB: Code points in the surrogate range are not valid for UTF-8.
    45  	surrogateMin = 0xD800
    46  	surrogateMax = 0xDFFF
    47  
    48  	lowerhex = "0123456789abcdef"
    49  
    50  	quote = byte('"')
    51  )
    52  
    53  // EncodeRune writes into src (which must be large enough) the UTF-8 encoding
    54  // of the rune at the given index. It returns the number of bytes written.
    55  //
    56  // NB: based on utf8.encodeRune method, but instead uses indexed insertion
    57  // into a predefined buffer.
    58  func encodeRune(dst []byte, r rune, idx int) int {
    59  	// Negative values are erroneous. Making it unsigned addresses the problem.
    60  	switch i := uint32(r); {
    61  	case i <= rune1Max:
    62  		dst[idx] = byte(r)
    63  		return idx + 1
    64  	case i <= rune2Max:
    65  		dst[idx] = t2 | byte(r>>6)
    66  		dst[idx+1] = tx | byte(r)&maskx
    67  		return idx + 2
    68  	case i > maxRune, surrogateMin <= i && i <= surrogateMax:
    69  		r = runeError
    70  		fallthrough
    71  	case i <= rune3Max:
    72  		dst[idx] = t3 | byte(r>>12)
    73  		dst[idx+2] = tx | byte(r)&maskx
    74  		dst[idx+1] = tx | byte(r>>6)&maskx
    75  		return idx + 3
    76  	default:
    77  		dst[idx] = t4 | byte(r>>18)
    78  		dst[idx+1] = tx | byte(r>>12)&maskx
    79  		dst[idx+2] = tx | byte(r>>6)&maskx
    80  		dst[idx+3] = tx | byte(r)&maskx
    81  		return idx + 4
    82  	}
    83  }
    84  
    85  // It returns the number of bytes written.
    86  func insertEscapedRune(dst []byte, r rune, idx int) int {
    87  	if r == rune(quote) || r == '\\' { // always backslashed
    88  		dst[idx] = '\\'
    89  		dst[idx+1] = byte(r)
    90  		return idx + 2
    91  	}
    92  
    93  	if strconv.IsPrint(r) {
    94  		return encodeRune(dst, r, idx)
    95  	}
    96  
    97  	switch r {
    98  	case '\a':
    99  		dst[idx] = '\\'
   100  		dst[idx+1] = 'a'
   101  		return idx + 2
   102  	case '\b':
   103  		dst[idx] = '\\'
   104  		dst[idx+1] = 'b'
   105  		return idx + 2
   106  	case '\f':
   107  		dst[idx] = '\\'
   108  		dst[idx+1] = 'f'
   109  		return idx + 2
   110  	case '\n':
   111  		dst[idx] = '\\'
   112  		dst[idx+1] = 'n'
   113  		return idx + 2
   114  	case '\r':
   115  		dst[idx] = '\\'
   116  		dst[idx+1] = 'r'
   117  		return idx + 2
   118  	case '\t':
   119  		dst[idx] = '\\'
   120  		dst[idx+1] = 't'
   121  		return idx + 2
   122  	case '\v':
   123  		dst[idx] = '\\'
   124  		dst[idx+1] = 'v'
   125  		return idx + 2
   126  	default:
   127  		switch {
   128  		case r < ' ':
   129  			dst[idx] = '\\'
   130  			dst[idx+1] = 'x'
   131  			dst[idx+2] = lowerhex[byte(r)>>4]
   132  			dst[idx+3] = lowerhex[byte(r)&0xF]
   133  			return idx + 4
   134  		case r > utf8.MaxRune:
   135  			r = 0xFFFD
   136  			fallthrough
   137  		case r < 0x10000:
   138  			dst[idx] = '\\'
   139  			dst[idx+1] = 'u'
   140  			dst[idx+2] = lowerhex[r>>uint(12)&0xF]
   141  			dst[idx+3] = lowerhex[r>>uint(8)&0xF]
   142  			dst[idx+4] = lowerhex[r>>uint(4)&0xF]
   143  			dst[idx+5] = lowerhex[r>>uint(0)&0xF]
   144  			return idx + 6
   145  		default:
   146  			dst[idx] = '\\'
   147  			dst[idx+1] = 'U'
   148  			dst[idx+2] = lowerhex[r>>uint(28)&0xF]
   149  			dst[idx+3] = lowerhex[r>>uint(24)&0xF]
   150  			dst[idx+4] = lowerhex[r>>uint(20)&0xF]
   151  			dst[idx+5] = lowerhex[r>>uint(16)&0xF]
   152  			dst[idx+6] = lowerhex[r>>uint(12)&0xF]
   153  			dst[idx+7] = lowerhex[r>>uint(8)&0xF]
   154  			dst[idx+8] = lowerhex[r>>uint(4)&0xF]
   155  			dst[idx+9] = lowerhex[r>>uint(0)&0xF]
   156  			return idx + 10
   157  		}
   158  	}
   159  }
   160  
   161  // Escape copies byte slice src to dst at a given index, adding escaping any
   162  // quote or control characters. It returns the index at which the copy finished.
   163  //
   164  // NB: ensure that dst is large enough to store src, additional
   165  // quotation runes, and any additional escape characters.
   166  // as generated by Quote, to dst and returns the extended buffer.
   167  func Escape(dst, src []byte, idx int) int {
   168  	// nolint
   169  	for width := 0; len(src) > 0; src = src[width:] {
   170  		r := rune(src[0])
   171  		width = 1
   172  		if r >= utf8.RuneSelf {
   173  			r, width = utf8.DecodeRune(src)
   174  		}
   175  
   176  		if width == 1 && r == utf8.RuneError {
   177  			dst[idx] = '\\'
   178  			dst[idx+1] = 'x'
   179  			dst[idx+2] = lowerhex[src[0]>>4]
   180  			dst[idx+3] = lowerhex[src[0]&0xF]
   181  			idx += 4
   182  			continue
   183  		}
   184  
   185  		idx = insertEscapedRune(dst, r, idx)
   186  	}
   187  
   188  	return idx
   189  }
   190  
   191  // Quote copies byte slice src to dst at a given index, adding
   192  // quotation runes around the src slice and escaping any quote or control
   193  // characters. It returns the index at which the copy finished.
   194  //
   195  // NB: ensure that dst is large enough to store src, additional
   196  // quotation runes, and any additional escape characters.
   197  // as generated by Quote, to dst and returns the extended buffer.
   198  //
   199  // NB: based on stconv.Quote method, but instead uses indexed insertion
   200  // into a predefined buffer.
   201  func Quote(dst, src []byte, idx int) int {
   202  	dst[idx] = quote
   203  	idx++
   204  	idx = Escape(dst, src, idx)
   205  	dst[idx] = quote
   206  	return idx + 1
   207  }
   208  
   209  // QuoteSimple copies byte slice src to dst at a given index, adding
   210  // quotation runes around the src slice, but does not escape any
   211  // characters. It returns the index at which the copy finished.
   212  //
   213  // NB: ensure that dst is large enough to store src and two other characters.
   214  func QuoteSimple(dst, src []byte, idx int) int {
   215  	dst[idx] = quote
   216  	idx++
   217  	idx += copy(dst[idx:], src)
   218  	dst[idx] = quote
   219  	return idx + 1
   220  }
   221  
   222  // EscapedLength computes the length required for a byte slice to hold
   223  // a quoted byte slice.
   224  //
   225  // NB: essentially a dry-run of `Escape` that does not write characters, but
   226  // instead counts total character counts for the destination byte slice.
   227  func EscapedLength(src []byte) int {
   228  	length := 0
   229  	// nolint
   230  	for width := 0; len(src) > 0; src = src[width:] {
   231  		r := rune(src[0])
   232  		width = 1
   233  		if r >= utf8.RuneSelf {
   234  			r, width = utf8.DecodeRune(src)
   235  		}
   236  
   237  		if width == 1 && r == utf8.RuneError {
   238  			length += 4
   239  			continue
   240  		}
   241  
   242  		length += escapedRuneLength(r)
   243  	}
   244  
   245  	return length
   246  }
   247  
   248  // QuotedLength computes the length required for a byte slice to hold
   249  // a quoted byte slice.
   250  //
   251  // NB: essentially a dry-run of `Quote` that does not write characters, but
   252  // instead counts total character counts for the destination byte slice.
   253  func QuotedLength(src []byte) int {
   254  	return 2 + EscapedLength(src) // account for opening and closing quotes
   255  }
   256  
   257  func escapedRuneLength(r rune) int {
   258  	if r == rune(quote) || r == '\\' { // always backslashed
   259  		return 2
   260  	}
   261  
   262  	if strconv.IsPrint(r) {
   263  		switch i := uint32(r); {
   264  		case i <= rune1Max:
   265  			return 1
   266  		case i <= rune2Max:
   267  			return 2
   268  		case i > maxRune, surrogateMin <= i && i <= surrogateMax:
   269  			fallthrough
   270  		case i <= rune3Max:
   271  			return 3
   272  		default:
   273  			return 4
   274  		}
   275  	}
   276  
   277  	switch r {
   278  	case '\a':
   279  		return 2
   280  	case '\b':
   281  		return 2
   282  	case '\f':
   283  		return 2
   284  	case '\n':
   285  		return 2
   286  	case '\r':
   287  		return 2
   288  	case '\t':
   289  		return 2
   290  	case '\v':
   291  		return 2
   292  	default:
   293  		switch {
   294  		case r < ' ':
   295  			return 4
   296  		case r > utf8.MaxRune:
   297  			fallthrough
   298  		case r < 0x10000:
   299  			return 6
   300  		default:
   301  			return 10
   302  		}
   303  	}
   304  }