github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/util/stringencoding/string_encoding.go (about)

     1  // Copyright 2012, Google Inc. All rights reserved.
     2  // Use of this source code is governed by a BSD-style
     3  // license that can be found in licenses/BSD-vitess.txt.
     4  
     5  // Portions of this file are additionally subject to the following
     6  // license and copyright.
     7  //
     8  // Copyright 2017 The Cockroach Authors.
     9  //
    10  // Use of this software is governed by the Business Source License
    11  // included in the file licenses/BSL.txt.
    12  //
    13  // As of the Change Date specified in that file, in accordance with
    14  // the Business Source License, use of this software will be governed
    15  // by the Apache License, Version 2.0, included in the file
    16  // licenses/APL.txt.
    17  
    18  // This code was derived from https://github.com/youtube/vitess.
    19  
    20  package stringencoding
    21  
    22  import (
    23  	"bytes"
    24  	"fmt"
    25  	"unicode/utf8"
    26  )
    27  
    28  // This is its own package so it can be shared among packages that parser
    29  // depends on.
    30  
    31  var (
    32  	// DontEscape is a sentinel value for characters that don't need to be escaped.
    33  	DontEscape = byte(255)
    34  	// EncodeMap specifies how to escape binary data with '\'.
    35  	EncodeMap [256]byte
    36  	// HexMap is a mapping from each byte to the `\x%%` hex form as a []byte.
    37  	HexMap [256][]byte
    38  	// RawHexMap is a mapping from each byte to the `%%` hex form as a []byte.
    39  	RawHexMap [256][]byte
    40  )
    41  
    42  func init() {
    43  	encodeRef := map[byte]byte{
    44  		'\b': 'b',
    45  		'\f': 'f',
    46  		'\n': 'n',
    47  		'\r': 'r',
    48  		'\t': 't',
    49  		'\\': '\\',
    50  	}
    51  
    52  	for i := range EncodeMap {
    53  		EncodeMap[i] = DontEscape
    54  	}
    55  	for i := range EncodeMap {
    56  		if to, ok := encodeRef[byte(i)]; ok {
    57  			EncodeMap[byte(i)] = to
    58  		}
    59  	}
    60  
    61  	// underlyingHexMap contains the string "\x00\x01\x02..." which HexMap and
    62  	// RawHexMap then index into.
    63  	var underlyingHexMap bytes.Buffer
    64  	underlyingHexMap.Grow(1024)
    65  
    66  	for i := 0; i < 256; i++ {
    67  		underlyingHexMap.WriteString("\\x")
    68  		writeHexDigit(&underlyingHexMap, i/16)
    69  		writeHexDigit(&underlyingHexMap, i%16)
    70  	}
    71  
    72  	underlyingHexBytes := underlyingHexMap.Bytes()
    73  
    74  	for i := 0; i < 256; i++ {
    75  		HexMap[i] = underlyingHexBytes[i*4 : i*4+4]
    76  		RawHexMap[i] = underlyingHexBytes[i*4+2 : i*4+4]
    77  	}
    78  }
    79  
    80  // EncodeEscapedChar is used internally to write out a character from a larger
    81  // string that needs to be escaped to a buffer.
    82  func EncodeEscapedChar(
    83  	buf *bytes.Buffer,
    84  	entireString string,
    85  	currentRune rune,
    86  	currentByte byte,
    87  	currentIdx int,
    88  	quoteChar byte,
    89  ) {
    90  	ln := utf8.RuneLen(currentRune)
    91  	if currentRune == utf8.RuneError {
    92  		// Errors are due to invalid unicode points, so escape the bytes.
    93  		// Make sure this is run at least once in case ln == -1.
    94  		buf.Write(HexMap[entireString[currentIdx]])
    95  		for ri := 1; ri < ln; ri++ {
    96  			if currentIdx+ri < len(entireString) {
    97  				buf.Write(HexMap[entireString[currentIdx+ri]])
    98  			}
    99  		}
   100  	} else if ln == 1 {
   101  		// For single-byte runes, do the same as encodeSQLBytes.
   102  		if encodedChar := EncodeMap[currentByte]; encodedChar != DontEscape {
   103  			buf.WriteByte('\\')
   104  			buf.WriteByte(encodedChar)
   105  		} else if currentByte == quoteChar {
   106  			buf.WriteByte('\\')
   107  			buf.WriteByte(quoteChar)
   108  		} else {
   109  			// Escape non-printable characters.
   110  			buf.Write(HexMap[currentByte])
   111  		}
   112  	} else if ln == 2 {
   113  		// For multi-byte runes, print them based on their width.
   114  		fmt.Fprintf(buf, `\u%04X`, currentRune)
   115  	} else {
   116  		fmt.Fprintf(buf, `\U%08X`, currentRune)
   117  	}
   118  }
   119  
   120  func writeHexDigit(buf *bytes.Buffer, v int) {
   121  	if v < 10 {
   122  		buf.WriteByte('0' + byte(v))
   123  	} else {
   124  		buf.WriteByte('a' + byte(v-10))
   125  	}
   126  }
   127  
   128  // NeedEscape returns whether the given byte needs to be escaped.
   129  func NeedEscape(ch byte) bool {
   130  	return EncodeMap[ch] != DontEscape
   131  }