github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/sem/tree/pgwire_encode.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"bytes"
    15  	"unicode/utf8"
    16  
    17  	"github.com/lib/pq/oid"
    18  )
    19  
    20  func (d *DTuple) pgwireFormat(ctx *FmtCtx) {
    21  	// When converting a tuple to text in "postgres mode" there is
    22  	// special behavior: values are printed in "postgres mode" then the
    23  	// result string itself is rendered in "postgres mode".
    24  	// Immediate NULL tuple elements are printed as the empty string.
    25  	//
    26  	// In this last conversion, for *tuples* the special double quote
    27  	// and backslash characters are *doubled* (not escaped).  Other
    28  	// special characters from C like \t \n etc are not escaped and
    29  	// instead printed as-is. Only non-valid characters get escaped to
    30  	// hex. So we delegate this formatting to a tuple-specific
    31  	// string printer called pgwireFormatStringInTuple().
    32  	ctx.WriteByte('(')
    33  	comma := ""
    34  	for _, v := range d.D {
    35  		ctx.WriteString(comma)
    36  		switch dv := UnwrapDatum(nil, v).(type) {
    37  		case dNull:
    38  		case *DString:
    39  			pgwireFormatStringInTuple(&ctx.Buffer, string(*dv))
    40  		case *DCollatedString:
    41  			pgwireFormatStringInTuple(&ctx.Buffer, dv.Contents)
    42  			// Bytes cannot use the default case because they will be incorrectly
    43  			// double escaped.
    44  		case *DBytes:
    45  			ctx.FormatNode(dv)
    46  		case *DJSON:
    47  			var buf bytes.Buffer
    48  			dv.JSON.Format(&buf)
    49  			pgwireFormatStringInTuple(&ctx.Buffer, buf.String())
    50  		default:
    51  			s := AsStringWithFlags(v, ctx.flags)
    52  			pgwireFormatStringInTuple(&ctx.Buffer, s)
    53  		}
    54  		comma = ","
    55  	}
    56  	ctx.WriteByte(')')
    57  }
    58  
    59  func pgwireFormatStringInTuple(buf *bytes.Buffer, in string) {
    60  	quote := pgwireQuoteStringInTuple(in)
    61  	if quote {
    62  		buf.WriteByte('"')
    63  	}
    64  	// Loop through each unicode code point.
    65  	for _, r := range in {
    66  		if r == '"' || r == '\\' {
    67  			// Strings in tuples double " and \.
    68  			buf.WriteByte(byte(r))
    69  			buf.WriteByte(byte(r))
    70  		} else {
    71  			buf.WriteRune(r)
    72  		}
    73  	}
    74  	if quote {
    75  		buf.WriteByte('"')
    76  	}
    77  }
    78  
    79  func (d *DArray) pgwireFormat(ctx *FmtCtx) {
    80  	// When converting an array to text in "postgres mode" there is
    81  	// special behavior: values are printed in "postgres mode" then the
    82  	// result string itself is rendered in "postgres mode".
    83  	// Immediate NULL array elements are printed as "NULL".
    84  	//
    85  	// In this last conversion, for *arrays* the special double quote
    86  	// and backslash characters are *escaped* (not doubled).  Other
    87  	// special characters from C like \t \n etc are not escaped and
    88  	// instead printed as-is. Only non-valid characters get escaped to
    89  	// hex. So we delegate this formatting to a tuple-specific
    90  	// string printer called pgwireFormatStringInArray().
    91  	switch d.ResolvedType().Oid() {
    92  	case oid.T_int2vector, oid.T_oidvector:
    93  		// vectors are serialized as a string of space-separated values.
    94  		sep := ""
    95  		// TODO(justin): add a test for nested arrays when #32552 is
    96  		// addressed.
    97  		for _, d := range d.Array {
    98  			ctx.WriteString(sep)
    99  			ctx.FormatNode(d)
   100  			sep = " "
   101  		}
   102  		return
   103  	}
   104  
   105  	ctx.WriteByte('{')
   106  	comma := ""
   107  	for _, v := range d.Array {
   108  		ctx.WriteString(comma)
   109  		switch dv := UnwrapDatum(nil, v).(type) {
   110  		case dNull:
   111  			ctx.WriteString("NULL")
   112  		case *DString:
   113  			pgwireFormatStringInArray(&ctx.Buffer, string(*dv))
   114  		case *DCollatedString:
   115  			pgwireFormatStringInArray(&ctx.Buffer, dv.Contents)
   116  			// Bytes cannot use the default case because they will be incorrectly
   117  			// double escaped.
   118  		case *DBytes:
   119  			ctx.FormatNode(dv)
   120  		default:
   121  			s := AsStringWithFlags(v, ctx.flags)
   122  			pgwireFormatStringInArray(&ctx.Buffer, s)
   123  		}
   124  		comma = ","
   125  	}
   126  	ctx.WriteByte('}')
   127  }
   128  
   129  var tupleQuoteSet, arrayQuoteSet asciiSet
   130  
   131  func init() {
   132  	var ok bool
   133  	tupleQuoteSet, ok = makeASCIISet(" \t\v\f\r\n(),\"\\")
   134  	if !ok {
   135  		panic("tuple asciiset")
   136  	}
   137  	arrayQuoteSet, ok = makeASCIISet(" \t\v\f\r\n{},\"\\")
   138  	if !ok {
   139  		panic("array asciiset")
   140  	}
   141  }
   142  
   143  func pgwireQuoteStringInTuple(in string) bool {
   144  	return in == "" || tupleQuoteSet.in(in)
   145  }
   146  
   147  func pgwireQuoteStringInArray(in string) bool {
   148  	if in == "" || arrayQuoteSet.in(in) {
   149  		return true
   150  	}
   151  	if len(in) == 4 &&
   152  		(in[0] == 'n' || in[0] == 'N') &&
   153  		(in[1] == 'u' || in[1] == 'U') &&
   154  		(in[2] == 'l' || in[2] == 'L') &&
   155  		(in[3] == 'l' || in[3] == 'L') {
   156  		return true
   157  	}
   158  	return false
   159  }
   160  
   161  func pgwireFormatStringInArray(buf *bytes.Buffer, in string) {
   162  	quote := pgwireQuoteStringInArray(in)
   163  	if quote {
   164  		buf.WriteByte('"')
   165  	}
   166  	// Loop through each unicode code point.
   167  	for _, r := range in {
   168  		if r == '"' || r == '\\' {
   169  			// Strings in arrays escape " and \.
   170  			buf.WriteByte('\\')
   171  			buf.WriteByte(byte(r))
   172  		} else {
   173  			buf.WriteRune(r)
   174  		}
   175  	}
   176  	if quote {
   177  		buf.WriteByte('"')
   178  	}
   179  }
   180  
   181  // From: https://github.com/golang/go/blob/master/src/strings/strings.go
   182  
   183  // asciiSet is a 32-byte value, where each bit represents the presence of a
   184  // given ASCII character in the set. The 128-bits of the lower 16 bytes,
   185  // starting with the least-significant bit of the lowest word to the
   186  // most-significant bit of the highest word, map to the full range of all
   187  // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
   188  // ensuring that any non-ASCII character will be reported as not in the set.
   189  type asciiSet [8]uint32
   190  
   191  // makeASCIISet creates a set of ASCII characters and reports whether all
   192  // characters in chars are ASCII.
   193  func makeASCIISet(chars string) (as asciiSet, ok bool) {
   194  	for i := 0; i < len(chars); i++ {
   195  		c := chars[i]
   196  		if c >= utf8.RuneSelf {
   197  			return as, false
   198  		}
   199  		as[c>>5] |= 1 << uint(c&31)
   200  	}
   201  	return as, true
   202  }
   203  
   204  // contains reports whether c is inside the set.
   205  func (as *asciiSet) contains(c byte) bool {
   206  	return (as[c>>5] & (1 << uint(c&31))) != 0
   207  }
   208  
   209  // in reports whether any member of the set is in s.
   210  func (as *asciiSet) in(s string) bool {
   211  	for i := 0; i < len(s); i++ {
   212  		if as.contains(s[i]) {
   213  			return true
   214  		}
   215  	}
   216  	return false
   217  }