github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/pgwire_encode.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"bytes"
    15  	"fmt"
    16  	"math"
    17  	"strconv"
    18  	"time"
    19  	"unicode/utf8"
    20  
    21  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/sessiondatapb"
    22  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/types"
    23  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/timeofday"
    24  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/timetz"
    25  	"github.com/lib/pq/oid"
    26  )
    27  
    28  // ResolveBlankPaddedChar pads the given string with spaces if blank padding is
    29  // required or returns the string unmodified otherwise.
    30  func ResolveBlankPaddedChar(s string, t *types.T) string {
    31  	if t.Oid() == oid.T_bpchar && len(s) < int(t.Width()) {
    32  		// Pad spaces on the right of the string to make it of length specified
    33  		// in the type t.
    34  		return fmt.Sprintf("%-*v", t.Width(), s)
    35  	}
    36  	return s
    37  }
    38  
    39  func (d *DTuple) pgwireFormat(ctx *FmtCtx) {
    40  	// When converting a tuple to text in "postgres mode" there is
    41  	// special behavior: values are printed in "postgres mode" then the
    42  	// result string itself is rendered in "postgres mode".
    43  	// Immediate NULL tuple elements are printed as the empty string.
    44  	//
    45  	// In this last conversion, for *tuples* the special double quote
    46  	// and backslash characters are *doubled* (not escaped).  Other
    47  	// special characters from C like \t \n etc are not escaped and
    48  	// instead printed as-is. Only non-valid characters get escaped to
    49  	// hex. So we delegate this formatting to a tuple-specific
    50  	// string printer called pgwireFormatStringInTuple().
    51  	ctx.WriteByte('(')
    52  	comma := ""
    53  	tc := d.ResolvedType().TupleContents()
    54  	for i, v := range d.D {
    55  		ctx.WriteString(comma)
    56  		var t *types.T
    57  		if i < len(tc) {
    58  			t = tc[i]
    59  		} else {
    60  			t = v.ResolvedType()
    61  		}
    62  		switch dv := UnwrapDOidWrapper(v).(type) {
    63  		case dNull:
    64  		case *DString:
    65  			s := ResolveBlankPaddedChar(string(*dv), t)
    66  			pgwireFormatStringInTuple(&ctx.Buffer, s)
    67  		case *DCollatedString:
    68  			s := ResolveBlankPaddedChar(dv.Contents, t)
    69  			pgwireFormatStringInTuple(&ctx.Buffer, s)
    70  			// Bytes cannot use the default case because they will be incorrectly
    71  			// double escaped.
    72  		case *DBytes:
    73  			ctx.WriteString(`"\`)
    74  			ctx.FormatNode(dv)
    75  			ctx.WriteString(`"`)
    76  		case *DJSON:
    77  			var buf bytes.Buffer
    78  			dv.JSON.Format(&buf)
    79  			pgwireFormatStringInTuple(&ctx.Buffer, buf.String())
    80  		case *DFloat:
    81  			fl := float64(*dv)
    82  			b := PgwireFormatFloat(nil /*buf*/, fl, ctx.dataConversionConfig, t)
    83  			ctx.WriteString(string(b))
    84  		default:
    85  			s := AsStringWithFlags(v, ctx.flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location))
    86  			pgwireFormatStringInTuple(&ctx.Buffer, s)
    87  		}
    88  		comma = ","
    89  	}
    90  	ctx.WriteByte(')')
    91  }
    92  
    93  func pgwireFormatStringInTuple(buf *bytes.Buffer, in string) {
    94  	quote := pgwireQuoteStringInTuple(in)
    95  	if quote {
    96  		buf.WriteByte('"')
    97  	}
    98  	// Loop through each unicode code point.
    99  	for _, r := range in {
   100  		if r == '"' || r == '\\' {
   101  			// Strings in tuples double " and \.
   102  			buf.WriteByte(byte(r))
   103  			buf.WriteByte(byte(r))
   104  		} else {
   105  			buf.WriteRune(r)
   106  		}
   107  	}
   108  	if quote {
   109  		buf.WriteByte('"')
   110  	}
   111  }
   112  
   113  func (d *DArray) pgwireFormat(ctx *FmtCtx) {
   114  	// When converting an array to text in "postgres mode" there is
   115  	// special behavior: values are printed in "postgres mode" then the
   116  	// result string itself is rendered in "postgres mode".
   117  	// Immediate NULL array elements are printed as "NULL".
   118  	//
   119  	// In this last conversion, for *arrays* the special double quote
   120  	// and backslash characters are *escaped* (not doubled).  Other
   121  	// special characters from C like \t \n etc are not escaped and
   122  	// instead printed as-is. Only non-valid characters get escaped to
   123  	// hex. So we delegate this formatting to a tuple-specific
   124  	// string printer called pgwireFormatStringInArray().
   125  	switch d.ResolvedType().Oid() {
   126  	case oid.T_int2vector, oid.T_oidvector:
   127  		// vectors are serialized as a string of space-separated values.
   128  		sep := ""
   129  		// TODO(justin): add a test for nested arrays when #32552 is
   130  		// addressed.
   131  		for _, d := range d.Array {
   132  			ctx.WriteString(sep)
   133  			ctx.FormatNode(d)
   134  			sep = " "
   135  		}
   136  		return
   137  	}
   138  
   139  	if ctx.HasFlags(FmtPGCatalog) {
   140  		ctx.WriteByte('\'')
   141  	}
   142  	ctx.WriteByte('{')
   143  	delimiter := ""
   144  	for _, v := range d.Array {
   145  		ctx.WriteString(delimiter)
   146  		switch dv := UnwrapDOidWrapper(v).(type) {
   147  		case dNull:
   148  			ctx.WriteString("NULL")
   149  		case *DString:
   150  			pgwireFormatStringInArray(ctx, string(*dv))
   151  		case *DCollatedString:
   152  			pgwireFormatStringInArray(ctx, dv.Contents)
   153  			// Bytes cannot use the default case because they will be incorrectly
   154  			// double escaped.
   155  		case *DBytes:
   156  			ctx.WriteString(`"\`)
   157  			ctx.FormatNode(dv)
   158  			ctx.WriteString(`"`)
   159  		case *DFloat:
   160  			fl := float64(*dv)
   161  			floatTyp := d.ResolvedType().ArrayContents()
   162  			b := PgwireFormatFloat(nil /*buf*/, fl, ctx.dataConversionConfig, floatTyp)
   163  			ctx.WriteString(string(b))
   164  		case *DJSON:
   165  			flags := ctx.flags | fmtRawStrings
   166  			s := AsStringWithFlags(v, flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location))
   167  			pgwireFormatStringInArray(ctx, s)
   168  		default:
   169  			s := AsStringWithFlags(v, ctx.flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location))
   170  			pgwireFormatStringInArray(ctx, s)
   171  		}
   172  		delimiter = d.ParamTyp.Delimiter()
   173  	}
   174  	ctx.WriteByte('}')
   175  	if ctx.HasFlags(FmtPGCatalog) {
   176  		ctx.WriteByte('\'')
   177  	}
   178  }
   179  
   180  var tupleQuoteSet, arrayQuoteSet asciiSet
   181  
   182  func init() {
   183  	var ok bool
   184  	tupleQuoteSet, ok = makeASCIISet(" \t\v\f\r\n(),\"\\")
   185  	if !ok {
   186  		panic("tuple asciiset")
   187  	}
   188  	arrayQuoteSet, ok = makeASCIISet(" \t\v\f\r\n{},\"\\")
   189  	if !ok {
   190  		panic("array asciiset")
   191  	}
   192  }
   193  
   194  // PgwireFormatFloat returns a []byte representing a float according to
   195  // pgwire encoding. The result is appended to the given buffer.
   196  func PgwireFormatFloat(
   197  	buf []byte, fl float64, conv sessiondatapb.DataConversionConfig, floatTyp *types.T,
   198  ) []byte {
   199  	// PostgreSQL supports 'Inf' as a valid literal for the floating point
   200  	// special value Infinity, therefore handling the special cases for them.
   201  	// (https://github.com/cockroachdb/cockroachdb-parser/issues/62601)
   202  	if math.IsInf(fl, 1) {
   203  		return append(buf, []byte("Infinity")...)
   204  	} else if math.IsInf(fl, -1) {
   205  		return append(buf, []byte("-Infinity")...)
   206  	} else {
   207  		return strconv.AppendFloat(
   208  			buf, fl, 'g',
   209  			conv.GetFloatPrec(floatTyp),
   210  			int(floatTyp.Width()),
   211  		)
   212  	}
   213  }
   214  
   215  func pgwireQuoteStringInTuple(in string) bool {
   216  	return in == "" || tupleQuoteSet.in(in)
   217  }
   218  
   219  func pgwireQuoteStringInArray(in string) bool {
   220  	if in == "" || arrayQuoteSet.in(in) {
   221  		return true
   222  	}
   223  	if len(in) == 4 &&
   224  		(in[0] == 'n' || in[0] == 'N') &&
   225  		(in[1] == 'u' || in[1] == 'U') &&
   226  		(in[2] == 'l' || in[2] == 'L') &&
   227  		(in[3] == 'l' || in[3] == 'L') {
   228  		return true
   229  	}
   230  	return false
   231  }
   232  
   233  func pgwireFormatStringInArray(ctx *FmtCtx, in string) {
   234  	buf := &ctx.Buffer
   235  	quote := pgwireQuoteStringInArray(in)
   236  	if quote {
   237  		buf.WriteByte('"')
   238  	}
   239  	// Loop through each unicode code point.
   240  	for _, r := range in {
   241  		if r == '"' || r == '\\' {
   242  			// Strings in arrays escape " and \.
   243  			buf.WriteByte('\\')
   244  			buf.WriteByte(byte(r))
   245  		} else if ctx.HasFlags(FmtPGCatalog) && r == '\'' {
   246  			buf.WriteByte('\'')
   247  			buf.WriteByte('\'')
   248  		} else {
   249  			buf.WriteRune(r)
   250  		}
   251  	}
   252  	if quote {
   253  		buf.WriteByte('"')
   254  	}
   255  }
   256  
   257  // From: https://github.com/golang/go/blob/master/src/strings/strings.go
   258  
   259  // asciiSet is a 32-byte value, where each bit represents the presence of a
   260  // given ASCII character in the set. The 128-bits of the lower 16 bytes,
   261  // starting with the least-significant bit of the lowest word to the
   262  // most-significant bit of the highest word, map to the full range of all
   263  // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed,
   264  // ensuring that any non-ASCII character will be reported as not in the set.
   265  type asciiSet [8]uint32
   266  
   267  // makeASCIISet creates a set of ASCII characters and reports whether all
   268  // characters in chars are ASCII.
   269  func makeASCIISet(chars string) (as asciiSet, ok bool) {
   270  	for i := 0; i < len(chars); i++ {
   271  		c := chars[i]
   272  		if c >= utf8.RuneSelf {
   273  			return as, false
   274  		}
   275  		as[c>>5] |= 1 << uint(c&31)
   276  	}
   277  	return as, true
   278  }
   279  
   280  // contains reports whether c is inside the set.
   281  func (as *asciiSet) contains(c byte) bool {
   282  	return (as[c>>5] & (1 << uint(c&31))) != 0
   283  }
   284  
   285  // in reports whether any member of the set is in s.
   286  func (as *asciiSet) in(s string) bool {
   287  	for i := 0; i < len(s); i++ {
   288  		if as.contains(s[i]) {
   289  			return true
   290  		}
   291  	}
   292  	return false
   293  }
   294  
// This block contains all available PG time formats.
//
// Apart from PGTime2400Format, each value is a layout string for the Go time
// package, built up by concatenation: the timestamp layouts are the date
// layout, a space and the time-of-day layout, and the layouts with a zone
// append "-07" (the zone offset in hours). PGTime2400Format is the literal
// text substituted for a 24:00:00 time of day by PGWireFormatTimeTZ.
const (
	PGTimeFormat              = "15:04:05.999999"
	PGDateFormat              = "2006-01-02"
	PGTimeStampFormatNoOffset = PGDateFormat + " " + PGTimeFormat
	PGTimeStampFormat         = PGTimeStampFormatNoOffset + "-07"
	PGTime2400Format          = "24:00:00"
	PGTimeTZFormat            = PGTimeFormat + "-07"
)
   304  
// PGWireFormatTime formats t into a format lib/pq understands, appending to the
// provided tmp buffer and reallocating if needed. The function will then return
// the resulting buffer.
//
// The rendering itself is delegated entirely to t.AppendFormat.
func PGWireFormatTime(t timeofday.TimeOfDay, tmp []byte) []byte {
	return t.AppendFormat(tmp)
}
   311  
   312  // PGWireFormatTimeTZ formats t into a format lib/pq understands, appending to the
   313  // provided tmp buffer and reallocating if needed. The function will then return
   314  // the resulting buffer.
   315  func PGWireFormatTimeTZ(t timetz.TimeTZ, tmp []byte) []byte {
   316  	format := PGTimeTZFormat
   317  	if t.OffsetSecs%60 != 0 {
   318  		format += ":00:00"
   319  	} else if t.OffsetSecs%3600 != 0 {
   320  		format += ":00"
   321  	}
   322  	ret := t.ToTime().AppendFormat(tmp, format)
   323  	// time.Time's AppendFormat does not recognize 2400, so special case it accordingly.
   324  	if t.TimeOfDay == timeofday.Time2400 {
   325  		// It instead reads 00:00:00. Replace that text.
   326  		var newRet []byte
   327  		newRet = append(newRet, PGTime2400Format...)
   328  		newRet = append(newRet, ret[len(PGTime2400Format):]...)
   329  		ret = newRet
   330  	}
   331  	return ret
   332  }
   333  
   334  // PGWireFormatTimestamp formats t into a format lib/pq understands.
   335  // If offset is not nil, it will not display the timezone offset.
   336  func PGWireFormatTimestamp(t time.Time, offset *time.Location, tmp []byte) (b []byte) {
   337  	format := PGTimeStampFormatNoOffset
   338  	if offset != nil {
   339  		format = PGTimeStampFormat
   340  		if _, offsetSeconds := t.In(offset).Zone(); offsetSeconds%60 != 0 {
   341  			format += ":00:00"
   342  		} else if offsetSeconds%3600 != 0 {
   343  			format += ":00"
   344  		}
   345  	}
   346  
   347  	// Need to send dates before 0001 A.D. with " BC" suffix, instead of the
   348  	// minus sign preferred by Go.
   349  	// Beware, "0000" in ISO is "1 BC", "-0001" is "2 BC" and so on
   350  	if offset != nil {
   351  		t = t.In(offset)
   352  	}
   353  
   354  	bc := false
   355  	if t.Year() <= 0 {
   356  		// flip year sign, and add 1, e.g: "0" will be "1", and "-10" will be "11"
   357  		t = t.AddDate((-t.Year())*2+1, 0, 0)
   358  		bc = true
   359  	}
   360  
   361  	b = t.AppendFormat(tmp, format)
   362  	if bc {
   363  		b = append(b, " BC"...)
   364  	}
   365  	return b
   366  }