github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/pgwire_encode.go (about) 1 // Copyright 2018 The Cockroach Authors. 2 // 3 // Use of this software is governed by the Business Source License 4 // included in the file licenses/BSL.txt. 5 // 6 // As of the Change Date specified in that file, in accordance with 7 // the Business Source License, use of this software will be governed 8 // by the Apache License, Version 2.0, included in the file 9 // licenses/APL.txt. 10 11 package tree 12 13 import ( 14 "bytes" 15 "fmt" 16 "math" 17 "strconv" 18 "time" 19 "unicode/utf8" 20 21 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/sessiondatapb" 22 "github.com/cockroachdb/cockroachdb-parser/pkg/sql/types" 23 "github.com/cockroachdb/cockroachdb-parser/pkg/util/timeofday" 24 "github.com/cockroachdb/cockroachdb-parser/pkg/util/timetz" 25 "github.com/lib/pq/oid" 26 ) 27 28 // ResolveBlankPaddedChar pads the given string with spaces if blank padding is 29 // required or returns the string unmodified otherwise. 30 func ResolveBlankPaddedChar(s string, t *types.T) string { 31 if t.Oid() == oid.T_bpchar && len(s) < int(t.Width()) { 32 // Pad spaces on the right of the string to make it of length specified 33 // in the type t. 34 return fmt.Sprintf("%-*v", t.Width(), s) 35 } 36 return s 37 } 38 39 func (d *DTuple) pgwireFormat(ctx *FmtCtx) { 40 // When converting a tuple to text in "postgres mode" there is 41 // special behavior: values are printed in "postgres mode" then the 42 // result string itself is rendered in "postgres mode". 43 // Immediate NULL tuple elements are printed as the empty string. 44 // 45 // In this last conversion, for *tuples* the special double quote 46 // and backslash characters are *doubled* (not escaped). Other 47 // special characters from C like \t \n etc are not escaped and 48 // instead printed as-is. Only non-valid characters get escaped to 49 // hex. So we delegate this formatting to a tuple-specific 50 // string printer called pgwireFormatStringInTuple(). 51 ctx.WriteByte('(') 52 comma := "" 53 tc := d.ResolvedType().TupleContents() 54 for i, v := range d.D { 55 ctx.WriteString(comma) 56 var t *types.T 57 if i < len(tc) { 58 t = tc[i] 59 } else { 60 t = v.ResolvedType() 61 } 62 switch dv := UnwrapDOidWrapper(v).(type) { 63 case dNull: 64 case *DString: 65 s := ResolveBlankPaddedChar(string(*dv), t) 66 pgwireFormatStringInTuple(&ctx.Buffer, s) 67 case *DCollatedString: 68 s := ResolveBlankPaddedChar(dv.Contents, t) 69 pgwireFormatStringInTuple(&ctx.Buffer, s) 70 // Bytes cannot use the default case because they will be incorrectly 71 // double escaped. 72 case *DBytes: 73 ctx.WriteString(`"\`) 74 ctx.FormatNode(dv) 75 ctx.WriteString(`"`) 76 case *DJSON: 77 var buf bytes.Buffer 78 dv.JSON.Format(&buf) 79 pgwireFormatStringInTuple(&ctx.Buffer, buf.String()) 80 case *DFloat: 81 fl := float64(*dv) 82 b := PgwireFormatFloat(nil /*buf*/, fl, ctx.dataConversionConfig, t) 83 ctx.WriteString(string(b)) 84 default: 85 s := AsStringWithFlags(v, ctx.flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location)) 86 pgwireFormatStringInTuple(&ctx.Buffer, s) 87 } 88 comma = "," 89 } 90 ctx.WriteByte(')') 91 } 92 93 func pgwireFormatStringInTuple(buf *bytes.Buffer, in string) { 94 quote := pgwireQuoteStringInTuple(in) 95 if quote { 96 buf.WriteByte('"') 97 } 98 // Loop through each unicode code point. 99 for _, r := range in { 100 if r == '"' || r == '\\' { 101 // Strings in tuples double " and \. 102 buf.WriteByte(byte(r)) 103 buf.WriteByte(byte(r)) 104 } else { 105 buf.WriteRune(r) 106 } 107 } 108 if quote { 109 buf.WriteByte('"') 110 } 111 } 112 113 func (d *DArray) pgwireFormat(ctx *FmtCtx) { 114 // When converting an array to text in "postgres mode" there is 115 // special behavior: values are printed in "postgres mode" then the 116 // result string itself is rendered in "postgres mode". 117 // Immediate NULL array elements are printed as "NULL". 118 // 119 // In this last conversion, for *arrays* the special double quote 120 // and backslash characters are *escaped* (not doubled). Other 121 // special characters from C like \t \n etc are not escaped and 122 // instead printed as-is. Only non-valid characters get escaped to 123 // hex. So we delegate this formatting to a tuple-specific 124 // string printer called pgwireFormatStringInArray(). 125 switch d.ResolvedType().Oid() { 126 case oid.T_int2vector, oid.T_oidvector: 127 // vectors are serialized as a string of space-separated values. 128 sep := "" 129 // TODO(justin): add a test for nested arrays when #32552 is 130 // addressed. 131 for _, d := range d.Array { 132 ctx.WriteString(sep) 133 ctx.FormatNode(d) 134 sep = " " 135 } 136 return 137 } 138 139 if ctx.HasFlags(FmtPGCatalog) { 140 ctx.WriteByte('\'') 141 } 142 ctx.WriteByte('{') 143 delimiter := "" 144 for _, v := range d.Array { 145 ctx.WriteString(delimiter) 146 switch dv := UnwrapDOidWrapper(v).(type) { 147 case dNull: 148 ctx.WriteString("NULL") 149 case *DString: 150 pgwireFormatStringInArray(ctx, string(*dv)) 151 case *DCollatedString: 152 pgwireFormatStringInArray(ctx, dv.Contents) 153 // Bytes cannot use the default case because they will be incorrectly 154 // double escaped. 155 case *DBytes: 156 ctx.WriteString(`"\`) 157 ctx.FormatNode(dv) 158 ctx.WriteString(`"`) 159 case *DFloat: 160 fl := float64(*dv) 161 floatTyp := d.ResolvedType().ArrayContents() 162 b := PgwireFormatFloat(nil /*buf*/, fl, ctx.dataConversionConfig, floatTyp) 163 ctx.WriteString(string(b)) 164 case *DJSON: 165 flags := ctx.flags | fmtRawStrings 166 s := AsStringWithFlags(v, flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location)) 167 pgwireFormatStringInArray(ctx, s) 168 default: 169 s := AsStringWithFlags(v, ctx.flags, FmtDataConversionConfig(ctx.dataConversionConfig), FmtLocation(ctx.location)) 170 pgwireFormatStringInArray(ctx, s) 171 } 172 delimiter = d.ParamTyp.Delimiter() 173 } 174 ctx.WriteByte('}') 175 if ctx.HasFlags(FmtPGCatalog) { 176 ctx.WriteByte('\'') 177 } 178 } 179 180 var tupleQuoteSet, arrayQuoteSet asciiSet 181 182 func init() { 183 var ok bool 184 tupleQuoteSet, ok = makeASCIISet(" \t\v\f\r\n(),\"\\") 185 if !ok { 186 panic("tuple asciiset") 187 } 188 arrayQuoteSet, ok = makeASCIISet(" \t\v\f\r\n{},\"\\") 189 if !ok { 190 panic("array asciiset") 191 } 192 } 193 194 // PgwireFormatFloat returns a []byte representing a float according to 195 // pgwire encoding. The result is appended to the given buffer. 196 func PgwireFormatFloat( 197 buf []byte, fl float64, conv sessiondatapb.DataConversionConfig, floatTyp *types.T, 198 ) []byte { 199 // PostgreSQL supports 'Inf' as a valid literal for the floating point 200 // special value Infinity, therefore handling the special cases for them. 201 // (https://github.com/cockroachdb/cockroachdb-parser/issues/62601) 202 if math.IsInf(fl, 1) { 203 return append(buf, []byte("Infinity")...) 204 } else if math.IsInf(fl, -1) { 205 return append(buf, []byte("-Infinity")...) 206 } else { 207 return strconv.AppendFloat( 208 buf, fl, 'g', 209 conv.GetFloatPrec(floatTyp), 210 int(floatTyp.Width()), 211 ) 212 } 213 } 214 215 func pgwireQuoteStringInTuple(in string) bool { 216 return in == "" || tupleQuoteSet.in(in) 217 } 218 219 func pgwireQuoteStringInArray(in string) bool { 220 if in == "" || arrayQuoteSet.in(in) { 221 return true 222 } 223 if len(in) == 4 && 224 (in[0] == 'n' || in[0] == 'N') && 225 (in[1] == 'u' || in[1] == 'U') && 226 (in[2] == 'l' || in[2] == 'L') && 227 (in[3] == 'l' || in[3] == 'L') { 228 return true 229 } 230 return false 231 } 232 233 func pgwireFormatStringInArray(ctx *FmtCtx, in string) { 234 buf := &ctx.Buffer 235 quote := pgwireQuoteStringInArray(in) 236 if quote { 237 buf.WriteByte('"') 238 } 239 // Loop through each unicode code point. 240 for _, r := range in { 241 if r == '"' || r == '\\' { 242 // Strings in arrays escape " and \. 243 buf.WriteByte('\\') 244 buf.WriteByte(byte(r)) 245 } else if ctx.HasFlags(FmtPGCatalog) && r == '\'' { 246 buf.WriteByte('\'') 247 buf.WriteByte('\'') 248 } else { 249 buf.WriteRune(r) 250 } 251 } 252 if quote { 253 buf.WriteByte('"') 254 } 255 } 256 257 // From: https://github.com/golang/go/blob/master/src/strings/strings.go 258 259 // asciiSet is a 32-byte value, where each bit represents the presence of a 260 // given ASCII character in the set. The 128-bits of the lower 16 bytes, 261 // starting with the least-significant bit of the lowest word to the 262 // most-significant bit of the highest word, map to the full range of all 263 // 128 ASCII characters. The 128-bits of the upper 16 bytes will be zeroed, 264 // ensuring that any non-ASCII character will be reported as not in the set. 265 type asciiSet [8]uint32 266 267 // makeASCIISet creates a set of ASCII characters and reports whether all 268 // characters in chars are ASCII. 269 func makeASCIISet(chars string) (as asciiSet, ok bool) { 270 for i := 0; i < len(chars); i++ { 271 c := chars[i] 272 if c >= utf8.RuneSelf { 273 return as, false 274 } 275 as[c>>5] |= 1 << uint(c&31) 276 } 277 return as, true 278 } 279 280 // contains reports whether c is inside the set. 281 func (as *asciiSet) contains(c byte) bool { 282 return (as[c>>5] & (1 << uint(c&31))) != 0 283 } 284 285 // in reports whether any member of the set is in s. 286 func (as *asciiSet) in(s string) bool { 287 for i := 0; i < len(s); i++ { 288 if as.contains(s[i]) { 289 return true 290 } 291 } 292 return false 293 } 294 295 // This block contains all available PG time formats. 296 const ( 297 PGTimeFormat = "15:04:05.999999" 298 PGDateFormat = "2006-01-02" 299 PGTimeStampFormatNoOffset = PGDateFormat + " " + PGTimeFormat 300 PGTimeStampFormat = PGTimeStampFormatNoOffset + "-07" 301 PGTime2400Format = "24:00:00" 302 PGTimeTZFormat = PGTimeFormat + "-07" 303 ) 304 305 // PGWireFormatTime formats t into a format lib/pq understands, appending to the 306 // provided tmp buffer and reallocating if needed. The function will then return 307 // the resulting buffer. 308 func PGWireFormatTime(t timeofday.TimeOfDay, tmp []byte) []byte { 309 return t.AppendFormat(tmp) 310 } 311 312 // PGWireFormatTimeTZ formats t into a format lib/pq understands, appending to the 313 // provided tmp buffer and reallocating if needed. The function will then return 314 // the resulting buffer. 315 func PGWireFormatTimeTZ(t timetz.TimeTZ, tmp []byte) []byte { 316 format := PGTimeTZFormat 317 if t.OffsetSecs%60 != 0 { 318 format += ":00:00" 319 } else if t.OffsetSecs%3600 != 0 { 320 format += ":00" 321 } 322 ret := t.ToTime().AppendFormat(tmp, format) 323 // time.Time's AppendFormat does not recognize 2400, so special case it accordingly. 324 if t.TimeOfDay == timeofday.Time2400 { 325 // It instead reads 00:00:00. Replace that text. 326 var newRet []byte 327 newRet = append(newRet, PGTime2400Format...) 328 newRet = append(newRet, ret[len(PGTime2400Format):]...) 329 ret = newRet 330 } 331 return ret 332 } 333 334 // PGWireFormatTimestamp formats t into a format lib/pq understands. 335 // If offset is not nil, it will not display the timezone offset. 336 func PGWireFormatTimestamp(t time.Time, offset *time.Location, tmp []byte) (b []byte) { 337 format := PGTimeStampFormatNoOffset 338 if offset != nil { 339 format = PGTimeStampFormat 340 if _, offsetSeconds := t.In(offset).Zone(); offsetSeconds%60 != 0 { 341 format += ":00:00" 342 } else if offsetSeconds%3600 != 0 { 343 format += ":00" 344 } 345 } 346 347 // Need to send dates before 0001 A.D. with " BC" suffix, instead of the 348 // minus sign preferred by Go. 349 // Beware, "0000" in ISO is "1 BC", "-0001" is "2 BC" and so on 350 if offset != nil { 351 t = t.In(offset) 352 } 353 354 bc := false 355 if t.Year() <= 0 { 356 // flip year sign, and add 1, e.g: "0" will be "1", and "-10" will be "11" 357 t = t.AddDate((-t.Year())*2+1, 0, 0) 358 bc = true 359 } 360 361 b = t.AppendFormat(tmp, format) 362 if bc { 363 b = append(b, " BC"...) 364 } 365 return b 366 }