github.com/cockroachdb/cockroachdb-parser@v0.23.3-0.20240213214944-911057d40c9a/pkg/sql/sem/tree/parse_string.go (about)

     1  // Copyright 2018 The Cockroach Authors.
     2  //
     3  // Use of this software is governed by the Business Source License
     4  // included in the file licenses/BSL.txt.
     5  //
     6  // As of the Change Date specified in that file, in accordance with
     7  // the Business Source License, use of this software will be governed
     8  // by the Apache License, Version 2.0, included in the file
     9  // licenses/APL.txt.
    10  
    11  package tree
    12  
    13  import (
    14  	"strconv"
    15  	"strings"
    16  	"time"
    17  
    18  	"github.com/cockroachdb/apd/v3"
    19  	"github.com/cockroachdb/cockroachdb-parser/pkg/col/typeconv"
    20  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/lex"
    21  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgcode"
    22  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/pgwire/pgerror"
    23  	"github.com/cockroachdb/cockroachdb-parser/pkg/sql/types"
    24  	"github.com/cockroachdb/cockroachdb-parser/pkg/util"
    25  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/duration"
    26  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/encoding"
    27  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/json"
    28  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/timeutil/pgdate"
    29  	"github.com/cockroachdb/cockroachdb-parser/pkg/util/uuid"
    30  	"github.com/cockroachdb/errors"
    31  	"github.com/lib/pq/oid"
    32  )
    33  
    34  // ParseAndRequireString parses s as type t for simple types.
    35  //
    36  // The dependsOnContext return value indicates if we had to consult the
    37  // ParseContext (either for the time or the local timezone).
    38  func ParseAndRequireString(
    39  	t *types.T, s string, ctx ParseContext,
    40  ) (d Datum, dependsOnContext bool, err error) {
    41  	switch t.Family() {
    42  	case types.ArrayFamily:
    43  		d, dependsOnContext, err = ParseDArrayFromString(ctx, s, t.ArrayContents())
    44  	case types.BitFamily:
    45  		r, err := ParseDBitArray(s)
    46  		if err != nil {
    47  			return nil, false, err
    48  		}
    49  		d = FormatBitArrayToType(r, t)
    50  	case types.BoolFamily:
    51  		d, err = ParseDBool(strings.TrimSpace(s))
    52  	case types.BytesFamily:
    53  		d, err = ParseDByte(s)
    54  	case types.DateFamily:
    55  		d, dependsOnContext, err = ParseDDate(ctx, s)
    56  	case types.DecimalFamily:
    57  		d, err = ParseDDecimal(strings.TrimSpace(s))
    58  	case types.FloatFamily:
    59  		d, err = ParseDFloat(strings.TrimSpace(s))
    60  	case types.INetFamily:
    61  		d, err = ParseDIPAddrFromINetString(s)
    62  	case types.IntFamily:
    63  		d, err = ParseDInt(strings.TrimSpace(s))
    64  	case types.IntervalFamily:
    65  		itm, typErr := t.IntervalTypeMetadata()
    66  		if typErr != nil {
    67  			return nil, false, typErr
    68  		}
    69  		d, err = ParseDIntervalWithTypeMetadata(intervalStyle(ctx), s, itm)
    70  	case types.PGLSNFamily:
    71  		d, err = ParseDPGLSN(s)
    72  	case types.RefCursorFamily:
    73  		d = NewDRefCursor(s)
    74  	case types.Box2DFamily:
    75  		d, err = ParseDBox2D(s)
    76  	case types.GeographyFamily:
    77  		d, err = ParseDGeography(s)
    78  	case types.GeometryFamily:
    79  		d, err = ParseDGeometry(s)
    80  	case types.JsonFamily:
    81  		d, err = ParseDJSON(s)
    82  	case types.OidFamily:
    83  		if t.Oid() != oid.T_oid && s == ZeroOidValue {
    84  			d = WrapAsZeroOid(t)
    85  		} else {
    86  			d, err = ParseDOidAsInt(s)
    87  		}
    88  	case types.CollatedStringFamily:
    89  		d, err = NewDCollatedString(s, t.Locale(), ctx.GetCollationEnv())
    90  	case types.StringFamily:
    91  		s = truncateString(s, t)
    92  		return NewDString(s), false, nil
    93  	case types.TimeFamily:
    94  		d, dependsOnContext, err = ParseDTime(ctx, s, TimeFamilyPrecisionToRoundDuration(t.Precision()))
    95  	case types.TimeTZFamily:
    96  		d, dependsOnContext, err = ParseDTimeTZ(ctx, s, TimeFamilyPrecisionToRoundDuration(t.Precision()))
    97  	case types.TimestampFamily:
    98  		d, dependsOnContext, err = ParseDTimestamp(ctx, s, TimeFamilyPrecisionToRoundDuration(t.Precision()))
    99  	case types.TimestampTZFamily:
   100  		d, dependsOnContext, err = ParseDTimestampTZ(ctx, s, TimeFamilyPrecisionToRoundDuration(t.Precision()))
   101  	case types.UuidFamily:
   102  		d, err = ParseDUuidFromString(s)
   103  	case types.EnumFamily:
   104  		var e DEnum
   105  		e, err = MakeDEnumFromLogicalRepresentation(t, s)
   106  		if err == nil {
   107  			d = NewDEnum(e)
   108  		}
   109  	case types.TSQueryFamily:
   110  		d, err = ParseDTSQuery(s)
   111  	case types.TSVectorFamily:
   112  		d, err = ParseDTSVector(s)
   113  	case types.TupleFamily:
   114  		d, dependsOnContext, err = ParseDTupleFromString(ctx, s, t)
   115  	case types.VoidFamily:
   116  		d = DVoidDatum
   117  	default:
   118  		return nil, false, errors.AssertionFailedf("unknown type %s", t.SQLStringForError())
   119  	}
   120  	if err != nil {
   121  		return d, dependsOnContext, err
   122  	}
   123  	d, err = AdjustValueToType(t, d)
   124  	return d, dependsOnContext, err
   125  }
   126  
   127  func truncateString(s string, t *types.T) string {
   128  	// If the string type specifies a limit we truncate to that limit:
   129  	//   'hello'::CHAR(2) -> 'he'
   130  	// This is true of all the string type variants.
   131  	if t.Width() > 0 {
   132  		s = util.TruncateString(s, int(t.Width()))
   133  	}
   134  	return s
   135  }
   136  
   137  // ParseDOidAsInt parses the input and returns it as an OID. If the input
   138  // is not formatted as an int, an error is returned.
   139  func ParseDOidAsInt(s string) (*DOid, error) {
   140  	i, err := strconv.ParseInt(strings.TrimSpace(s), 0, 64)
   141  	if err != nil {
   142  		return nil, MakeParseError(s, types.Oid, err)
   143  	}
   144  	return IntToOid(DInt(i))
   145  }
   146  
   147  // FormatBitArrayToType formats bit arrays such that they fill the total width
   148  // if too short, or truncate if too long.
   149  func FormatBitArrayToType(d *DBitArray, t *types.T) *DBitArray {
   150  	if t.Width() == 0 || d.BitLen() == uint(t.Width()) {
   151  		return d
   152  	}
   153  	a := d.BitArray.Clone()
   154  	switch t.Oid() {
   155  	case oid.T_varbit:
   156  		// VARBITs do not have padding attached, so only truncate.
   157  		if uint(t.Width()) < a.BitLen() {
   158  			a = a.ToWidth(uint(t.Width()))
   159  		}
   160  	default:
   161  		a = a.ToWidth(uint(t.Width()))
   162  	}
   163  	return &DBitArray{a}
   164  }
   165  
// ValueHandler is an interface to allow raw types to be extracted from strings.
//
// ParseAndRequireStringHandler hands each successfully parsed value to the
// method that matches the type family being parsed; the per-method notes below
// describe which families reach which method.
type ValueHandler interface {
	// NOTE(review): Len is not called in this file; presumably it reports how
	// many values have been collected — confirm against the implementations.
	Len() int
	// NOTE(review): Null is not called in this file; presumably it records a
	// NULL value — confirm against the implementations.
	Null()
	// Date receives the value parsed for the date family.
	Date(d pgdate.Date)
	// Datum receives the result of ParseAndRequireString for types that have
	// no dedicated method on this interface.
	Datum(d Datum)
	// Bool receives the value parsed for the bool family.
	Bool(b bool)
	// Bytes receives decoded byte strings for the bytes family, and is also
	// used for the raw bytes of UUIDs and the physical representation of enums.
	Bytes(b []byte)
	// Decimal returns a pointer into the vec for in place construction.
	// The caller parses directly into the returned decimal and resets it to the
	// zero value if parsing fails.
	Decimal() *apd.Decimal
	// Float receives the value parsed for the float family.
	Float(f float64)
	// Int16 receives integers parsed for int types of width 16.
	Int16(i int16)
	// Int32 receives integers parsed for int types of width 32.
	Int32(i int32)
	// Int receives integers parsed for int types of any other width.
	Int(i int64)
	// Duration receives the value parsed for the interval family.
	Duration(d duration.Duration)
	// JSON receives the value parsed for the JSON family.
	JSON(j json.JSON)
	// String receives string-family values after truncation to the type's width.
	String(s string)
	// TimestampTZ receives values parsed for both the timestamptz and the
	// timestamp families.
	TimestampTZ(t time.Time)
	// NOTE(review): Reset is not called in this file; presumably it clears the
	// handler for reuse — confirm against the implementations.
	Reset()
}
   186  
   187  // ParseAndRequireStringHandler parses a string and passes values
   188  // supported by the vector engine directly to a ValueHandler. Other types are
   189  // handled by ParseAndRequireString.
   190  func ParseAndRequireStringHandler(
   191  	t *types.T, s string, ctx ParseContext, vh ValueHandler, ph *pgdate.ParseHelper,
   192  ) (err error) {
   193  	switch t.Family() {
   194  	case types.BoolFamily:
   195  		var b bool
   196  		if b, err = ParseBool(strings.TrimSpace(s)); err == nil {
   197  			vh.Bool(b)
   198  		}
   199  	case types.BytesFamily:
   200  		var res []byte
   201  		if res, err = lex.DecodeRawBytesToByteArrayAuto(encoding.UnsafeConvertStringToBytes(s)); err == nil {
   202  			vh.Bytes(res)
   203  		} else {
   204  			err = MakeParseError(s, types.Bytes, err)
   205  		}
   206  	case types.DateFamily:
   207  		now := relativeParseTime(ctx)
   208  		var t pgdate.Date
   209  		if t, _, err = pgdate.ParseDate(now, dateStyle(ctx), s, ph); err == nil {
   210  			vh.Date(t)
   211  		}
   212  	case types.DecimalFamily:
   213  		// Decimal is a little different to allow in place construction.
   214  		dec := vh.Decimal()
   215  		if err = setDecimalString(s, dec); err != nil {
   216  			// Erase any invalid results.
   217  			*dec = apd.Decimal{}
   218  		}
   219  	case types.FloatFamily:
   220  		var f float64
   221  		if f, err = strconv.ParseFloat(s, 64); err == nil {
   222  			vh.Float(f)
   223  		} else {
   224  			err = MakeParseError(s, types.Float, err)
   225  		}
   226  	case types.IntFamily:
   227  		var i int64
   228  		switch t.Width() {
   229  		case 16:
   230  			if i, err = strconv.ParseInt(s, 0, 16); err == nil {
   231  				vh.Int16(int16(i))
   232  			} else {
   233  				err = MakeParseError(s, t, err)
   234  			}
   235  		case 32:
   236  			if i, err = strconv.ParseInt(s, 0, 32); err == nil {
   237  				vh.Int32(int32(i))
   238  			} else {
   239  				err = MakeParseError(s, t, err)
   240  			}
   241  		default:
   242  			if i, err = strconv.ParseInt(s, 0, 64); err == nil {
   243  				vh.Int(i)
   244  			} else {
   245  				err = MakeParseError(s, t, err)
   246  			}
   247  		}
   248  	case types.JsonFamily:
   249  		var j json.JSON
   250  		if j, err = json.ParseJSON(s); err == nil {
   251  			vh.JSON(j)
   252  		} else {
   253  			err = pgerror.Wrapf(err, pgcode.Syntax, "could not parse JSON")
   254  		}
   255  	case types.StringFamily:
   256  		s = truncateString(s, t)
   257  		vh.String(s)
   258  	case types.TimestampTZFamily:
   259  		// TODO(cucaroach): can we refactor the next 3 case arms to be simpler
   260  		// and avoid code duplication?
   261  		now := relativeParseTime(ctx)
   262  		var ts time.Time
   263  		if ts, _, err = pgdate.ParseTimestamp(now, dateStyle(ctx), s); err == nil {
   264  			// Always normalize time to the current location.
   265  			if ts, err = checkTimeBounds(ts, TimeFamilyPrecisionToRoundDuration(t.Precision())); err == nil {
   266  				vh.TimestampTZ(ts)
   267  			}
   268  		}
   269  	case types.TimestampFamily:
   270  		now := relativeParseTime(ctx)
   271  		var ts time.Time
   272  		if ts, _, err = pgdate.ParseTimestampWithoutTimezone(now, dateStyle(ctx), s); err == nil {
   273  			// Always normalize time to the current location.
   274  			if ts, err = checkTimeBounds(ts, TimeFamilyPrecisionToRoundDuration(t.Precision())); err == nil {
   275  				vh.TimestampTZ(ts)
   276  			}
   277  		}
   278  	case types.IntervalFamily:
   279  		var itm types.IntervalTypeMetadata
   280  		itm, err = t.IntervalTypeMetadata()
   281  		if err == nil {
   282  			var d duration.Duration
   283  			d, err = ParseIntervalWithTypeMetadata(intervalStyle(ctx), s, itm)
   284  			if err == nil {
   285  				vh.Duration(d)
   286  			}
   287  		}
   288  	case types.UuidFamily:
   289  		var uv uuid.UUID
   290  		uv, err = uuid.FromString(s)
   291  		if err == nil {
   292  			vh.Bytes(uv.GetBytes())
   293  		} else {
   294  			err = MakeParseError(s, types.Uuid, err)
   295  		}
   296  	case types.EnumFamily:
   297  		var d DEnum
   298  		d, err = MakeDEnumFromLogicalRepresentation(t, s)
   299  		if err == nil {
   300  			vh.Bytes(d.PhysicalRep)
   301  		}
   302  	default:
   303  		if typeconv.TypeFamilyToCanonicalTypeFamily(t.Family()) != typeconv.DatumVecCanonicalTypeFamily {
   304  			return errors.AssertionFailedf("unexpected type %v in datum case arm, does a new type need to be handled?", t)
   305  		}
   306  		var d Datum
   307  		if d, _, err = ParseAndRequireString(t, s, ctx); err == nil {
   308  			vh.Datum(d)
   309  		}
   310  	}
   311  	return err
   312  }