github.com/cockroachdb/cockroach@v20.2.0-alpha.1+incompatible/pkg/sql/opt/ops/relational.opt (about)

     1  # relational.opt contains Optgen language definitions for all of Cockroach's
     2  # physical and logical operators that return a table-valued result having rows
     3  # and columns (i.e. relational). Many of them correspond to operators in the
     4  # relational algebra, but there are also variants that are useful for concisely
     5  # and incrementally expressing transformations.
     6  #
     7  # Tags
     8  #
     9  # Relational - All operators in this file are marked with the Relational tag,
    10  #              so they can be easily distinguished from Scalar and Enforcer
    11  #              operators.
    12  #
    13  # Join - All join operators (inner, left, right, full, semi, anti), as well as
    14  #        their JoinApply variants, are marked with the Join tag, which allows
    15  #        any of them to fulfill a Join pattern match.
    16  #
    17  # JoinApply - All join apply operators are marked with the JoinApply tag.
    18  #             Unlike standard Join operators, JoinApply operators allow the
    19  #             right input to refer to columns projected by the left input.
    20  #             Allowing this is useful as an intermediate (or sometimes final)
    21  #             step in some important transformations (like eliminating
    22  #             subqueries).
    23  
    24  # Scan returns a result set containing every row in a table by scanning one of
    25  # the table's indexes according to its ordering. The ScanPrivate field
    26  # identifies the table and index to scan, as well as the subset of columns to
    27  # project from it.
    28  #
    29  # The scan can be constrained and/or have an internal row limit. A scan can be
    30  # executed either as a forward or as a reverse scan (except when it has a limit,
    31  # in which case the direction is fixed).
    32  [Relational]
    33  define Scan {
    34      _ ScanPrivate
    35  }
    36  
    37  [Private]
    38  define ScanPrivate {
    39      # Table identifies the table to scan. It is an id that can be passed to
    40      # the Metadata.Table method in order to fetch cat.Table metadata.
    41      Table TableID
    42  
    43      # Index identifies the index to scan (whether primary or secondary). It
    44      # can be passed to the cat.Table.Index() method in order to fetch the
    45      # cat.Index metadata.
    46      Index IndexOrdinal
    47  
    48      # Cols specifies the set of columns that the scan operator projects. This
    49      # may be a subset of the columns that the table/index contains.
    50      Cols ColSet
    51  
    52      # If set, the scan is a constrained scan; the constraint contains the spans
    53      # that need to be scanned.
    54      Constraint Constraint
    55  
    56      # HardLimit specifies the maximum number of rows that the scan can return
    57      # (after applying any constraint), as well as the required scan direction.
    58      # This is a "hard" limit, meaning that the scan operator must never return
    59      # more than this number of rows, even if more are available. If its value is
    60      # zero, then the limit is unknown, and the scan should return all available
    61      # rows.
    62      HardLimit ScanLimit
    63  
    64      # Flags modify how the table is scanned, such as which index is used to scan.
    65      Flags ScanFlags
    66  
    67      # Locking represents the row-level locking mode of the Scan. Most scans
    68      # leave this unset (Strength = ForNone), which indicates that no row-level
    69      # locking will be performed while scanning the table. Stronger locking modes
    70      # are used by SELECT .. FOR [KEY] UPDATE/SHARE statements and by the initial
    71      # row retrieval of DELETE and UPDATE statements. The locking item's Targets
    72      # list will always be empty when part of a ScanPrivate.
    73      Locking LockingItem
    74  
    75      # PartitionConstrainedScan records whether or not we were able to use partitions
    76      # to constrain the lookup spans further. This flag is used to record telemetry
    77      # about how often this optimization is getting applied.
    78      PartitionConstrainedScan bool
    79  }
    80  
    81  # SequenceSelect represents a read from a sequence as a data source. It always returns
    82  # three columns, last_value, log_cnt, and is_called, with a single row. last_value is
    83  # the most recent value returned from the sequence and log_cnt and is_called are
    84  # always 0 and true, respectively.
    85  [Relational]
    86  define SequenceSelect {
    87      _ SequenceSelectPrivate
    88  }
    89  
    90  [Private]
    91  define SequenceSelectPrivate {
    92      # Sequence identifies the sequence to read from.
    93      Sequence SequenceID
    94  
    95      # Cols is the 3 element list of column IDs returned by the operator.
    96      Cols ColList
    97  }
    98  
    99  # Values returns a manufactured result set containing a constant number of rows.
   100  # specified by the Rows list field. Each row must contain the same set of
   101  # columns in the same order.
   102  #
   103  # The Rows field contains a list of Tuples, one for each row. Each tuple has
   104  # the same length (same with that of Cols).
   105  #
   106  # The Cols field contains the set of column indices returned by each row
   107  # as an opt.ColList. It is legal for Cols to be empty.
   108  [Relational]
   109  define Values {
   110      Rows ScalarListExpr
   111      _ ValuesPrivate
   112  }
   113  
   114  [Private]
   115  define ValuesPrivate {
   116      Cols ColList
   117  
   118      # ID is a memo-unique identifier which distinguishes between identical
   119      # Values expressions which appear in different places in the query. In most
   120      # cases the column set is sufficient to do this, but various rules make it
   121      # possible to construct Values expressions with no columns.
   122      ID UniqueID
   123  }
   124  
   125  # Select filters rows from its input result set, based on the boolean filter
   126  # predicate expression. Rows which do not match the filter are discarded. While
   127  # the Filter operand can be any boolean expression, normalization rules will
   128  # typically convert it to a Filters operator in order to make conjunction list
   129  # matching easier.
   130  [Relational]
   131  define Select {
   132      Input RelExpr
   133      Filters FiltersExpr
   134  }
   135  
   136  # Project modifies the set of columns returned by the input result set. Columns
   137  # can be removed, reordered, or renamed. In addition, new columns can be
   138  # synthesized.
   139  #
   140  # Projections describes the synthesized columns constructed by Project, and
   141  # Passthrough describes the input columns that are passed through as Project
   142  # output columns.
   143  [Relational]
   144  define Project {
   145      Input RelExpr
   146      Projections ProjectionsExpr
   147      Passthrough ColSet
   148  
   149      # notNullCols is the set of columns (input or synthesized) that are known to
   150      # be not-null.
   151      notNullCols ColSet
   152  
   153      # internalFuncDeps are the functional dependencies between all columns
   154      # (input or synthesized).
   155      internalFuncDeps FuncDepSet
   156  }
   157  
   158  # InnerJoin creates a result set that combines columns from its left and right
   159  # inputs, based upon its "on" join predicate. Rows which do not match the
   160  # predicate are filtered. While expressions in the predicate can refer to
   161  # columns projected by either the left or right inputs, the inputs are not
   162  # allowed to refer to the other's projected columns.
   163  [Relational, Join, JoinNonApply]
   164  define InnerJoin {
   165      Left RelExpr
   166      Right RelExpr
   167      On FiltersExpr
   168      _ JoinPrivate
   169  }
   170  
   171  [Relational, Join, JoinNonApply]
   172  define LeftJoin {
   173      Left RelExpr
   174      Right RelExpr
   175      On FiltersExpr
   176      _ JoinPrivate
   177  }
   178  
   179  [Relational, Join, JoinNonApply]
   180  define RightJoin {
   181      Left RelExpr
   182      Right RelExpr
   183      On FiltersExpr
   184      _ JoinPrivate
   185  }
   186  
   187  [Relational, Join, JoinNonApply]
   188  define FullJoin {
   189      Left RelExpr
   190      Right RelExpr
   191      On FiltersExpr
   192      _ JoinPrivate
   193  }
   194  
   195  [Relational, Join, JoinNonApply]
   196  define SemiJoin {
   197      Left RelExpr
   198      Right RelExpr
   199      On FiltersExpr
   200      _ JoinPrivate
   201  }
   202  
   203  [Relational, Join, JoinNonApply]
   204  define AntiJoin {
   205      Left RelExpr
   206      Right RelExpr
   207      On FiltersExpr
   208      _ JoinPrivate
   209  }
   210  
   211  # JoinPrivate is shared between the various join operators including apply
   212  # variants, but excluding IndexJoin, LookupJoin, MergeJoin.
   213  [Private]
   214  define JoinPrivate {
   215      # Flags modify what type of join we choose.
   216      Flags JoinFlags
   217  }
   218  
   219  # IndexJoin represents an inner join between an input expression and a primary
   220  # index. It is a special case of LookupJoin where the input columns are the PK
   221  # columns of the table we are looking up into, and every input row results in
   222  # exactly one output row.
   223  #
   224  # IndexJoin operators are created from Scan operators (unlike lookup joins which
   225  # are created from Join operators).
   226  [Relational]
   227  define IndexJoin {
   228      Input RelExpr
   229      _ IndexJoinPrivate
   230  }
   231  
   232  [Private]
   233  define IndexJoinPrivate {
   234      # Table identifies the table to do lookups in. The primary index is
   235      # currently the only index used.
   236      Table TableID
   237  
   238      # Cols specifies the set of columns that the index join operator projects.
   239      # This may be a subset of the columns that the table contains.
   240      Cols ColSet
   241  }
   242  
   243  # LookupJoin represents a join between an input expression and an index. The
   244  # type of join is in the LookupJoinPrivate field.
   245  [Relational]
   246  define LookupJoin {
   247      Input RelExpr
   248      On FiltersExpr
   249      _ LookupJoinPrivate
   250  
   251      # lookupProps caches relational properties for the "table" side of the lookup
   252      # join, treating it as if it were another relational input. This makes the
   253      # lookup join appear more like other join operators.
   254      lookupProps RelProps
   255  }
   256  
   257  [Private]
   258  define LookupJoinPrivate {
   259      # JoinType is InnerJoin, LeftJoin, SemiJoin, or AntiJoin.
   260      JoinType Operator
   261  
   262      # Table identifies the table do to lookups in.
   263      Table TableID
   264  
   265      # Index identifies the index to do lookups in (whether primary or secondary).
   266      # It can be passed to the cat.Table.Index() method in order to fetch the
   267      # cat.Index metadata.
   268      Index IndexOrdinal
   269  
   270      # KeyCols are the columns (produced by the input) used to create lookup keys.
   271      # The key columns must be non-empty, and are listed in the same order as the
   272      # index columns (or a prefix of them).
   273      KeyCols ColList
   274  
   275      # Cols is the set of columns produced by the lookup join. This set can
   276      # contain columns from the input and columns from the index. Any columns not
   277      # in the input are retrieved from the index. Cols may not contain some or
   278      # all of the KeyCols, if they are not output columns for the join.
   279      #
   280      # TODO(radu): this effectively allows an arbitrary projection; it should be
   281      # just a LookupCols set indicating which columns we should add from the
   282      # index. However, this requires extra Project operators in the lookup join
   283      # exploration transforms which currently leads to problems related to lookup
   284      # join statistics.
   285      Cols ColSet
   286  
   287      # LookupColsAreTableKey is true if the lookup columns form a key in the
   288      # table (and thus each left row matches with at most one table row).
   289      LookupColsAreTableKey bool
   290  
   291      # ConstFilters contains the constant filters that are represented as equality
   292      # conditions on the KeyCols. These filters are needed by the statistics code to
   293      # correctly estimate selectivity.
   294      ConstFilters FiltersExpr
   295      _ JoinPrivate
   296  }
   297  
   298  # GeoLookupJoin represents a join between an input expression and an index,
   299  # where the index is an inverted index on a Geometry or Geography column.
   300  #
   301  # A GeoLookupJoin can be generated for queries containing a join where one of
   302  # the join conditions is a geospatial binary function such as ST_Covers or
   303  # ST_CoveredBy, and at least one of the two inputs to the function is an
   304  # indexed geospatial column. The type of geospatial function implies the
   305  # GeoRelationshipType (Covers, CoveredBy or Intersects) for the join, which is
   306  # stored in the GeoLookupJoinPrivate and affects how the join is executed. For
   307  # a full list of the geospatial functions that can be index-accelerated and
   308  # their corresponding GeoRelationshipTypes, see geoRelationshipMap in
   309  # xform/custom_funcs.go.
   310  #
   311  # The GeoLookupJoin has no false negatives, but it may return false positives
   312  # that would not have been returned by the original geospatial function
   313  # join predicate. Therefore, the original function must still be applied on
   314  # the output of the join. Since the inverted index does not actually include
   315  # the geospatial column (or any other columns besides the primary key columns),
   316  # the GeoLookupJoin will be wrapped in an index join. The geospatial function
   317  # and any other filters on non-key columns will be appied as filters on the
   318  # outer index join.
   319  [Relational]
   320  define GeoLookupJoin {
   321      Input RelExpr
   322  
   323      # On only contains filters on the input columns and primary key columns of
   324      # the inverted index's base table. (Since the indexed geospatial column is
   325      # not actually included in the index, the GeoLookupJoin must be wrapped in
   326      # an index join, which will contain the original geospatial function as one
   327      # of its On conditions.)
   328      On FiltersExpr
   329      _ GeoLookupJoinPrivate
   330  
   331      # lookupProps caches relational properties for the "table" side of the lookup
   332      # join, treating it as if it were another relational input. This makes the
   333      # lookup join appear more like other join operators.
   334      lookupProps RelProps
   335  }
   336  
   337  [Private]
   338  define GeoLookupJoinPrivate {
   339      # JoinType is InnerJoin, LeftJoin, SemiJoin, or AntiJoin.
   340      JoinType Operator
   341  
   342      # GeoRelationshipType is Covers, CoveredBy, or Intersects.
   343      GeoRelationshipType GeoRelationshipType
   344  
   345      # Table identifies the table do to lookups in.
   346      Table TableID
   347  
   348      # Index identifies the geospatial inverted index to do lookups in. It can
   349      # be passed to the cat.Table.Index() method in order to fetch the cat.Index
   350      # metadata.
   351      Index IndexOrdinal
   352  
   353      # GeoCol is the geospatial column (produced by the input) used to
   354      # determine the keys (i.e., s2 CellIDs) to scan in the inverted index.
   355      GeoCol ColumnID
   356  
   357      # Cols is the set of columns produced by the geospatial lookup join. This
   358      # set can contain columns from the input and columns from the index. Any
   359      # columns not in the input are retrieved from the index.
   360      Cols ColSet
   361      _ JoinPrivate
   362  }
   363  
   364  # MergeJoin represents a join that is executed using merge-join.
   365  # MergeOn is a scalar which contains the ON condition and merge-join ordering
   366  # information; see the MergeOn scalar operator.
   367  # It can be any type of join (identified in the MergeJoinPrivate field).
   368  [Relational]
   369  define MergeJoin {
   370      Left RelExpr
   371      Right RelExpr
   372      On FiltersExpr
   373      _ MergeJoinPrivate
   374  }
   375  
   376  [Private]
   377  define MergeJoinPrivate {
   378      # JoinType is one of the basic join operators: InnerJoin, LeftJoin,
   379      # RightJoin, FullJoin, SemiJoin, AntiJoin.
   380      JoinType Operator
   381  
   382      # LeftEq and RightEq are orderings on equality columns. They have the same
   383      # length and LeftEq[i] is a column on the left side which is constrained to
   384      # be equal to RightEq[i] on the right side. The directions also have to
   385      # match.
   386      #
   387      # Examples of valid settings for abc JOIN def ON a=d,b=e:
   388      #   LeftEq: a+,b+   RightEq: d+,e+
   389      #   LeftEq: b-,a+   RightEq: e-,d+
   390      LeftEq Ordering
   391      RightEq Ordering
   392  
   393      # LeftOrdering and RightOrdering are "simplified" versions of LeftEq/RightEq,
   394      # taking into account the functional dependencies of each side. We need both
   395      # versions because we need to configure execution with specific equality
   396      # columns and orderings.
   397      LeftOrdering OrderingChoice
   398      RightOrdering OrderingChoice
   399      _ JoinPrivate
   400  }
   401  
   402  # ZigzagJoin represents a join that is executed using the zigzag joiner.
   403  # All fields except for the ON expression are stored in the private;
   404  # since the zigzag joiner operates directly on indexes and doesn't
   405  # support arbitrary inputs.
   406  #
   407  # TODO(itsbilal): Add support for representing multi-way zigzag joins.
   408  [Relational, Telemetry]
   409  define ZigzagJoin {
   410      On FiltersExpr
   411      _ ZigzagJoinPrivate
   412  
   413      # leftProps and rightProps cache relational properties corresponding to an
   414      # unconstrained scan on the respective indexes. By putting this in the
   415      # expr, zigzag joins can reuse a lot of the logical property building code
   416      # for joins.
   417      leftProps RelProps
   418      rightProps RelProps
   419  }
   420  
   421  [Private]
   422  define ZigzagJoinPrivate {
   423      # LeftTable and RightTable identifies the left and right tables for this
   424      # join.
   425      LeftTable TableID
   426      RightTable TableID
   427  
   428      # LeftIndex and RightIndex identifies the index to do lookups in (whether
   429      # primary or secondary). It can be passed to the cat.Table.Index() method in
   430      # order to fetch the cat.Index metadata.
   431      LeftIndex IndexOrdinal
   432      RightIndex IndexOrdinal
   433  
   434      # LeftEqCols and RightEqCols contains lists of columns on the left and
   435      # right sides that are being equated. Both lists must be of equal length.
   436      LeftEqCols ColList
   437      RightEqCols ColList
   438  
   439      # FixedVals, LeftFixedCols and RightFixedCols reference fixed values.
   440      # Fixed values are constants that constrain each index' prefix columns
   441      # (the ones denoted in {Left,Right}FixedCols). These fixed columns must
   442      # lie at the start of the index and must immediately precede EqCols.
   443      #
   444      # FixedVals is a list of 2 tuples, each representing one side's fixed
   445      # values.
   446      #
   447      # Read the comment in pkg/sql/distsqlrun/zigzagjoiner.go for more on
   448      # fixed and equality columns.
   449      FixedVals ScalarListExpr
   450      LeftFixedCols ColList
   451      RightFixedCols ColList
   452  
   453      # Cols is the set of columns produced by the zigzag join. This set can
   454      # contain columns from either side's index.
   455      Cols ColSet
   456  }
   457  
   458  # InnerJoinApply has the same join semantics as InnerJoin. However, unlike
   459  # InnerJoin, it allows the right input to refer to columns projected by the
   460  # left input.
   461  [Relational, Join, JoinApply, Telemetry]
   462  define InnerJoinApply {
   463      Left RelExpr
   464      Right RelExpr
   465      On FiltersExpr
   466      _ JoinPrivate
   467  }
   468  
   469  [Relational, Join, JoinApply, Telemetry]
   470  define LeftJoinApply {
   471      Left RelExpr
   472      Right RelExpr
   473      On FiltersExpr
   474      _ JoinPrivate
   475  }
   476  
   477  [Relational, Join, JoinApply, Telemetry]
   478  define SemiJoinApply {
   479      Left RelExpr
   480      Right RelExpr
   481      On FiltersExpr
   482      _ JoinPrivate
   483  }
   484  
   485  [Relational, Join, JoinApply, Telemetry]
   486  define AntiJoinApply {
   487      Left RelExpr
   488      Right RelExpr
   489      On FiltersExpr
   490      _ JoinPrivate
   491  }
   492  
   493  # GroupBy computes aggregate functions over groups of input rows. Input rows
   494  # that are equal on the grouping columns are grouped together. The set of
   495  # computed aggregate functions is described by the Aggregations field (which is
   496  # always an Aggregations operator).
   497  #
   498  # The arguments of the aggregate functions are columns from the input
   499  # (i.e. Variables), possibly wrapped in aggregate modifiers like AggDistinct.
   500  #
   501  # If the set of input rows is empty, then the output of the GroupBy operator
   502  # will also be empty. If the grouping columns are empty, then all input rows
   503  # form a single group. GroupBy is used for queries with aggregate functions,
   504  # HAVING clauses and/or GROUP BY expressions.
   505  #
   506  # The GroupingPrivate field contains an ordering; this ordering serves a
   507  # dual-purpose:
   508  #  - if we ignore any grouping columns, the remaining columns indicate an
   509  #    intra-group ordering; this is useful if there is an order-dependent
   510  #    aggregation (like ARRAY_AGG).
   511  #  - any prefix containing only grouping columns is used to execute the
   512  #    aggregation in a streaming fashion.
   513  #
   514  # Currently, the initially built GroupBy has all grouping columns as "optional"
   515  # in the ordering (we call this the "canonical" variant). Subsequently, the
   516  # GenerateStreamingGroupBy exploration rule can add more variants, based on
   517  # interesting orderings.
   518  [Relational, Grouping, Telemetry]
   519  define GroupBy {
   520      Input RelExpr
   521      Aggregations AggregationsExpr
   522      _ GroupingPrivate
   523  }
   524  
   525  # GroupingPrivate is shared between the grouping-related operators: GroupBy
   526  # ScalarGroupBy, DistinctOn, EnsureDistinctOn, UpsertDistinctOn, and
   527  # EnsureUpsertDistinctOn. This allows the operators to be treated
   528  # polymorphically.
   529  [Private]
   530  define GroupingPrivate {
   531      # GroupingCols partitions the GroupBy input rows into aggregation groups.
   532      # All rows sharing the same values for these columns are in the same group.
   533      # GroupingCols is always empty in the ScalarGroupBy case.
   534      GroupingCols ColSet
   535  
   536      # Ordering specifies the order required of the input. This order can intermix
   537      # grouping and non-grouping columns, serving a dual-purpose:
   538      #  - if we ignore grouping columns, it specifies an intra-group ordering (sort
   539      #    order of values within each group, useful for order-sensitive aggregation
   540      #    operators like ArrayAgg;
   541      #  - leading grouping columns specify an inter-group ordering, allowing for
   542      #    more efficient streaming execution.
   543      #
   544      # The canonical operation always contains an ordering that has no grouping
   545      # columns. Exploration rules can create versions of the operator with
   546      # orderings that contain grouping columns.
   547      Ordering OrderingChoice
   548  
   549      # NullsAreDistinct specifies the null behavior of the grouping operator. If
   550      # true, the operator considers nulls to be distinct for grouping purposes.
   551      # NullsAreDistinct should only be true for UpsertDistinctOn and
   552      # EnsureUpsertDistinctOn.
   553      NullsAreDistinct bool
   554  
   555      # ErrorOnDup, if non-empty, triggers an error with the given text if any
   556      # aggregation group contains more than one row. This can only take on a
   557      # value for the EnsureDistinctOn and EnsureUpsertDistinctOn operators.
   558      ErrorOnDup string
   559  }
   560  
   561  # ScalarGroupBy computes aggregate functions over the complete set of input
   562  # rows. This is similar to GroupBy with empty grouping columns, where all input
   563  # rows form a single group. However, there is an important difference. If the
   564  # input set is empty, then the output of the ScalarGroupBy operator will have a
   565  # single row containing default values for each aggregate function (typically
   566  # null or zero, depending on the function). ScalarGroupBy always returns exactly
   567  # one row - either the single-group aggregates or the default aggregate values.
   568  #
   569  # ScalarGroupBy uses the GroupingPrivate struct so that it's polymorphic with
   570  # GroupBy and can be used in the same rules (when appropriate). In the
   571  # ScalarGroupBy case, the grouping column field in GroupingPrivate is always
   572  # empty.
   573  [Relational, Grouping, Telemetry]
   574  define ScalarGroupBy {
   575      Input RelExpr
   576      Aggregations AggregationsExpr
   577      _ GroupingPrivate
   578  }
   579  
   580  # DistinctOn filters out rows that are identical on the set of grouping columns;
   581  # only the first row (according to an ordering) is kept for each set of possible
   582  # values. It is roughly equivalent with a GroupBy on the same grouping columns
   583  # except that it uses FirstAgg functions that ensure the value on the first row
   584  # is chosen (across all aggregations).
   585  #
   586  # In addition, the value on that first row must be chosen for all the grouping
   587  # columns as well; this is relevant in the case of equal but non-identical
   588  # values, like decimals. For example, if we have rows (1, 2.0) and (1.0, 2) and
   589  # we are grouping on these two columns, the values output can be either (1, 2.0)
   590  # or (1.0, 2), but not (1.0, 2.0).
   591  #
   592  # The execution of DistinctOn resembles that of Select more than that of
   593  # GroupBy: each row is tested against a map of what groups we have seen already,
   594  # and is either passed through or discarded. In particular, note that this
   595  # preserves the input ordering.
   596  #
   597  # The ordering in the GroupingPrivate field will be required of the input; it
   598  # determines which row can get "chosen" for each group of values on the grouping
   599  # columns. There is no restriction on the ordering; but note that grouping
   600  # columns are inconsequential - they can appear anywhere in the ordering and
   601  # they won't change the results (other than the result ordering).
   602  #
   603  # Currently when we build DistinctOn, we set all grouping columns as optional
   604  # cols in Ordering (but this is not required by the operator).
   605  #
   606  # TODO(radu): in the future we may want an exploration transform to try out more
   607  # specific interesting orderings because execution is more efficient when we can
   608  # rely on an ordering on the grouping columns (or a subset of them).
   609  #
   610  # DistinctOn uses an Aggregations child and the GroupingPrivate struct so that
   611  # it's polymorphic with GroupBy and can be used in the same rules (when
   612  # appropriate). In the DistinctOn case, the aggregations can be only FirstAgg or
   613  # ConstAgg.
   614  [Relational, Grouping, Telemetry]
   615  define DistinctOn {
   616      Input RelExpr
   617      Aggregations AggregationsExpr
   618      _ GroupingPrivate
   619  }
   620  
   621  # EnsureDistinctOn is a variation on DistinctOn that is only used to replace a
   622  # Max1Row operator in a decorrelation attempt. It raises an error if any
   623  # distinct grouping contains more than one row. Or in other words, it "ensures"
   624  # that the input is distinct on the grouping columns.
   625  #
   626  # EnsureDistinctOn is used when nulls are not considered distinct for grouping
   627  # purposes and an error should be raised when duplicates are detected.
   628  #
   629  # Rules should only "push through" or eliminate an EnsureDistinctOn if they
   630  # preserve the expected error behavior. For example, it would be invalid to
   631  # push a Select filter into an EnsureDistinctOn, as it might eliminate rows
   632  # that would otherwise trigger the EnsureDistinctOn error.
   633  [Relational, Grouping, Telemetry]
   634  define EnsureDistinctOn {
   635      Input RelExpr
   636      Aggregations AggregationsExpr
   637      _ GroupingPrivate
   638  }
   639  
   640  # UpsertDistinctOn is a variation on DistinctOn that is only used with UPSERT
   641  # and INSERT..ON CONFLICT statements. Unlike DistinctOn, UpsertDistinctOn treats
   642  # NULL values as not equal to one another for purposes of grouping. Two rows
   643  # having a NULL-valued grouping column will be placed in different groups. This
   644  # differs from DistinctOn behavior, where the two rows would be grouped
   645  # together. This behavior difference reflects SQL semantics, in which a unique
   646  # index key still allows multiple NULL values.
   647  #
   648  # UpsertDistinctOn is used when nulls are considered distinct for grouping
   649  # purposes and duplicates should be filtered out without raising an error.
   650  [Relational, Grouping, Telemetry]
   651  define UpsertDistinctOn {
   652      Input RelExpr
   653      Aggregations AggregationsExpr
   654      _ GroupingPrivate
   655  }
   656  
   657  # EnsureUpsertDistinctOn is a variation on UpsertDistinctOn that is only used
   658  # with UPSERT and INSERT..ON CONFLICT statements. Like UpsertDistinctOn,
   659  # EnsureUpsertDistinctOn treats NULL values as not equal to one another for
   660  # purposes of grouping. Unlike UpsertDistinctOn, it raises an error if any
   661  # distinct grouping contains more than one row. Or in other words, it "ensures"
   662  # that the input is distinct on the grouping columns.
   663  #
   664  # EnsureUpsertDistinctOn is used when nulls are considered distinct for grouping
   665  # purposes and an error should be raised when duplicates are detected.
   666  #
   667  # Rules should only "push through" or eliminate an EnsureUpsertDistinctOn if
   668  # they preserve the expected error behavior. For example, it would be invalid to
   669  # push a Select filter into an EnsureUpsertDistinctOn, as it might eliminate
   670  # rows that would otherwise trigger the EnsureUpsertDistinctOn error.
   671  [Relational, Grouping, Telemetry]
   672  define EnsureUpsertDistinctOn {
   673      Input RelExpr
   674      Aggregations AggregationsExpr
   675      _ GroupingPrivate
   676  }
   677  
   678  # Union is an operator used to combine the Left and Right input relations into
   679  # a single set containing rows from both inputs. Duplicate rows are discarded.
   680  # The SetPrivate field matches columns from the Left and Right inputs of the
   681  # Union with the output columns. See the comment above SetPrivate for more
   682  # details.
   683  [Relational, Set]
   684  define Union {
   685      Left RelExpr
   686      Right RelExpr
   687      _ SetPrivate
   688  }
   689  
   690  # SetPrivate contains fields used by the relational set operators: Union,
   691  # Intersect, Except, UnionAll, IntersectAll and ExceptAll. It matches columns
   692  # from the left and right inputs of the operator with the output columns, since
   693  # OutputCols are not ordered and may not correspond to each other.
   694  #
   695  # For example, consider the following query:
   696  #   SELECT y, x FROM xy UNION SELECT b, a FROM ab
   697  #
   698  # Given:
   699  #   col  index
   700  #   x    1
   701  #   y    2
   702  #   a    3
   703  #   b    4
   704  #
   705  # SetPrivate will contain the following values:
   706  #   Left:  [2, 1]
   707  #   Right: [4, 3]
   708  #   Out:   [5, 6]  <-- synthesized output columns
   709  #
   710  # To make normalization rules and execution simpler, both inputs to the set op
   711  # must have matching types.
   712  [Private]
   713  define SetPrivate {
   714      LeftCols ColList
   715      RightCols ColList
   716      OutCols ColList
   717  }
   718  
   719  # Intersect is an operator used to perform an intersection between the Left
   720  # and Right input relations. The result consists only of rows in the Left
   721  # relation that are also present in the Right relation. Duplicate rows are
   722  # discarded.
   723  # The SetPrivate field matches columns from the Left and Right inputs of the
   724  # Intersect with the output columns. See the comment above SetPrivate for more
   725  # details.
   726  [Relational, Set]
   727  define Intersect {
   728      Left RelExpr
   729      Right RelExpr
   730      _ SetPrivate
   731  }
   732  
   733  # Except is an operator used to perform a set difference between the Left and
   734  # Right input relations. The result consists only of rows in the Left relation
   735  # that are not present in the Right relation. Duplicate rows are discarded.
   736  # The SetPrivate field matches columns from the Left and Right inputs of the Except
   737  # with the output columns. See the comment above SetPrivate for more details.
   738  [Relational, Set]
   739  define Except {
   740      Left RelExpr
   741      Right RelExpr
   742      _ SetPrivate
   743  }
   744  
   745  # UnionAll is an operator used to combine the Left and Right input relations
   746  # into a single set containing rows from both inputs. Duplicate rows are
   747  # not discarded. For example:
   748  #
   749  #   SELECT x FROM xx UNION ALL SELECT y FROM yy
   750  #     x       y         out
   751  #   -----   -----      -----
   752  #     1       1          1
   753  #     1       2    ->    1
   754  #     2       3          1
   755  #                        2
   756  #                        2
   757  #                        3
   758  #
   759  # The SetPrivate field matches columns from the Left and Right inputs of the
   760  # UnionAll with the output columns. See the comment above SetPrivate for more
   761  # details.
   762  [Relational, Set]
   763  define UnionAll {
   764      Left RelExpr
   765      Right RelExpr
   766      _ SetPrivate
   767  }
   768  
   769  # IntersectAll is an operator used to perform an intersection between the Left
   770  # and Right input relations. The result consists only of rows in the Left
   771  # relation that have a corresponding row in the Right relation. Duplicate rows
   772  # are not discarded. This effectively creates a one-to-one mapping between the
   773  # Left and Right rows. For example:
   774  #
   775  #   SELECT x FROM xx INTERSECT ALL SELECT y FROM yy
   776  #     x       y         out
   777  #   -----   -----      -----
   778  #     1       1          1
   779  #     1       1    ->    1
   780  #     1       2          2
   781  #     2       2          2
   782  #     2       3
   783  #     4
   784  #
   785  # The SetPrivate field matches columns from the Left and Right inputs of the
   786  # IntersectAll with the output columns. See the comment above SetPrivate for more
   787  # details.
   788  [Relational, Set]
   789  define IntersectAll {
   790      Left RelExpr
   791      Right RelExpr
   792      _ SetPrivate
   793  }
   794  
   795  # ExceptAll is an operator used to perform a set difference between the Left
   796  # and Right input relations. The result consists only of rows in the Left
   797  # relation that do not have a corresponding row in the Right relation.
   798  # Duplicate rows are not discarded. This effectively creates a one-to-one
   799  # mapping between the Left and Right rows. For example:
   800  #   SELECT x FROM xx EXCEPT ALL SELECT y FROM yy
   801  #     x       y         out
   802  #   -----   -----      -----
   803  #     1       1    ->    1
   804  #     1       1          4
   805  #     1       2
   806  #     2       2
   807  #     2       3
   808  #     4
   809  #
   810  # The SetPrivate field matches columns from the Left and Right inputs of the
   811  # ExceptAll with the output columns. See the comment above SetPrivate for more
   812  # details.
   813  [Relational, Set]
   814  define ExceptAll {
   815      Left RelExpr
   816      Right RelExpr
   817      _ SetPrivate
   818  }
   819  
   820  # Limit returns a limited subset of the results in the input relation. The limit
   821  # expression is a scalar value; the operator returns at most this many rows. The
   822  # Orering field is a physical.OrderingChoice which indicates the row ordering
   823  # required from the input (the first rows with respect to this ordering are
   824  # returned).
   825  [Relational]
   826  define Limit {
   827      Input RelExpr
   828      Limit ScalarExpr
   829      Ordering OrderingChoice
   830  }
   831  
   832  # Offset filters out the first Offset rows of the input relation; used in
   833  # conjunction with Limit.
   834  [Relational]
   835  define Offset {
   836      Input RelExpr
   837      Offset ScalarExpr
   838      Ordering OrderingChoice
   839  }
   840  
   841  # Max1Row enforces that its input must return at most one row. If the input
   842  # has more than one row, Max1Row raises an error with the specified error text.
   843  #
   844  # Max1Row is most often used as input to the Subquery operator. See the comment
   845  # above Subquery for more details.
   846  [Relational]
   847  define Max1Row {
   848      Input RelExpr
   849      ErrorText string
   850  }
   851  
   852  # Ordinality adds a column to each row in its input containing a unique,
   853  # increasing number.
   854  [Relational]
   855  define Ordinality {
   856      Input RelExpr
   857      _ OrdinalityPrivate
   858  }
   859  
   860  [Private]
   861  define OrdinalityPrivate {
   862      # Ordering denotes the required ordering of the input.
   863      Ordering OrderingChoice
   864  
   865      # ColID holds the id of the column introduced by this operator.
   866      ColID ColumnID
   867  }
   868  
   869  # ProjectSet represents a relational operator which zips through a list of
   870  # generators for every row of the input.
   871  #
   872  # As a reminder, a functional zip over generators a,b,c returns tuples of
   873  # values from a,b,c picked "simultaneously". NULLs are used when a generator is
   874  # "shorter" than another.  For example:
   875  #
   876  #    zip([1,2,3], ['a','b']) = [(1,'a'), (2,'b'), (3, null)]
   877  #
   878  # ProjectSet corresponds to a relational operator project(R, a, b, c, ...)
   879  # which, for each row in R, produces all the rows produced by zip(a, b, c, ...)
   880  # with the values of R prefixed. Formally, this performs a lateral cross join
   881  # of R with zip(a,b,c).
   882  #
   883  # See the Zip header for more details.
   884  [Relational, Telemetry]
   885  define ProjectSet {
   886      Input RelExpr
   887      Zip ZipExpr
   888  }
   889  
   890  # Window represents a window function. Window functions are operators which
   891  # allow computations that take into consideration other rows in the same result
   892  # set.
   893  #
   894  # More concretely, a window function is a relational operator that takes in a
   895  # result set and appends a single new column whose value depends on the other
   896  # rows within the result set, and that row's relative position in it.
   897  #
   898  # Depending on the exact window function being computed, the value of the new
   899  # column could be the position of the row in the output (`row_number`), or a
   900  # cumulative sum, or something else.
   901  [Relational]
   902  define Window {
   903      Input RelExpr
   904  
   905      # Windows is the set of window functions to be computed for this operator.
   906      Windows WindowsExpr
   907      _ WindowPrivate
   908  }
   909  
   910  [Private]
   911  define WindowPrivate {
   912      # Partition is the set of columns to partition on. Every set of rows
   913      # sharing the values for this set of columns will be treated independently.
   914      Partition ColSet
   915  
   916      # Ordering is the ordering that the window function is computed relative to
   917      # within each partition.
   918      Ordering OrderingChoice
   919  
   920      # RangeOffsetColumn is the column ID of a single column from ORDER BY
   921      # clause (when there is only one column). We store it separately because
   922      # Ordering might be simplified (when that single column is in Partition),
   923      # but the execution still needs to know the original ordering with RANGE
   924      # mode of framing when at least one of the bounds has "offset". This column
   925      # ID is used to reconstruct the Ordering during exec build phase.
   926      RangeOffsetColumn ColumnID
   927  }
   928  
   929  # With executes Binding, making its results available to Main. Within Main, the
   930  # results of Binding may be referenced by a WithScan expression containing the
   931  # ID of this With.
   932  [Relational]
   933  define With {
   934      Binding RelExpr
   935      Main RelExpr
   936      _ WithPrivate
   937  }
   938  
   939  [Private]
   940  define WithPrivate {
   941      ID WithID
   942  
   943      # OriginalExpr contains the original CTE expression (so that we can display
   944      # it in the EXPLAIN plan).
   945      OriginalExpr Statement
   946  
   947      # Mtr is used to specify whether or not to override the optimizer's
   948      # default decision for materializing or not materializing tables.
   949      Mtr MaterializeClause
   950  
   951      # Name is used to identify the with for debugging purposes.
   952      Name string
   953  }
   954  
   955  # WithScan returns the results present in the With expression referenced
   956  # by ID.
   957  [Relational]
   958  define WithScan {
   959      _ WithScanPrivate
   960  }
   961  
   962  [Private]
   963  define WithScanPrivate {
   964      # With identifies the CTE to scan.
   965      With WithID
   966  
   967      # BindingProps stores the relational properties of the referenced expression.
   968      BindingProps RelPropsPtr
   969  
   970      # Name is used to identify the with being referenced for debugging purposes.
   971      Name string
   972  
   973      # InCols are the columns output by the expression referenced by this
   974      # expression. They correspond elementwise to the columns listed in OutCols.
   975      InCols ColList
   976  
   977      # OutCols contains a list of columns which correspond elementwise to the
   978      # columns in InCols, which are the IDs output by the referenced With
   979      # expression. WithScan cannot reuse the column IDs used in the original With
   980      # expression, since multiple WithScans referencing the same With can occur in
   981      # the same tree, so we maintain a mapping from the columns returned from
   982      # the referenced expression to the referencing expression.
   983      OutCols ColList
   984  
   985      # ID is a memo-unique identifier which distinguishes between identical
   986      # WithScan expressions which appear in different places in the query. In
   987      # most cases the column set is sufficient to do this, but various rules make
   988      # it possible to construct WithScan expressions with no columns.
   989      ID UniqueID
   990  }
   991  
   992  # RecursiveCTE implements the logic of a recursive CTE:
   993  #  * the Initial query is evaluated; the results are emitted and also saved into
   994  #    a "working table".
   995  #  * so long as the working table is not empty:
   996  #    - the Recursive query (which refers to the working table using a specific
   997  #      WithID) is evaluated; the results are emitted and also saved into a new
   998  #      "working table" for the next iteration.
   999  [Relational]
  1000  define RecursiveCTE {
  1001      Initial RelExpr
  1002      Recursive RelExpr
  1003      _ RecursiveCTEPrivate
  1004  }
  1005  
  1006  [Private]
  1007  define RecursiveCTEPrivate {
  1008      # Name is used to identify the CTE being referenced for debugging purposes.
  1009      Name string
  1010  
  1011      # WithID is the ID through which the Recursive expression refers to the
  1012      # current working table.
  1013      WithID WithID
  1014  
  1015      # InitialCols are the columns produced by the initial expression.
  1016      InitialCols ColList
  1017  
  1018      # RecursiveCols are the columns produced by the recursive expression, that
  1019      # map 1-1 to InitialCols.
  1020      RecursiveCols ColList
  1021  
  1022      # OutCols are the columns produced by the RecursiveCTE operator; they map
  1023      # 1-1 to InitialCols and to RecursiveCols. Similar to Union, we don't want
  1024      # to reuse column IDs from one side because the columns contain values from
  1025      # both sides.
  1026      #
  1027      # These columns are also used by the Recursive query to refer to the working
  1028      # table (see WithID).
  1029      OutCols ColList
  1030  }
  1031  
  1032  # FakeRel is a mock relational operator used for testing; its logical properties
  1033  # are pre-determined and stored in the private. It can be used as the child of
  1034  # an operator for which we are calculating properties or statistics.
  1035  [Relational]
  1036  define FakeRel {
  1037      _ FakeRelPrivate
  1038  }
  1039  
  1040  [Private]
  1041  define FakeRelPrivate {
  1042      Props RelPropsPtr
  1043  }