vitess.io/vitess@v0.16.2/go/vt/vttablet/tabletmanager/vdiff/primitive_executor.go (about)

     1  /*
     2  Copyright 2022 The Vitess Authors.
     3  
     4  Licensed under the Apache License, Version 2.0 (the "License");
     5  you may not use this file except in compliance with the License.
     6  You may obtain a copy of the License at
     7  
     8      http://www.apache.org/licenses/LICENSE-2.0
     9  
    10  Unless required by applicable law or agreed to in writing, software
    11  distributed under the License is distributed on an "AS IS" BASIS,
    12  WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13  See the License for the specific language governing permissions and
    14  limitations under the License.
    15  */
    16  
    17  package vdiff
    18  
    19  import (
    20  	"context"
    21  
    22  	"vitess.io/vitess/go/sqltypes"
    23  	querypb "vitess.io/vitess/go/vt/proto/query"
    24  	"vitess.io/vitess/go/vt/vterrors"
    25  	vtgateEngine "vitess.io/vitess/go/vt/vtgate/engine"
    26  )
    27  
    28  /*
    29  	VDiff gets data from multiple sources. There is a global ordering of all the data (based on PKs). However, that
    30  	data is potentially spread across shards. The distribution is selected by the vindex (sharding key) and hence
    31  	we need to globally sort the data across the shards to match the order on the target: we need the rows to be in
    32  	order so that we can compare the data correctly.
    33  
    34  	We leverage the merge sorting functionality that vtgate uses to return sorted data from a scatter query.
    35  		* Merge sorter engine.Primitives are set up, one each for the source and target shards.
    36  		* This merge sorter is embedded by a primitiveExecutor, which also contains a query result channel,
    37  			and a list of rows, sorted for the specific shard. These rows have already been popped
    38  			from the query result, but not yet compared since they do not yet contain the "topmost" row.
    39  		* The result channel is populated by the shardStreamer, which satisfies the engine.StreamExecutor interface.
    40  			The shardStreamer gets data using VStreamRows for that shard.
    41  */
    42  
    43  // primitiveExecutor starts execution on the top level primitive
    44  // and provides convenience functions for row-by-row iteration.
    45  type primitiveExecutor struct {
    46  	prim     vtgateEngine.Primitive
    47  	rows     [][]sqltypes.Value
    48  	resultch chan *sqltypes.Result
    49  	err      error
    50  
    51  	name string // for debug purposes only
    52  }
    53  
    54  func newPrimitiveExecutor(ctx context.Context, prim vtgateEngine.Primitive, name string) *primitiveExecutor {
    55  	pe := &primitiveExecutor{
    56  		prim:     prim,
    57  		resultch: make(chan *sqltypes.Result, 1),
    58  		name:     name,
    59  	}
    60  	vcursor := &contextVCursor{ctx: ctx}
    61  
    62  	// handles each callback from the merge sorter, waits for a result set from the shard streamer and pushes it on the result channel
    63  	go func() {
    64  		defer close(pe.resultch)
    65  		pe.err = vcursor.StreamExecutePrimitive(ctx, pe.prim, make(map[string]*querypb.BindVariable), true, func(qr *sqltypes.Result) error {
    66  
    67  			select {
    68  			case pe.resultch <- qr:
    69  			case <-ctx.Done():
    70  				return vterrors.Wrap(ctx.Err(), "Outer Stream")
    71  			}
    72  			return nil
    73  		})
    74  	}()
    75  	return pe
    76  }
    77  
    78  // next gets the next row in the stream for this shard, if there's currently no rows to process in the stream then wait on the
    79  // result channel for the shard streamer to produce them.
    80  func (pe *primitiveExecutor) next() ([]sqltypes.Value, error) {
    81  	for len(pe.rows) == 0 {
    82  		qr, ok := <-pe.resultch
    83  		if !ok {
    84  			return nil, pe.err
    85  		}
    86  		pe.rows = qr.Rows
    87  	}
    88  
    89  	row := pe.rows[0]
    90  	pe.rows = pe.rows[1:]
    91  	return row, nil
    92  }
    93  
    94  // drain fastforward's a shard to process (and ignore) everything from its results stream and return a count of the
    95  // discarded rows.
    96  func (pe *primitiveExecutor) drain(ctx context.Context) (int64, error) {
    97  	var count int64
    98  	for {
    99  		row, err := pe.next()
   100  		if err != nil {
   101  			return 0, err
   102  		}
   103  		if row == nil {
   104  			return count, nil
   105  		}
   106  		count++
   107  	}
   108  }