github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/utils/pipeline/pipeline.go (about)

     1  // Copyright 2020 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipeline
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  
    22  	"golang.org/x/sync/errgroup"
    23  )
    24  
    25  // ErrUnknownStageName is the error returned when an unknown stage name is referenced
    26  var ErrUnknownStageName = errors.New("unknown stage name")
    27  
    28  // ErrPipelidneAborted is the error returned from Pipeline.Wait when Pipeline.Abort is called.
    29  var ErrPipelineAborted = errors.New("pipeline aborted")
    30  
// Pipeline is a batch processor which takes data in batches and transforms it in stages.
// A Pipeline is built by NewPipeline, launched with Start, and then observed or stopped
// through Wait and Abort.
type Pipeline struct {
	nameToStage map[string]*Stage      // stage lookup by name, filled in by NewPipeline
	stages      []*Stage               // stages in pipeline order, as passed to NewPipeline
	waitCh      <-chan []ItemWithProps // only ever set from a nil channel in NewPipeline; not read in this file
	eg          *errgroup.Group        // nil until Start creates it; passed to every stage's start
	ctx         context.Context        // context derived alongside eg in Start; passed to every stage's start
}
    39  
    40  // NewPipeline creates a new Pipeline from an ordered slice of stages. The first stage in the pipeline must produce data
    41  // and each stage will pass data on to the next stage.
    42  func NewPipeline(stages ...*Stage) *Pipeline {
    43  	var nextInStage chan []ItemWithProps
    44  
    45  	outBatchSize := -1
    46  	nameToStage := make(map[string]*Stage)
    47  
    48  	p := &Pipeline{waitCh: nextInStage}
    49  	for i := len(stages) - 1; i >= 0; i-- {
    50  		nextInStage = stages[i].init(outBatchSize, nextInStage, p)
    51  		outBatchSize = stages[i].inBatchSize
    52  
    53  		nameToStage[stages[i].name] = stages[i]
    54  	}
    55  
    56  	p.eg = nil
    57  	p.stages = stages
    58  	p.nameToStage = nameToStage
    59  
    60  	return p
    61  }
    62  
    63  // Start the pipeline
    64  func (p *Pipeline) Start(ctx context.Context) {
    65  	if p.eg != nil {
    66  		panic("started multiple times")
    67  	}
    68  
    69  	p.eg, p.ctx = errgroup.WithContext(ctx)
    70  	for _, stage := range p.stages {
    71  		stage.start(p.eg, p.ctx)
    72  	}
    73  }
    74  
    75  // Wait waits for the pipeline to finish
    76  func (p *Pipeline) Wait() error {
    77  	return p.eg.Wait()
    78  }
    79  
    80  // Abort aborts the pipeline.  After abort is called the pipeline will continue running closing asynchronously
    81  // Use Wait() if you want to wait for the pipeline to finish closing before continuing.
    82  func (p *Pipeline) Abort() {
    83  	p.eg.Go(func() error {
    84  		return ErrPipelineAborted
    85  	})
    86  }
    87  
    88  // GetInputChannel gets the input channel for a pipeline stage
    89  func (p *Pipeline) GetInputChannel(stageName string) (chan []ItemWithProps, error) {
    90  	stage, ok := p.nameToStage[stageName]
    91  
    92  	if !ok {
    93  		return nil, fmt.Errorf("%s: %w", stageName, ErrUnknownStageName)
    94  	}
    95  
    96  	return stage.inCh, nil
    97  }