// Copyright 2020 Dolthub, Inc.
//
// Licensed under the Apache License, Version 2.0 (the "License");
// you may not use this file except in compliance with the License.
// You may obtain a copy of the License at
//
//     http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing, software
// distributed under the License is distributed on an "AS IS" BASIS,
// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
// See the License for the specific language governing permissions and
// limitations under the License.

package pipeline

import (
	"context"
	"errors"
	"fmt"

	"golang.org/x/sync/errgroup"
)

// ErrUnknownStageName is the error returned when an unknown stage name is referenced.
// GetInputChannel wraps it together with the offending name, so callers should match
// it with errors.Is rather than by direct comparison.
var ErrUnknownStageName = errors.New("unknown stage name")

// ErrPipelineAborted is the error that Pipeline.Abort feeds into the pipeline's
// errgroup, and therefore the error Pipeline.Wait reports after an abort.
var ErrPipelineAborted = errors.New("pipeline aborted")

// Pipeline is a batch processor which takes data in batches and transforms it in stages.
type Pipeline struct {
	// nameToStage indexes the stages by their name; it backs GetInputChannel.
	nameToStage map[string]*Stage

	// stages holds the stages in the order they were passed to NewPipeline.
	stages []*Stage

	// waitCh is set once, in NewPipeline.
	// NOTE(review): the value assigned there is still a nil channel and nothing in
	// this file reads the field -- confirm whether it is still needed.
	waitCh <-chan []ItemWithProps

	// eg is the errgroup the stages are started with. It is nil until Start is
	// called, which is how Start detects a second call.
	eg *errgroup.Group

	// ctx is the context returned by errgroup.WithContext in Start.
	ctx context.Context
}

// NewPipeline creates a new Pipeline from an ordered slice of stages. The first stage in the pipeline must produce data
// and each stage will pass data on to the next stage.
42 func NewPipeline(stages ...*Stage) *Pipeline { 43 var nextInStage chan []ItemWithProps 44 45 outBatchSize := -1 46 nameToStage := make(map[string]*Stage) 47 48 p := &Pipeline{waitCh: nextInStage} 49 for i := len(stages) - 1; i >= 0; i-- { 50 nextInStage = stages[i].init(outBatchSize, nextInStage, p) 51 outBatchSize = stages[i].inBatchSize 52 53 nameToStage[stages[i].name] = stages[i] 54 } 55 56 p.eg = nil 57 p.stages = stages 58 p.nameToStage = nameToStage 59 60 return p 61 } 62 63 // Start the pipeline 64 func (p *Pipeline) Start(ctx context.Context) { 65 if p.eg != nil { 66 panic("started multiple times") 67 } 68 69 p.eg, p.ctx = errgroup.WithContext(ctx) 70 for _, stage := range p.stages { 71 stage.start(p.eg, p.ctx) 72 } 73 } 74 75 // Wait waits for the pipeline to finish 76 func (p *Pipeline) Wait() error { 77 return p.eg.Wait() 78 } 79 80 // Abort aborts the pipeline. After abort is called the pipeline will continue running closing asynchronously 81 // Use Wait() if you want to wait for the pipeline to finish closing before continuing. 82 func (p *Pipeline) Abort() { 83 p.eg.Go(func() error { 84 return ErrPipelineAborted 85 }) 86 } 87 88 // GetInputChannel gets the input channel for a pipeline stage 89 func (p *Pipeline) GetInputChannel(stageName string) (chan []ItemWithProps, error) { 90 stage, ok := p.nameToStage[stageName] 91 92 if !ok { 93 return nil, fmt.Errorf("%s: %w", stageName, ErrUnknownStageName) 94 } 95 96 return stage.inCh, nil 97 }