github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/table/pipeline/procfunc_help.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipeline
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"io"
    21  	"time"
    22  
    23  	"github.com/dolthub/go-mysql-server/sql"
    24  
    25  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    26  	"github.com/dolthub/dolt/go/libraries/doltcore/table"
    27  	"github.com/dolthub/dolt/go/libraries/doltcore/table/editor"
    28  )
    29  
    30  // SourceFunc is a function that will return a new row for each successive call until all it's rows are exhausted, at
    31  // which point io.EOF should be returned
    32  type SourceFunc func() (row.Row, ImmutableProperties, error)
    33  
    34  // ProcFuncForSourceFunc is a helper method that creates an InFunc for a given SourceFunc.  It takes care of channel
    35  // processing, stop conditions, and error handling.
    36  func ProcFuncForSourceFunc(sourceFunc SourceFunc) InFunc {
    37  	return func(p *Pipeline, ch chan<- RowWithProps, badRowChan chan<- *TransformRowFailure, noMoreChan <-chan struct{}) {
    38  		defer close(ch)
    39  
    40  		for !p.IsStopping() {
    41  			select {
    42  			case <-noMoreChan:
    43  				return
    44  			default:
    45  				break
    46  			}
    47  
    48  			r, props, err := sourceFunc()
    49  
    50  			// process read errors
    51  			if err != nil {
    52  				if err == io.EOF {
    53  					if r == nil {
    54  						return
    55  					}
    56  				} else if table.IsBadRow(err) {
    57  					badRowChan <- &TransformRowFailure{table.GetBadRowRow(err), "reader", err.Error()}
    58  				} else {
    59  					p.StopWithErr(err)
    60  					return
    61  				}
    62  			} else if r == nil {
    63  				panic("Readers should not be returning nil without error.  io.EOF should be used when done.")
    64  			}
    65  
    66  			if r != nil {
    67  				select {
    68  				case ch <- RowWithProps{r, props}:
    69  				case <-p.stopChan:
    70  					return
    71  				}
    72  			}
    73  		}
    74  	}
    75  }
    76  
    77  // ProcFuncForReader adapts a standard TableReader to work as an InFunc for a pipeline
    78  func ProcFuncForReader(ctx context.Context, rd table.TableReader) InFunc {
    79  	return ProcFuncForSourceFunc(func() (row.Row, ImmutableProperties, error) {
    80  		r, err := rd.ReadRow(ctx)
    81  
    82  		return r, NoProps, err
    83  	})
    84  }
    85  
    86  // SinkFunc is a function that will process the final transformed rows from a pipeline.  This function will be called
    87  // once for every row that makes it through the pipeline
    88  type SinkFunc func(row.Row, ReadableMap) error
    89  
    90  // ProcFuncForSinkFunc is a helper method that creates an OutFunc for a given SinkFunc.  It takes care of channel
    91  // processing, stop conditions, and error handling.
    92  func ProcFuncForSinkFunc(sinkFunc SinkFunc) OutFunc {
    93  	return func(p *Pipeline, ch <-chan RowWithProps, badRowChan chan<- *TransformRowFailure) {
    94  		for {
    95  			if p.IsStopping() {
    96  				return
    97  			}
    98  
    99  			select {
   100  			case r, ok := <-ch:
   101  				if ok {
   102  					err := sinkFunc(r.Row, r.Props)
   103  
   104  					if err != nil {
   105  						if table.IsBadRow(err) ||
   106  							sql.ErrPrimaryKeyViolation.Is(err) ||
   107  							sql.ErrUniqueKeyViolation.Is(err) ||
   108  							errors.Is(err, editor.ErrDuplicateKey) {
   109  							badRowChan <- &TransformRowFailure{r.Row, "writer", err.Error()}
   110  						} else {
   111  							p.StopWithErr(err)
   112  							return
   113  						}
   114  					}
   115  				} else {
   116  					return
   117  				}
   118  
   119  			case <-time.After(100 * time.Millisecond):
   120  				// wake up and check stop condition
   121  			}
   122  		}
   123  	}
   124  }
   125  
   126  // SourceFuncForRows returns a source func that yields the rows given in order. Suitable for very small result sets
   127  // that are statically defined or otherwise fit easily into memory.
   128  func SourceFuncForRows(rows []row.Row) SourceFunc {
   129  	idx := 0
   130  	return func() (row.Row, ImmutableProperties, error) {
   131  		if idx >= len(rows) {
   132  			return nil, NoProps, io.EOF
   133  		}
   134  		r := rows[idx]
   135  		idx++
   136  		return r, NoProps, nil
   137  	}
   138  }
   139  
   140  // ProcFuncForWriter adapts a standard TableWriter to work as an OutFunc for a pipeline
   141  func ProcFuncForWriter(ctx context.Context, wr table.TableWriter) OutFunc {
   142  	return ProcFuncForSinkFunc(func(r row.Row, props ReadableMap) error {
   143  		return wr.WriteRow(ctx, r)
   144  	})
   145  }
   146  
   147  // InFuncForChannel returns an InFunc that reads off the channel given.
   148  func InFuncForChannel(rowChan <-chan row.Row) InFunc {
   149  	return func(p *Pipeline, ch chan<- RowWithProps, badRowChan chan<- *TransformRowFailure, noMoreChan <-chan struct{}) {
   150  		defer close(ch)
   151  
   152  		more := true
   153  		for more {
   154  			if p.IsStopping() {
   155  				return
   156  			}
   157  
   158  			select {
   159  			case <-noMoreChan:
   160  				more = false
   161  			case r, ok := <-rowChan:
   162  				if ok {
   163  					ch <- RowWithProps{Row: r, Props: NoProps}
   164  				} else {
   165  					return
   166  				}
   167  			}
   168  		}
   169  
   170  		// no more data will be written to rowChan, but still need to make sure what was written is drained.
   171  		if !more {
   172  			for {
   173  				if p.IsStopping() {
   174  					return
   175  				}
   176  
   177  				select {
   178  				case r, ok := <-rowChan:
   179  					if ok {
   180  						ch <- RowWithProps{Row: r, Props: NoProps}
   181  					} else {
   182  						return
   183  					}
   184  				default:
   185  					return
   186  				}
   187  			}
   188  		}
   189  	}
   190  }