github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/table/pipeline/transform.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipeline
    16  
    17  import (
    18  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    19  )
    20  
// NamedTransform is a struct containing a TransformFunc and the name of the transform being applied.  If an error
// occurs during processing, this name will be provided as the TransformName in any TransformRowFailure error.
type NamedTransform struct {
	// Name is the name of the transform.  If an error occurs during processing, this name will be provided as the
	// TransformName in any TransformRowFailure error.
	Name string

	// Func is the TransformFunc being applied
	Func TransformFunc
}
    31  
    32  // NewNamedTransform returns a NamedTransform object from a name and a TransformRowFunc.  The returned NamedTransform
    33  // will have its Func member set to be a TransformFunc that handles input, output, and stop channel processing, along
    34  // with error handling and it will call the given TransformRowFunc for every row.
    35  func NewNamedTransform(name string, transRowFunc TransformRowFunc) NamedTransform {
    36  	transformer := newRowTransformer(name, transRowFunc)
    37  	return NamedTransform{name, transformer}
    38  }
    39  
// TransformedRowResult is what will be returned from each stage of a transform: one output row together with any
// property changes that should accompany it.
type TransformedRowResult struct {
	// RowData is the new row that should be passed on to the next stage
	RowData row.Row

	// PropertyUpdates are mutations that should be applied to the row's properties.  The transform built by
	// NewNamedTransform passes the incoming properties through unchanged when this map is nil or empty.
	PropertyUpdates map[string]interface{}
}
    48  
// TransformFunc reads rows from inChan, transforms them, and then writes them to outChan.  If an error occurs
// processing a row, a TransformRowFailure will be written to badRowChan, and if stopChan is closed it should exit all
// processing.
type TransformFunc func(inChan <-chan RowWithProps, outChan chan<- RowWithProps, badRowChan chan<- *TransformRowFailure, stopChan <-chan struct{})
    53  
// TransformRowFunc processes a single row and its properties and can return 0 or more TransformedRowResults per row.
// If the row being processed is bad it should return nil, and a string containing details of the row problem.  The
// transform built by NewNamedTransform reports a failure only when badRowDetails is non-empty.
type TransformRowFunc func(inRow row.Row, props ReadableMap) (rowData []*TransformedRowResult, badRowDetails string)
    57  
    58  func newRowTransformer(name string, transRowFunc TransformRowFunc) TransformFunc {
    59  	return func(inChan <-chan RowWithProps, outChan chan<- RowWithProps, badRowChan chan<- *TransformRowFailure, stopChan <-chan struct{}) {
    60  		for {
    61  			select {
    62  			case <-stopChan:
    63  				return
    64  			default:
    65  			}
    66  
    67  			select {
    68  			case r, ok := <-inChan:
    69  				if ok {
    70  					outRowData, badRowDetails := transRowFunc(r.Row, r.Props)
    71  					outSize := len(outRowData)
    72  
    73  					for i := 0; i < outSize; i++ {
    74  						propUpdates := outRowData[i].PropertyUpdates
    75  
    76  						outProps := r.Props
    77  						if len(propUpdates) > 0 {
    78  							outProps = outProps.Set(propUpdates)
    79  						}
    80  
    81  						outRow := RowWithProps{outRowData[i].RowData, outProps}
    82  
    83  						select {
    84  						case outChan <- outRow:
    85  						case <-stopChan:
    86  							return
    87  						}
    88  					}
    89  
    90  					if badRowDetails != "" {
    91  						badRowChan <- &TransformRowFailure{r.Row, name, badRowDetails}
    92  					}
    93  				} else {
    94  					return
    95  				}
    96  
    97  			case <-stopChan:
    98  				return
    99  			}
   100  		}
   101  	}
   102  }