github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/table/pipeline/transform_test.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package pipeline
    16  
    17  import (
    18  	"bytes"
    19  	"context"
    20  	"fmt"
    21  	"io/ioutil"
    22  	"strconv"
    23  	"strings"
    24  	"sync"
    25  	"testing"
    26  
    27  	"github.com/stretchr/testify/assert"
    28  
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv"
    32  	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
    33  	"github.com/dolthub/dolt/go/store/types"
    34  )
    35  
// inCSV is the CSV input fed to the pipelines in these tests: a header line
// (first, last, film or show, year) followed by eleven data rows.
var inCSV = `first,last,film or show,year
Tim,Allen,The Santa Clause,1994
Tim,Allen,The Santa Clause 2,2002
Tim,Allen,The Santa Clause 3: The Escape Clause,2006
Ed,Asner,Elf,2003
Ed,Asner,Christmas on the Bayou,2013
Ed,Asner,Elf: Buddy's Musical Christmas,2014
Fred,Astaire,The Man in the Santa Claus Suit,1979
Richard,Attenborough,Miracle on 34th Street,1994
Steve,Bacic,Deck the Halls,2005
Alec,Baldwin,Rise of the Guardians,2012
Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967
`
    49  
// outCSV is the CSV output the tests compare against (after trimming
// surrounding whitespace). Every row of inCSV appears twice, once with
// index 0 and once with index 1, and carries a pre2000 column that is "true"
// for the rows whose year is below 2000 and "false" otherwise.
var outCSV = `first,last,film or show,year,pre2000,index
Tim,Allen,The Santa Clause,1994,true,0
Tim,Allen,The Santa Clause,1994,true,1
Tim,Allen,The Santa Clause 2,2002,false,0
Tim,Allen,The Santa Clause 2,2002,false,1
Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,0
Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,1
Ed,Asner,Elf,2003,false,0
Ed,Asner,Elf,2003,false,1
Ed,Asner,Christmas on the Bayou,2013,false,0
Ed,Asner,Christmas on the Bayou,2013,false,1
Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,0
Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,1
Fred,Astaire,The Man in the Santa Claus Suit,1979,true,0
Fred,Astaire,The Man in the Santa Claus Suit,1979,true,1
Richard,Attenborough,Miracle on 34th Street,1994,true,0
Richard,Attenborough,Miracle on 34th Street,1994,true,1
Steve,Bacic,Deck the Halls,2005,false,0
Steve,Bacic,Deck the Halls,2005,false,1
Alec,Baldwin,Rise of the Guardians,2012,false,0
Alec,Baldwin,Rise of the Guardians,2012,false,1
Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,0
Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,1`
    73  
    74  var _, schIn = untyped.NewUntypedSchema("first", "last", "film or show", "year")
    75  var nameToTag, schOut = untyped.NewUntypedSchema("first", "last", "film or show", "year", "pre2000", "index")
    76  
    77  func TestPipeline(t *testing.T) {
    78  	buf := bytes.NewBuffer([]byte(inCSV))
    79  	outBuf := bytes.NewBuffer([]byte{})
    80  
    81  	afterFinishCalled := false
    82  	afterFinishFunc := func() {
    83  		afterFinishCalled = true
    84  	}
    85  
    86  	func() {
    87  		csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true}
    88  		rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo)
    89  		wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo)
    90  
    91  		tc := NewTransformCollection(
    92  			NewNamedTransform("identity", identityTransFunc),
    93  			NewNamedTransform("label", labelTransFunc),
    94  			NewNamedTransform("dupe", dupeTransFunc),
    95  			NewNamedTransform("append", appendColumnPre2000TransFunc),
    96  		)
    97  
    98  		inProcFunc := ProcFuncForReader(context.Background(), rd)
    99  		outProcFunc := ProcFuncForWriter(context.Background(), wr)
   100  		p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil)
   101  
   102  		p.RunAfter(func() { rd.Close(context.Background()) })
   103  		p.RunAfter(func() { wr.Close(context.Background()) })
   104  		p.RunAfter(afterFinishFunc)
   105  
   106  		p.Start()
   107  		p.Wait()
   108  	}()
   109  
   110  	assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended")
   111  
   112  	assert.Equal(t, strings.TrimSpace(outCSV), strings.TrimSpace(outBuf.String()), "output doesn't match expectation")
   113  }
   114  
   115  func TestAddingStages(t *testing.T) {
   116  	buf := bytes.NewBuffer([]byte(inCSV))
   117  	outBuf := bytes.NewBuffer([]byte{})
   118  
   119  	afterFinishCalled := false
   120  	afterFinishFunc := func() {
   121  		afterFinishCalled = true
   122  	}
   123  
   124  	func() {
   125  		csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true}
   126  		rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo)
   127  		wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo)
   128  
   129  		tc := NewTransformCollection(
   130  			NewNamedTransform("identity", identityTransFunc),
   131  			NewNamedTransform("label", labelTransFunc),
   132  		)
   133  
   134  		addedStages := []NamedTransform{
   135  			NewNamedTransform("dupe", dupeTransFunc),
   136  			NewNamedTransform("append", appendColumnPre2000TransFunc),
   137  		}
   138  
   139  		inProcFunc := ProcFuncForReader(context.Background(), rd)
   140  		outProcFunc := ProcFuncForWriter(context.Background(), wr)
   141  		p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil)
   142  		for _, stage := range addedStages {
   143  			p.AddStage(stage)
   144  		}
   145  
   146  		p.RunAfter(func() { rd.Close(context.Background()) })
   147  		p.RunAfter(func() { wr.Close(context.Background()) })
   148  		p.RunAfter(afterFinishFunc)
   149  
   150  		p.Start()
   151  		p.Wait()
   152  	}()
   153  
   154  	assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended")
   155  
   156  	assert.Equal(t, strings.TrimSpace(outCSV), strings.TrimSpace(outBuf.String()), "output doesn't match expectation")
   157  }
   158  
   159  func TestPartialPipeline(t *testing.T) {
   160  	buf := bytes.NewBuffer([]byte(inCSV))
   161  	outBuf := bytes.NewBuffer([]byte{})
   162  
   163  	afterFinishCalled := false
   164  	afterFinishFunc := func() {
   165  		afterFinishCalled = true
   166  	}
   167  
   168  	var newOutCsv = `first,last,film or show,year,pre2000,index
   169  New,Row,InAppendStage,2999,true,0
   170  AnotherNew,Row,InAppendStage,3000,true,1
   171  Tim,Allen,The Santa Clause,1994,true,0
   172  Tim,Allen,The Santa Clause,1994,true,1
   173  Tim,Allen,The Santa Clause 2,2002,false,0
   174  Tim,Allen,The Santa Clause 2,2002,false,1
   175  Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,0
   176  Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,1
   177  Ed,Asner,Elf,2003,false,0
   178  Ed,Asner,Elf,2003,false,1
   179  Ed,Asner,Christmas on the Bayou,2013,false,0
   180  Ed,Asner,Christmas on the Bayou,2013,false,1
   181  Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,0
   182  Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,1
   183  Fred,Astaire,The Man in the Santa Claus Suit,1979,true,0
   184  Fred,Astaire,The Man in the Santa Claus Suit,1979,true,1
   185  Richard,Attenborough,Miracle on 34th Street,1994,true,0
   186  Richard,Attenborough,Miracle on 34th Street,1994,true,1
   187  Steve,Bacic,Deck the Halls,2005,false,0
   188  Steve,Bacic,Deck the Halls,2005,false,1
   189  Alec,Baldwin,Rise of the Guardians,2012,false,0
   190  Alec,Baldwin,Rise of the Guardians,2012,false,1
   191  Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,0
   192  Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,1`
   193  
   194  	func() {
   195  		csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true}
   196  		rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo)
   197  		wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo)
   198  
   199  		addedStages := []NamedTransform{
   200  			NewNamedTransform("identity", identityTransFunc),
   201  			NewNamedTransform("label", labelTransFunc),
   202  			NewNamedTransform("dupe", dupeTransFunc),
   203  			NewNamedTransform("append", appendColumnPre2000TransFunc),
   204  		}
   205  
   206  		inProcFunc := ProcFuncForReader(context.Background(), rd)
   207  		outProcFunc := ProcFuncForWriter(context.Background(), wr)
   208  
   209  		p := NewPartialPipeline(inProcFunc)
   210  		for _, stage := range addedStages {
   211  			p.AddStage(stage)
   212  		}
   213  
   214  		// Can't start the pipeline until setting a sink
   215  		assert.Panics(t, func() {
   216  			p.Start()
   217  		})
   218  
   219  		p.SetOutput(outProcFunc)
   220  
   221  		//New,Row,InAppendStage,2999,true,0
   222  		var injectedColumns = map[uint64]string{
   223  			0: "New",
   224  			1: "Row",
   225  			2: "InAppendStage",
   226  			3: "2999",
   227  			4: "true",
   228  			5: "0",
   229  		}
   230  		injectedRow, err := untyped.NewRowFromTaggedStrings(types.Format_Default, schOut, injectedColumns)
   231  		assert.NoError(t, err)
   232  		p.InjectRow("append", injectedRow)
   233  
   234  		//AnotherNew,Row,InAppendStage,3000,true,1
   235  		injectedColumns = map[uint64]string{
   236  			0: "AnotherNew",
   237  			1: "Row",
   238  			2: "InAppendStage",
   239  			3: "3000",
   240  			4: "true",
   241  			5: "1",
   242  		}
   243  		injectedRow, err = untyped.NewRowFromTaggedStrings(types.Format_Default, schOut, injectedColumns)
   244  		assert.NoError(t, err)
   245  		p.InjectRow("append", injectedRow)
   246  
   247  		p.RunAfter(func() { rd.Close(context.Background()) })
   248  		p.RunAfter(func() { wr.Close(context.Background()) })
   249  		p.RunAfter(afterFinishFunc)
   250  
   251  		p.Start()
   252  
   253  		// Now that the pipeline is started, other calls to set it up should panic
   254  		assert.Panics(t, func() {
   255  			p.SetOutput(func(p *Pipeline, ch <-chan RowWithProps, badRowChan chan<- *TransformRowFailure) {
   256  			})
   257  		})
   258  		assert.Panics(t, func() {
   259  			p.AddStage(NewNamedTransform("identity2", identityTransFunc))
   260  		})
   261  		assert.Panics(t, func() {
   262  			p.InjectRow("identity", injectedRow)
   263  		})
   264  
   265  		p.Wait()
   266  	}()
   267  
   268  	assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended")
   269  
   270  	assert.Equal(t, strings.TrimSpace(newOutCsv), strings.TrimSpace(outBuf.String()), "output does not match expectation")
   271  }
   272  
   273  func TestAbort(t *testing.T) {
   274  	buf := bytes.NewBuffer([]byte(inCSV))
   275  	outBuf := bytes.NewBuffer([]byte{})
   276  
   277  	afterFinishCalled := false
   278  	afterFinishFunc := func() {
   279  		afterFinishCalled = true
   280  	}
   281  
   282  	func() {
   283  		csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true}
   284  		rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo)
   285  		wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo)
   286  
   287  		var wg = sync.WaitGroup{}
   288  
   289  		tc := NewTransformCollection(
   290  			NewNamedTransform("identity", identityTransFunc),
   291  			NewNamedTransform("dies", hangs(&wg)),
   292  		)
   293  
   294  		inProcFunc := ProcFuncForReader(context.Background(), rd)
   295  		outProcFunc := ProcFuncForWriter(context.Background(), wr)
   296  		p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil)
   297  
   298  		p.RunAfter(func() { rd.Close(context.Background()) })
   299  		p.RunAfter(func() { wr.Close(context.Background()) })
   300  		p.RunAfter(afterFinishFunc)
   301  
   302  		p.Start()
   303  		wg.Wait()
   304  		p.Abort()
   305  	}()
   306  
   307  	assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended")
   308  }
   309  
   310  // Returns a function that hangs right after signalling the given WaitGroup that it's done
   311  func hangs(wg *sync.WaitGroup) func(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) {
   312  	wg.Add(1)
   313  	return func(inRow row.Row, props ReadableMap) (results []*TransformedRowResult, s string) {
   314  		i := 0
   315  		fmt.Println("about to call done()")
   316  		wg.Done()
   317  		for {
   318  			i++
   319  		}
   320  	}
   321  }
   322  
   323  func identityTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) {
   324  	return []*TransformedRowResult{{inRow, nil}}, ""
   325  }
   326  
   327  func labelTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) {
   328  	val, _ := inRow.GetColVal(nameToTag["year"])
   329  	year, _ := strconv.ParseInt(string(val.(types.String)), 10, 32)
   330  	return []*TransformedRowResult{
   331  		{inRow, map[string]interface{}{"pre2000": year < 2000}},
   332  	}, ""
   333  }
   334  
   335  func dupeTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) {
   336  	r1, _ := inRow.SetColVal(nameToTag["index"], types.String("0"), schOut)
   337  	r2, _ := inRow.SetColVal(nameToTag["index"], types.String("1"), schOut)
   338  	return []*TransformedRowResult{
   339  		{r1, map[string]interface{}{"dupe_index": 1}},
   340  		{r2, map[string]interface{}{"dupe_index": 2}},
   341  	}, ""
   342  }
   343  
   344  func appendColumnPre2000TransFunc(inRow row.Row, props ReadableMap) (rowData []*TransformedRowResult, badRowDetails string) {
   345  	labelval, _ := props.Get("pre2000")
   346  
   347  	isPre2000Str := "false"
   348  	if boolVal, ok := labelval.(bool); ok && boolVal {
   349  		isPre2000Str = "true"
   350  	}
   351  
   352  	// Update the column value if it's not already present
   353  	var r1 row.Row = inRow
   354  	if _, ok := inRow.GetColVal(nameToTag["pre2000"]); !ok {
   355  		r1, _ = inRow.SetColVal(nameToTag["pre2000"], types.String(isPre2000Str), schOut)
   356  	}
   357  	return []*TransformedRowResult{
   358  		{r1, nil},
   359  	}, ""
   360  }