github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/table/pipeline/transform_test.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package pipeline 16 17 import ( 18 "bytes" 19 "context" 20 "fmt" 21 "io/ioutil" 22 "strconv" 23 "strings" 24 "sync" 25 "testing" 26 27 "github.com/stretchr/testify/assert" 28 29 "github.com/dolthub/dolt/go/libraries/doltcore/row" 30 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped" 31 "github.com/dolthub/dolt/go/libraries/doltcore/table/untyped/csv" 32 "github.com/dolthub/dolt/go/libraries/utils/iohelp" 33 "github.com/dolthub/dolt/go/store/types" 34 ) 35 36 var inCSV = `first,last,film or show,year 37 Tim,Allen,The Santa Clause,1994 38 Tim,Allen,The Santa Clause 2,2002 39 Tim,Allen,The Santa Clause 3: The Escape Clause,2006 40 Ed,Asner,Elf,2003 41 Ed,Asner,Christmas on the Bayou,2013 42 Ed,Asner,Elf: Buddy's Musical Christmas,2014 43 Fred,Astaire,The Man in the Santa Claus Suit,1979 44 Richard,Attenborough,Miracle on 34th Street,1994 45 Steve,Bacic,Deck the Halls,2005 46 Alec,Baldwin,Rise of the Guardians,2012 47 Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967 48 ` 49 50 var outCSV = `first,last,film or show,year,pre2000,index 51 Tim,Allen,The Santa Clause,1994,true,0 52 Tim,Allen,The Santa Clause,1994,true,1 53 Tim,Allen,The Santa Clause 2,2002,false,0 54 Tim,Allen,The Santa Clause 2,2002,false,1 55 Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,0 56 Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,1 57 Ed,Asner,Elf,2003,false,0 58 Ed,Asner,Elf,2003,false,1 59 Ed,Asner,Christmas on the Bayou,2013,false,0 60 Ed,Asner,Christmas on the Bayou,2013,false,1 61 Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,0 62 Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,1 63 Fred,Astaire,The Man in the Santa Claus Suit,1979,true,0 64 Fred,Astaire,The Man in the Santa Claus Suit,1979,true,1 65 Richard,Attenborough,Miracle on 34th Street,1994,true,0 66 Richard,Attenborough,Miracle on 34th Street,1994,true,1 67 Steve,Bacic,Deck the Halls,2005,false,0 68 Steve,Bacic,Deck the Halls,2005,false,1 69 Alec,Baldwin,Rise of the Guardians,2012,false,0 70 Alec,Baldwin,Rise of the Guardians,2012,false,1 71 Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,0 72 Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,1` 73 74 var _, schIn = untyped.NewUntypedSchema("first", "last", "film or show", "year") 75 var nameToTag, schOut = untyped.NewUntypedSchema("first", "last", "film or show", "year", "pre2000", "index") 76 77 func TestPipeline(t *testing.T) { 78 buf := bytes.NewBuffer([]byte(inCSV)) 79 outBuf := bytes.NewBuffer([]byte{}) 80 81 afterFinishCalled := false 82 afterFinishFunc := func() { 83 afterFinishCalled = true 84 } 85 86 func() { 87 csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} 88 rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo) 89 wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) 90 91 tc := NewTransformCollection( 92 NewNamedTransform("identity", identityTransFunc), 93 NewNamedTransform("label", labelTransFunc), 94 NewNamedTransform("dupe", dupeTransFunc), 95 NewNamedTransform("append", appendColumnPre2000TransFunc), 96 ) 97 98 inProcFunc := ProcFuncForReader(context.Background(), rd) 99 outProcFunc := ProcFuncForWriter(context.Background(), wr) 100 p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil) 101 102 p.RunAfter(func() { rd.Close(context.Background()) }) 103 p.RunAfter(func() { wr.Close(context.Background()) }) 104 p.RunAfter(afterFinishFunc) 105 106 p.Start() 107 p.Wait() 108 }() 109 110 assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended") 111 112 assert.Equal(t, strings.TrimSpace(outCSV), strings.TrimSpace(outBuf.String()), "output doesn't match expectation") 113 } 114 115 func TestAddingStages(t *testing.T) { 116 buf := bytes.NewBuffer([]byte(inCSV)) 117 outBuf := bytes.NewBuffer([]byte{}) 118 119 afterFinishCalled := false 120 afterFinishFunc := func() { 121 afterFinishCalled = true 122 } 123 124 func() { 125 csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} 126 rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo) 127 wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) 128 129 tc := NewTransformCollection( 130 NewNamedTransform("identity", identityTransFunc), 131 NewNamedTransform("label", labelTransFunc), 132 ) 133 134 addedStages := []NamedTransform{ 135 NewNamedTransform("dupe", dupeTransFunc), 136 NewNamedTransform("append", appendColumnPre2000TransFunc), 137 } 138 139 inProcFunc := ProcFuncForReader(context.Background(), rd) 140 outProcFunc := ProcFuncForWriter(context.Background(), wr) 141 p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil) 142 for _, stage := range addedStages { 143 p.AddStage(stage) 144 } 145 146 p.RunAfter(func() { rd.Close(context.Background()) }) 147 p.RunAfter(func() { wr.Close(context.Background()) }) 148 p.RunAfter(afterFinishFunc) 149 150 p.Start() 151 p.Wait() 152 }() 153 154 assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended") 155 156 assert.Equal(t, strings.TrimSpace(outCSV), strings.TrimSpace(outBuf.String()), "output doesn't match expectation") 157 } 158 159 func TestPartialPipeline(t *testing.T) { 160 buf := bytes.NewBuffer([]byte(inCSV)) 161 outBuf := bytes.NewBuffer([]byte{}) 162 163 afterFinishCalled := false 164 afterFinishFunc := func() { 165 afterFinishCalled = true 166 } 167 168 var newOutCsv = `first,last,film or show,year,pre2000,index 169 New,Row,InAppendStage,2999,true,0 170 AnotherNew,Row,InAppendStage,3000,true,1 171 Tim,Allen,The Santa Clause,1994,true,0 172 Tim,Allen,The Santa Clause,1994,true,1 173 Tim,Allen,The Santa Clause 2,2002,false,0 174 Tim,Allen,The Santa Clause 2,2002,false,1 175 Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,0 176 Tim,Allen,The Santa Clause 3: The Escape Clause,2006,false,1 177 Ed,Asner,Elf,2003,false,0 178 Ed,Asner,Elf,2003,false,1 179 Ed,Asner,Christmas on the Bayou,2013,false,0 180 Ed,Asner,Christmas on the Bayou,2013,false,1 181 Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,0 182 Ed,Asner,Elf: Buddy's Musical Christmas,2014,false,1 183 Fred,Astaire,The Man in the Santa Claus Suit,1979,true,0 184 Fred,Astaire,The Man in the Santa Claus Suit,1979,true,1 185 Richard,Attenborough,Miracle on 34th Street,1994,true,0 186 Richard,Attenborough,Miracle on 34th Street,1994,true,1 187 Steve,Bacic,Deck the Halls,2005,false,0 188 Steve,Bacic,Deck the Halls,2005,false,1 189 Alec,Baldwin,Rise of the Guardians,2012,false,0 190 Alec,Baldwin,Rise of the Guardians,2012,false,1 191 Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,0 192 Don,Beddoe,Bewitched (episode Humbug Not to Be Spoken Here - Season 4),1967,true,1` 193 194 func() { 195 csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} 196 rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo) 197 wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) 198 199 addedStages := []NamedTransform{ 200 NewNamedTransform("identity", identityTransFunc), 201 NewNamedTransform("label", labelTransFunc), 202 NewNamedTransform("dupe", dupeTransFunc), 203 NewNamedTransform("append", appendColumnPre2000TransFunc), 204 } 205 206 inProcFunc := ProcFuncForReader(context.Background(), rd) 207 outProcFunc := ProcFuncForWriter(context.Background(), wr) 208 209 p := NewPartialPipeline(inProcFunc) 210 for _, stage := range addedStages { 211 p.AddStage(stage) 212 } 213 214 // Can't start the pipeline until setting a sink 215 assert.Panics(t, func() { 216 p.Start() 217 }) 218 219 p.SetOutput(outProcFunc) 220 221 //New,Row,InAppendStage,2999,true,0 222 var injectedColumns = map[uint64]string{ 223 0: "New", 224 1: "Row", 225 2: "InAppendStage", 226 3: "2999", 227 4: "true", 228 5: "0", 229 } 230 injectedRow, err := untyped.NewRowFromTaggedStrings(types.Format_Default, schOut, injectedColumns) 231 assert.NoError(t, err) 232 p.InjectRow("append", injectedRow) 233 234 //AnotherNew,Row,InAppendStage,3000,true,1 235 injectedColumns = map[uint64]string{ 236 0: "AnotherNew", 237 1: "Row", 238 2: "InAppendStage", 239 3: "3000", 240 4: "true", 241 5: "1", 242 } 243 injectedRow, err = untyped.NewRowFromTaggedStrings(types.Format_Default, schOut, injectedColumns) 244 assert.NoError(t, err) 245 p.InjectRow("append", injectedRow) 246 247 p.RunAfter(func() { rd.Close(context.Background()) }) 248 p.RunAfter(func() { wr.Close(context.Background()) }) 249 p.RunAfter(afterFinishFunc) 250 251 p.Start() 252 253 // Now that the pipeline is started, other calls to set it up should panic 254 assert.Panics(t, func() { 255 p.SetOutput(func(p *Pipeline, ch <-chan RowWithProps, badRowChan chan<- *TransformRowFailure) { 256 }) 257 }) 258 assert.Panics(t, func() { 259 p.AddStage(NewNamedTransform("identity2", identityTransFunc)) 260 }) 261 assert.Panics(t, func() { 262 p.InjectRow("identity", injectedRow) 263 }) 264 265 p.Wait() 266 }() 267 268 assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended") 269 270 assert.Equal(t, strings.TrimSpace(newOutCsv), strings.TrimSpace(outBuf.String()), "output does not match expectation") 271 } 272 273 func TestAbort(t *testing.T) { 274 buf := bytes.NewBuffer([]byte(inCSV)) 275 outBuf := bytes.NewBuffer([]byte{}) 276 277 afterFinishCalled := false 278 afterFinishFunc := func() { 279 afterFinishCalled = true 280 } 281 282 func() { 283 csvInfo := &csv.CSVFileInfo{Delim: ",", HasHeaderLine: true, Columns: nil, EscapeQuotes: true} 284 rd, _ := csv.NewCSVReader(types.Format_Default, ioutil.NopCloser(buf), csvInfo) 285 wr, _ := csv.NewCSVWriter(iohelp.NopWrCloser(outBuf), schOut, csvInfo) 286 287 var wg = sync.WaitGroup{} 288 289 tc := NewTransformCollection( 290 NewNamedTransform("identity", identityTransFunc), 291 NewNamedTransform("dies", hangs(&wg)), 292 ) 293 294 inProcFunc := ProcFuncForReader(context.Background(), rd) 295 outProcFunc := ProcFuncForWriter(context.Background(), wr) 296 p := NewAsyncPipeline(inProcFunc, outProcFunc, tc, nil) 297 298 p.RunAfter(func() { rd.Close(context.Background()) }) 299 p.RunAfter(func() { wr.Close(context.Background()) }) 300 p.RunAfter(afterFinishFunc) 301 302 p.Start() 303 wg.Wait() 304 p.Abort() 305 }() 306 307 assert.True(t, afterFinishCalled, "afterFinish func not called when pipeline ended") 308 } 309 310 // Returns a function that hangs right after signalling the given WaitGroup that it's done 311 func hangs(wg *sync.WaitGroup) func(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) { 312 wg.Add(1) 313 return func(inRow row.Row, props ReadableMap) (results []*TransformedRowResult, s string) { 314 i := 0 315 fmt.Println("about to call done()") 316 wg.Done() 317 for { 318 i++ 319 } 320 } 321 } 322 323 func identityTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) { 324 return []*TransformedRowResult{{inRow, nil}}, "" 325 } 326 327 func labelTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) { 328 val, _ := inRow.GetColVal(nameToTag["year"]) 329 year, _ := strconv.ParseInt(string(val.(types.String)), 10, 32) 330 return []*TransformedRowResult{ 331 {inRow, map[string]interface{}{"pre2000": year < 2000}}, 332 }, "" 333 } 334 335 func dupeTransFunc(inRow row.Row, props ReadableMap) ([]*TransformedRowResult, string) { 336 r1, _ := inRow.SetColVal(nameToTag["index"], types.String("0"), schOut) 337 r2, _ := inRow.SetColVal(nameToTag["index"], types.String("1"), schOut) 338 return []*TransformedRowResult{ 339 {r1, map[string]interface{}{"dupe_index": 1}}, 340 {r2, map[string]interface{}{"dupe_index": 2}}, 341 }, "" 342 } 343 344 func appendColumnPre2000TransFunc(inRow row.Row, props ReadableMap) (rowData []*TransformedRowResult, badRowDetails string) { 345 labelval, _ := props.Get("pre2000") 346 347 isPre2000Str := "false" 348 if boolVal, ok := labelval.(bool); ok && boolVal { 349 isPre2000Str = "true" 350 } 351 352 // Update the column value if it's not already present 353 var r1 row.Row = inRow 354 if _, ok := inRow.GetColVal(nameToTag["pre2000"]); !ok { 355 r1, _ = inRow.SetColVal(nameToTag["pre2000"], types.String(isPre2000Str), schOut) 356 } 357 return []*TransformedRowResult{ 358 {r1, nil}, 359 }, "" 360 }