github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/slice_test.go

github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/slice_test.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package bigslice_test
     6  
     7  import (
     8  	"bufio"
     9  	"bytes"
    10  	"context"
    11  	"fmt"
    12  	"io/ioutil"
    13  	"math/rand"
    14  	"os"
    15  	"reflect"
    16  	"runtime"
    17  	"sort"
    18  	"strings"
    19  	"sync"
    20  	"testing"
    21  	"testing/quick"
    22  	"text/tabwriter"
    23  
    24  	fuzz "github.com/google/gofuzz"
    25  	"github.com/grailbio/base/errors"
    26  	"github.com/grailbio/base/log"
    27  	"github.com/grailbio/bigmachine/rpc"
    28  	"github.com/grailbio/bigmachine/testsystem"
    29  	"github.com/grailbio/bigslice"
    30  	"github.com/grailbio/bigslice/exec"
    31  	"github.com/grailbio/bigslice/metrics"
    32  	"github.com/grailbio/bigslice/sliceio"
    33  	"github.com/grailbio/bigslice/slicetest"
    34  	"github.com/grailbio/bigslice/typecheck"
    35  )
    36  
// init registers the log package's flags when the test package is loaded.
func init() {
	log.AddFlags() // so they can be used in tests
}
    40  
    41  func sortColumns(columns []reflect.Value) {
    42  	s := new(columnSlice)
    43  	s.keys = columns[0].Interface().([]string)
    44  	s.swappers = make([]func(i, j int), len(columns))
    45  	for i := range columns {
    46  		s.swappers[i] = reflect.Swapper(columns[i].Interface())
    47  	}
    48  	sort.Stable(s)
    49  }
    50  
    51  type columnSlice struct {
    52  	keys     []string
    53  	swappers []func(i, j int)
    54  }
    55  
    56  func (c columnSlice) Len() int           { return len(c.keys) }
    57  func (c columnSlice) Less(i, j int) bool { return c.keys[i] < c.keys[j] }
    58  func (c columnSlice) Swap(i, j int) {
    59  	for _, swap := range c.swappers {
    60  		swap(i, j)
    61  	}
    62  }
    63  
// executors maps a display name to the exec.Option of each executor that the
// helpers and tests in this file run slices on.
var executors = map[string]exec.Option{
	"Local":           exec.Local,
	"Bigmachine.Test": exec.Bigmachine(testsystem.New()),
}
    68  
    69  func run(ctx context.Context, t *testing.T, slice bigslice.Slice) map[string]*sliceio.Scanner {
    70  	t.Helper()
    71  	scannerErrs := runError(ctx, t, slice)
    72  	scanners := make(map[string]*sliceio.Scanner, len(scannerErrs))
    73  	for name, scannerErr := range scannerErrs {
    74  		if err := scannerErr.Err; err != nil {
    75  			t.Errorf("executor %s error %v", name, err)
    76  		} else {
    77  			scanners[name] = scannerErr.Scanner
    78  		}
    79  	}
    80  	return scanners
    81  }
    82  
// scannerErr pairs the scanner over a slice's results with the error returned
// from running that slice.
type scannerErr struct {
	*sliceio.Scanner
	Err error
}
    87  
    88  func runError(ctx context.Context, t *testing.T, slice bigslice.Slice) map[string]scannerErr {
    89  	t.Helper()
    90  	results := make(map[string]scannerErr)
    91  	fn := bigslice.Func(func() bigslice.Slice { return slice })
    92  	for name, opt := range executors {
    93  		if testing.Short() && name != "Local" {
    94  			continue
    95  		}
    96  		sess := exec.Start(opt)
    97  		// TODO(marius): faster teardown in bigmachine so that we can call this here.
    98  		// defer sess.Shutdown()
    99  		res, err := sess.Run(ctx, fn)
   100  		results[name] = scannerErr{res.Scanner(), err}
   101  	}
   102  	return results
   103  }
   104  
// assertColumnsEqual compares actual column data against expected column data
// and reports any difference on t.
//
// columns is an interleaved list of slices: got0, want0, got1, want1, and so
// on. Columns in even positions are the actual values and columns in odd
// positions are the expected values. If sort is true, the actual and the
// expected columns are each sorted in place with sortColumns before they are
// compared.
//
// Malformed expected columns, an odd number of columns, or no columns at all
// abort the test with t.Fatal; malformed actual columns and value mismatches
// are reported with t.Errorf.
func assertColumnsEqual(t *testing.T, sort bool, columns ...interface{}) {
	t.Helper()
	if len(columns)%2 != 0 {
		t.Fatal("must pass even number of columns")
	}
	numColumns := len(columns) / 2
	if numColumns < 1 {
		t.Fatal("must have at least one column to compare")
	}
	gotCols := make([]reflect.Value, numColumns)
	wantCols := make([]reflect.Value, numColumns)
	// De-interleave the arguments, checking that every column is a slice and
	// that the columns on each side all have the same length.
	for i := range columns {
		j := i / 2
		if i%2 == 0 {
			gotCols[j] = reflect.ValueOf(columns[i])
			if gotCols[j].Kind() != reflect.Slice {
				t.Errorf("column %d of actual must be a slice", j)
				return
			}
			if j > 0 && gotCols[j].Len() != gotCols[j-1].Len() {
				t.Errorf("got %d, want %d columns in actual", gotCols[j].Len(), gotCols[j-1].Len())
				return
			}
		} else {
			// Problems with our expected columns are fatal, as that means that
			// the test itself is incorrectly constructed.
			wantCols[j] = reflect.ValueOf(columns[i])
			if wantCols[j].Kind() != reflect.Slice {
				t.Fatalf("column %d of expected must be a slice", j)
			}
			if j > 0 && wantCols[j].Len() != wantCols[j-1].Len() {
				t.Fatalf("got %d, want %d columns in expected", wantCols[j].Len(), wantCols[j-1].Len())
			}
		}
	}
	if sort {
		sortColumns(gotCols)
		sortColumns(wantCols)
	}

	// Compare row counts. A short result stops here; extra rows are reported
	// and the comparison below still runs.
	switch got, want := gotCols[0].Len(), wantCols[0].Len(); {
	case got == want:
	case got < want:
		t.Errorf("short result: got %v, want %v", got, want)
		return
	case want < got:
		row := make([]string, len(gotCols))
		for i := range row {
			row[i] = fmt.Sprint(gotCols[i].Index(want).Interface())
		}
		// Show one row of extra values to help debug.
		t.Errorf("extra values: %v", strings.Join(row, ","))
	}

	// wantCols[0].Len() <= gotCols[0].Len() so we compare wantCols[0].Len()
	// rows.
	numRows := wantCols[0].Len()
	got := make([]interface{}, numColumns)
	want := make([]interface{}, numColumns)
	for i := 0; i < numColumns; i++ {
		got[i] = gotCols[i].Interface()
		want[i] = wantCols[i].Interface()
	}

	if !reflect.DeepEqual(got, want) {
		// Print full rows for small results. They are easier to interpret
		// than diffs.
		if numRows < 10 && numColumns < 10 {
			var (
				gotRows  = make([]string, numRows)
				wantRows = make([]string, numRows)
			)
			for i := range gotRows {
				var (
					got  = make([]string, numColumns)
					want = make([]string, numColumns)
				)
				for j := range got {
					got[j] = fmt.Sprint(gotCols[j].Index(i).Interface())
					want[j] = fmt.Sprint(wantCols[j].Index(i).Interface())
				}
				gotRows[i] = strings.Join(got, " ")
				wantRows[i] = strings.Join(want, " ")
			}
			t.Errorf("result mismatch:\ngot:\n%s\nwant:\n%s", strings.Join(gotRows, "\n"), strings.Join(wantRows, "\n"))
			return
		}

		// Print as columns
		// Only rows containing at least one differing cell are written; a
		// differing cell is rendered as "want->got".
		var b bytes.Buffer
		var tw tabwriter.Writer
		tw.Init(&b, 4, 4, 1, ' ', 0)
		for i := 0; i < numRows; i++ {
			var diff bool
			row := make([]string, numColumns)
			for j := range row {
				got := gotCols[j].Index(i).Interface()
				want := wantCols[j].Index(i).Interface()
				if !reflect.DeepEqual(got, want) {
					diff = true
					row[j] = fmt.Sprintf("%v->%v", want, got)
				} else {
					row[j] = fmt.Sprint(got)
				}
			}
			if diff {
				fmt.Fprintf(&tw, "[%d] %s\n", i, strings.Join(row, "\t"))
			}
		}
		tw.Flush()
		t.Errorf("result mismatch:\n%s", b.String())
	}
}
   218  
   219  func assertEqual(t *testing.T, slice bigslice.Slice, sort bool, expect ...interface{}) {
   220  	if !testing.Short() {
   221  		rpc.InjectFailures = true
   222  		defer func() { rpc.InjectFailures = false }()
   223  	}
   224  
   225  	t.Helper()
   226  	for name, s := range run(context.Background(), t, slice) {
   227  		t.Run(name, func(t *testing.T) {
   228  			defer s.Close()
   229  			args := make([]interface{}, len(expect))
   230  			for i := range args {
   231  				// Make this one larger to make sure we exhaust the scanner.
   232  				v := reflect.ValueOf(expect[i])
   233  				slice := reflect.MakeSlice(v.Type(), v.Len()+1, v.Len()+1)
   234  				args[i] = slice.Interface()
   235  			}
   236  			n, ok := s.Scanv(context.Background(), args...)
   237  			if ok {
   238  				t.Errorf("%s: long read (%d)", name, n)
   239  			}
   240  			if err := s.Err(); err != nil {
   241  				t.Errorf("%s: %v", name, err)
   242  				return
   243  			}
   244  			for i := range args {
   245  				args[i] = reflect.ValueOf(args[i]).Slice(0, n).Interface()
   246  			}
   247  			columns := make([]interface{}, len(expect)*2)
   248  			for i := range expect {
   249  				columns[i*2] = args[i]
   250  				columns[i*2+1] = expect[i]
   251  			}
   252  			assertColumnsEqual(t, sort, columns...)
   253  		})
   254  	}
   255  }
   256  
// expectTypeError calls fn and verifies that it panics with a
// *typecheck.Error whose File and Line equal the location from which
// expectTypeError itself was called, and whose underlying error message
// equals message. A missing panic, or a panic with any other value, fails the
// test fatally.
func expectTypeError(t *testing.T, message string, fn func()) {
	t.Helper()
	// NOTE(review): presumably this makes typecheck attribute errors two
	// frames further up the stack (past fn and this helper) — confirm against
	// the typecheck package. It is reset to 0 when fn is done.
	typecheck.TestCalldepth = 2
	// Record the location of our caller; the error is expected to point here.
	_, file, line, ok := runtime.Caller(1)
	if !ok {
		t.Fatal("runtime.Caller error")
	}
	defer func() {
		t.Helper()
		typecheck.TestCalldepth = 0
		// recover must be called directly in this deferred function to
		// capture the panic raised by fn.
		e := recover()
		if e == nil {
			t.Fatal("expected error")
		}
		err, ok := e.(*typecheck.Error)
		if !ok {
			t.Fatalf("expected typeError, got %T", e)
		}
		if got, want := err.File, file; got != want {
			t.Errorf("got %v, want %v", got, want)
		}
		if got, want := err.Line, line; got != want {
			t.Errorf("got %v, want %v", got, want)
		}
		if got, want := err.Err.Error(), message; got != want {
			t.Errorf("got %q, want %q", got, want)
		}
	}()
	fn()
}
   287  
   288  type genNshard int
   289  
   290  func (genNshard) Generate(rand *rand.Rand, size int) reflect.Value {
   291  	// The number of shards must be >= 1 (guaranteed by constSlice
   292  	// construction).
   293  	return reflect.ValueOf(genNshard(rand.Intn(size) + 1))
   294  }
   295  
   296  type genNrow int
   297  
   298  func (genNrow) Generate(rand *rand.Rand, size int) reflect.Value {
   299  	return reflect.ValueOf(genNrow(rand.Intn(size)))
   300  }
   301  
   302  // TestConstShard verifies that the algorithm used to shard const slice data
   303  // behaves properly. The algorithm must provide shards that: cover the entire
   304  // data set, are within the bounds of data, are mutually exclusive, and are
   305  // distributed evenly.
   306  func TestConstShard(t *testing.T) {
   307  	f := func(gN genNrow, gNshard genNshard) bool {
   308  		var (
   309  			n       = int(gN)
   310  			nshard  = int(gNshard)
   311  			covered = make([]bool, n)
   312  			offsets = make([]int, nshard)
   313  			counts  = make([]int, nshard)
   314  		)
   315  		for shard := 0; shard < nshard; shard++ {
   316  			offset, count := bigslice.ConstShard(n, nshard, shard)
   317  			for i := offset; i < offset+count; i++ {
   318  				if i < 0 || i >= n {
   319  					// Out of bounds of data.
   320  					return false
   321  				}
   322  				if covered[i] {
   323  					// Already covered by another shard.
   324  					return false
   325  				}
   326  				covered[i] = true
   327  			}
   328  			offsets[shard] = offset
   329  			offsets[shard] = count
   330  		}
   331  		for _, c := range covered {
   332  			if !c {
   333  				// Data element that was part of no shard.
   334  				return false
   335  			}
   336  		}
   337  		if nshard == 0 {
   338  			return true
   339  		}
   340  		var (
   341  			minCount = counts[0]
   342  			maxCount = counts[0]
   343  		)
   344  		for _, c := range counts {
   345  			if c < minCount {
   346  				minCount = c
   347  			}
   348  			if c > maxCount {
   349  				maxCount = c
   350  			}
   351  		}
   352  		// Check even distribution.
   353  		return maxCount-minCount <= 1
   354  	}
   355  	// Some known edge cases.
   356  	for _, c := range []struct {
   357  		n      int
   358  		nshard int
   359  	}{
   360  		{0, 0},
   361  		{10, 10}, // Equal rows and shards.
   362  		{30, 10}, // Even multiple of shards.
   363  		{1, 10},  // More shards than elements.
   364  	} {
   365  		name := fmt.Sprintf("n:%d nshard:%d", c.n, c.nshard)
   366  		t.Run(name, func(t *testing.T) {
   367  			if !f(genNrow(c.n), genNshard(c.nshard)) {
   368  				t.Errorf("misbehaves")
   369  			}
   370  		})
   371  	}
   372  	// Random cases.
   373  	t.Run("Quick", func(t *testing.T) {
   374  		if err := quick.Check(f, nil); err != nil {
   375  			t.Error(err)
   376  		}
   377  	})
   378  }
   379  
   380  func TestConst(t *testing.T) {
   381  	const N = 10000
   382  	fz := fuzz.New()
   383  	fz.NilChance(0)
   384  	fz.NumElements(N, N)
   385  	var (
   386  		col1 []string
   387  		col2 []int
   388  	)
   389  	fz.Fuzz(&col1)
   390  	fz.Fuzz(&col2)
   391  	for nshards := 1; nshards < 20; nshards++ {
   392  		slice := bigslice.Const(nshards, col1, col2)
   393  		assertEqual(t, slice, true, col1, col2)
   394  	}
   395  }
   396  
// TestConstError checks that passing a non-slice column to Const produces the
// expected type error.
func TestConstError(t *testing.T) {
	expectTypeError(t, "const: invalid slice inputs", func() { bigslice.Const(1, 123) })
}
   400  
   401  func TestReaderFunc(t *testing.T) {
   402  	const (
   403  		N      = 10000
   404  		Nshard = 10
   405  	)
   406  	type state struct {
   407  		*fuzz.Fuzzer
   408  		total int
   409  	}
   410  	slice := bigslice.ReaderFunc(Nshard, func(shard int, state *state, strings []string, ints []int) (n int, err error) {
   411  		// The input should be zerod by bigslice.
   412  		var nnonzero int
   413  		for i := range strings {
   414  			if strings[i] != "" || ints[i] != 0 {
   415  				nnonzero++
   416  			}
   417  		}
   418  		if nnonzero > 0 {
   419  			t.Errorf("%d (of %d) nonzero rows", nnonzero, len(strings))
   420  		}
   421  		if state.Fuzzer == nil {
   422  			state.Fuzzer = fuzz.New()
   423  		}
   424  		state.NumElements(1, len(strings))
   425  		var (
   426  			fstrings []string
   427  			fints    []int
   428  		)
   429  		state.Fuzz(&fstrings)
   430  		state.Fuzz(&fints)
   431  		n = copy(strings, fstrings)
   432  		m := copy(ints, fints)
   433  		if m < n {
   434  			n = m
   435  		}
   436  		state.total += n
   437  		if state.total >= N {
   438  			return n - (state.total - N), sliceio.EOF
   439  		}
   440  		return n, nil
   441  	})
   442  	// Map everything to the same key so we can count them.
   443  	slice = bigslice.Map(slice, func(s string, i int) (key string, count int) { return "", 1 })
   444  	slice = bigslice.Fold(slice, func(a, e int) int { return a + e })
   445  	assertEqual(t, slice, false, []string{""}, []int{N * Nshard})
   446  }
   447  
   448  func TestReaderFuncError(t *testing.T) {
   449  	expectTypeError(t, "readerfunc: invalid reader function type func()", func() { bigslice.ReaderFunc(1, func() {}) })
   450  	expectTypeError(t, "readerfunc: invalid reader function type string", func() { bigslice.ReaderFunc(1, "invalid") })
   451  	expectTypeError(t, "readerfunc: invalid reader function type func(string, string, []int) (int, error)", func() { bigslice.ReaderFunc(1, func(shard string, state string, x []int) (int, error) { panic("") }) })
   452  	expectTypeError(t, "readerfunc: function func(int, string, []int) error does not return (int, error)", func() { bigslice.ReaderFunc(1, func(shard int, state string, x []int) error { panic("") }) })
   453  	expectTypeError(t, "readerfunc: invalid reader function type func(int, string) (int, error)", func() { bigslice.ReaderFunc(1, func(shard int, state string) (int, error) { panic("") }) })
   454  }
   455  
// readerFuncForgetEOFMessage is the log text that the ReaderFunc EOF tests
// search for in the captured log output.
const readerFuncForgetEOFMessage = "warning: reader func returned empty vector"
   457  
   458  // TestReaderFuncForgetEOF runs a buggy ReaderFunc that never returns sliceio.EOF. We check that
   459  // bigslice prints a warning.
   460  func TestReaderFuncForgetEOF(t *testing.T) {
   461  	var logOut bytes.Buffer
   462  	log.SetOutput(&logOut)
   463  	const N = 500
   464  	slice := bigslice.ReaderFunc(1, func(_ int, state *int, _ []int) (int, error) {
   465  		// Simulate an empty input. Users should return sliceio.EOF immediately, but some forget
   466  		// and just return nil. Eventually return EOF so the test terminates.
   467  		if *state >= N {
   468  			return 0, sliceio.EOF
   469  		}
   470  		*state++
   471  		return 0, nil
   472  	})
   473  	assertEqual(t, slice, false, []int{})
   474  	if !strings.Contains(logOut.String(), readerFuncForgetEOFMessage) {
   475  		t.Errorf("expected empty vector log message, got: %q", logOut.String())
   476  	}
   477  }
   478  
   479  // TestReaderFuncNoForgetEOF complements TestReaderFuncForgetEOF, testing that no spurious log
   480  // messages are written if reader funcs return non-empty vectors.
   481  func TestReaderFuncNoForgetEOF(t *testing.T) {
   482  	var logOut bytes.Buffer
   483  	log.SetOutput(&logOut)
   484  	const N = 500
   485  	slice := bigslice.ReaderFunc(1, func(_ int, state *int, out []int) (int, error) {
   486  		// Simulate an empty input. Users should return sliceio.EOF immediately, but some forget
   487  		// and just return nil. Eventually return EOF so the test terminates.
   488  		if *state >= N {
   489  			return 0, sliceio.EOF
   490  		}
   491  		*state++
   492  		return 1, nil
   493  	})
   494  	assertEqual(t, slice, false, make([]int, N))
   495  	if strings.Contains(logOut.String(), readerFuncForgetEOFMessage) {
   496  		t.Errorf("expected no empty vector log message, got: %q", logOut.String())
   497  	}
   498  }
   499  
   500  // TestWriterFunc tests the basic functionality of WriterFunc, verifying that
   501  // all data is passed to the write function, and all data is available in the
   502  // resulting slice.
   503  func TestWriterFunc(t *testing.T) {
   504  	const (
   505  		N      = 10000
   506  		Nshard = 10
   507  	)
   508  	fz := fuzz.New()
   509  	fz.NilChance(0)
   510  	fz.NumElements(N, N)
   511  	var (
   512  		col1 []string
   513  		col2 []int
   514  	)
   515  	fz.Fuzz(&col1)
   516  	fz.Fuzz(&col2)
   517  
   518  	slice := bigslice.Const(Nshard, col1, col2)
   519  
   520  	type state struct {
   521  		col1 []string
   522  		col2 []int
   523  		errs []error
   524  	}
   525  	var (
   526  		writerMutex sync.Mutex
   527  		// The states of the writers, by shard.
   528  		writerStates []state
   529  	)
   530  	slice = bigslice.WriterFunc(slice,
   531  		func(shard int, state *state, err error, col1 []string, col2 []int) error {
   532  			state.col1 = append(state.col1, col1...)
   533  			state.col2 = append(state.col2, col2...)
   534  			state.errs = append(state.errs, err)
   535  			if err != nil {
   536  				writerMutex.Lock()
   537  				defer writerMutex.Unlock()
   538  				writerStates[shard] = *state
   539  			}
   540  			return nil
   541  		})
   542  
   543  	// We expect both the columns written by the writer func and the columns in
   544  	// the resulting slice to match the input. We make a copy to avoid
   545  	// disturbing the inputs, as we'll end up sorting these to compare them.
   546  	wantCol1 := append([]string{}, col1...)
   547  	wantCol2 := append([]int{}, col2...)
   548  
   549  	ctx := context.Background()
   550  	fn := bigslice.Func(func() bigslice.Slice { return slice })
   551  	for name, opt := range executors {
   552  		t.Run(name, func(t *testing.T) {
   553  			// Each execution starts with a fresh state for the writer.
   554  			writerStates = make([]state, Nshard)
   555  			sess := exec.Start(opt)
   556  			res, err := sess.Run(ctx, fn)
   557  			if err != nil {
   558  				t.Errorf("executor %s error %v", name, err)
   559  				return
   560  			}
   561  
   562  			// Check the columns in the output slice.
   563  			scanner := res.Scanner()
   564  			defer scanner.Close()
   565  			var (
   566  				s       string
   567  				i       int
   568  				resCol1 []string
   569  				resCol2 []int
   570  			)
   571  			for scanner.Scan(context.Background(), &s, &i) {
   572  				resCol1 = append(resCol1, s)
   573  				resCol2 = append(resCol2, i)
   574  			}
   575  			assertColumnsEqual(t, true, resCol1, wantCol1, resCol2, wantCol2)
   576  
   577  			// Check the columns written by the writer func.
   578  			var (
   579  				writerCol1 []string
   580  				writerCol2 []int
   581  			)
   582  			for _, state := range writerStates {
   583  				writerCol1 = append(writerCol1, state.col1...)
   584  				writerCol2 = append(writerCol2, state.col2...)
   585  			}
   586  			assertColumnsEqual(t, true, writerCol1, wantCol1, writerCol2, wantCol2)
   587  
   588  			// Check that errors were passed as expected to the writer func.
   589  			for shard, state := range writerStates {
   590  				if len(state.errs) < 1 {
   591  					t.Errorf("writer for shard %d did not get EOF", shard)
   592  					continue
   593  				}
   594  				for i := 0; i < len(state.errs)-1; i++ {
   595  					if state.errs[i] != nil {
   596  						// Only the last error received should be non-nil.
   597  						t.Errorf("got premature error")
   598  						break
   599  					}
   600  				}
   601  				if got, want := state.errs[len(state.errs)-1], sliceio.EOF; got != want {
   602  					t.Errorf("got %v, want %v", got, want)
   603  				}
   604  			}
   605  		})
   606  	}
   607  }
   608  
   609  // TestWriterFuncBadFunc tests the type-checking of the writer func passed to
   610  // WriterFunc.
   611  func TestWriterFuncBadFunc(t *testing.T) {
   612  	for _, c := range []struct {
   613  		name    string
   614  		message string
   615  		f       interface{}
   616  	}{
   617  		{
   618  			"String",
   619  			"writerfunc: invalid writer function type string; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   620  			"I'm not a function at all",
   621  		},
   622  		{
   623  			"NoArguments",
   624  			"writerfunc: invalid writer function type func(); must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   625  			func() {},
   626  		},
   627  		{
   628  			"NonSliceColumn",
   629  			"writerfunc: invalid writer function type func(int, int, error, string, []int) error; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   630  			func(shard int, state int, err error, col1 string, col2 []int) error { panic("") },
   631  		},
   632  		{
   633  			"NotEnoughColumns",
   634  			"writerfunc: invalid writer function type func(int, int, error, []string) error; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   635  			func(shard int, state int, err error, col1 []string) error { panic("") },
   636  		},
   637  		{
   638  			"TooManyColumns",
   639  			"writerfunc: invalid writer function type func(int, int, error, []string, []int, []int) error; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   640  			func(shard int, state int, err error, col1 []string, col2 []int, col3 []int) error { panic("") },
   641  		},
   642  		{
   643  			"StringShard",
   644  			"writerfunc: invalid writer function type func(string, int, error, []string, []int) error; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   645  			func(shard string, state int, err error, col1 []string, col2 []int) error { panic("") },
   646  		},
   647  		{
   648  			"WrongColumnElementType",
   649  			"writerfunc: invalid writer function type func(int, int, error, []string, []string) error; must be func(shard int, state stateType, err error, col1 []string, col2 []int) error",
   650  			func(shard int, state int, err error, col1 []string, col2 []string) error { panic("") },
   651  		},
   652  		{
   653  			"NoReturn",
   654  			"writerfunc: invalid writer function type func(int, int, error, []string, []int); must return error",
   655  			func(shard int, state int, err error, col1 []string, col2 []int) { panic("") },
   656  		},
   657  		{
   658  			"ReturnInt",
   659  			"writerfunc: invalid writer function type func(int, int, error, []string, []int) int; must return error",
   660  			func(shard int, state int, err error, col1 []string, col2 []int) int { panic("") },
   661  		},
   662  	} {
   663  		t.Run(c.name, func(t *testing.T) {
   664  			slice := bigslice.Const(1, []string{}, []int{})
   665  			expectTypeError(t, c.message, func() { bigslice.WriterFunc(slice, c.f) })
   666  		})
   667  	}
   668  }
   669  
   670  // TestWriterFuncError tests the behavior of WriterFunc under various error
   671  // conditions.
   672  func TestWriterFuncError(t *testing.T) {
   673  	assertWriterErr := func(t *testing.T, slice bigslice.Slice) {
   674  		fn := bigslice.Func(func() bigslice.Slice { return slice })
   675  		for name, opt := range executors {
   676  			t.Run(name, func(t *testing.T) {
   677  				sess := exec.Start(opt)
   678  				_, err := sess.Run(context.Background(), fn)
   679  				if err == nil {
   680  					t.Errorf("expected error")
   681  				} else {
   682  					if got, want := err.Error(), "writerError"; !strings.Contains(got, want) {
   683  						t.Errorf("got %v, want %v", got, want)
   684  					}
   685  				}
   686  			})
   687  		}
   688  	}
   689  
   690  	// The write function always returns an error, so we should see it.
   691  	t.Run("WriteAlwaysErr", func(t *testing.T) {
   692  		slice := bigslice.Const(2, []string{"a", "b", "c", "d"})
   693  		slice = bigslice.WriterFunc(slice, func(shard int, state int, err error, col1 []string) error {
   694  			return errors.New("writerError")
   695  		})
   696  		assertWriterErr(t, slice)
   697  	})
   698  
   699  	// The write function returns an error when it sees the EOF. We expect to
   700  	// see the returned error, even though the underlying read succeeded
   701  	// without error.
   702  	t.Run("WriteErrOnEOF", func(t *testing.T) {
   703  		slice := bigslice.Const(2, []string{"a", "b", "c", "d"})
   704  		slice = bigslice.WriterFunc(slice, func(shard int, state int, err error, col1 []string) error {
   705  			if err == sliceio.EOF {
   706  				return errors.New("writerError")
   707  			}
   708  			return nil
   709  		})
   710  		assertWriterErr(t, slice)
   711  	})
   712  }
   713  
   714  func TestMap(t *testing.T) {
   715  	const N = 100000
   716  	input := make([]int, N)
   717  	output := make([]string, N)
   718  	for i := range input {
   719  		input[i] = i
   720  		output[i] = fmt.Sprint(i)
   721  	}
   722  	slice := bigslice.Const(1, input)
   723  	slice = bigslice.Map(slice, func(i int) string { return fmt.Sprint(i) })
   724  	assertEqual(t, slice, false, output)
   725  }
   726  
   727  func TestMapError(t *testing.T) {
   728  	input := bigslice.Const(1, []string{"x", "y"})
   729  	expectTypeError(t, "map: invalid map function int", func() { bigslice.Map(input, 123) })
   730  	expectTypeError(t, "map: function func(int) string does not match input slice type slice[1]string", func() { bigslice.Map(input, func(x int) string { return "" }) })
   731  	expectTypeError(t, "map: function func(int, int) string does not match input slice type slice[1]string", func() { bigslice.Map(input, func(x, y int) string { return "" }) })
   732  	expectTypeError(t, "map: need at least one output column", func() { bigslice.Map(input, func(x string) {}) })
   733  }
   734  
   735  func TestFilter(t *testing.T) {
   736  	const N = 100000
   737  	input := make([]int, N)
   738  	output := make([]int, N/2)
   739  	for i := range input {
   740  		input[i] = i
   741  		if i%2 == 0 {
   742  			output[i/2] = i
   743  		}
   744  	}
   745  	slice := bigslice.Const(N/1000, input)
   746  	slice = bigslice.Filter(slice, func(i int) bool { return i%2 == 0 })
   747  	assertEqual(t, slice, false, output)
   748  
   749  	slice = bigslice.Const(1, input)
   750  	slice = bigslice.Filter(slice, func(i int) bool { return false })
   751  	assertEqual(t, slice, false, []int{})
   752  
   753  	slice = bigslice.Const(1, input)
   754  	slice = bigslice.Filter(slice, func(i int) bool {
   755  		switch i {
   756  		case N / 4, N / 2, 3 * N / 4:
   757  			return true
   758  		default:
   759  			return false
   760  		}
   761  	})
   762  	assertEqual(t, slice, false, []int{N / 4, N / 2, 3 * N / 4})
   763  }
   764  
   765  func TestFilterError(t *testing.T) {
   766  	input := bigslice.Const(1, []string{"x", "y"})
   767  	expectTypeError(t, "filter: invalid predicate function int", func() { bigslice.Filter(input, 123) })
   768  	expectTypeError(t, "filter: function func(int) bool does not match input slice type slice[1]string", func() { bigslice.Filter(input, func(x int) bool { return false }) })
   769  	expectTypeError(t, "filter: function func(int, int) string does not match input slice type slice[1]string", func() { bigslice.Filter(input, func(x, y int) string { return "" }) })
   770  	expectTypeError(t, "filter: predicate must return a single boolean value", func() { bigslice.Filter(input, func(x string) {}) })
   771  	expectTypeError(t, "filter: predicate must return a single boolean value", func() { bigslice.Filter(input, func(x string) int { return 0 }) })
   772  	expectTypeError(t, "filter: predicate must return a single boolean value", func() { bigslice.Filter(input, func(x string) (bool, int) { return false, 0 }) })
   773  }
   774  
   775  func TestFlatmap(t *testing.T) {
   776  	slice := bigslice.Const(2, []string{"x,x", "y,y,y", "z", "", "x"})
   777  	slice = bigslice.Flatmap(slice, func(s string) []string {
   778  		if s == "" {
   779  			return nil
   780  		}
   781  		return strings.Split(s, ",")
   782  	})
   783  	assertEqual(t, slice, true, []string{"x", "x", "x", "y", "y", "y", "z"})
   784  
   785  	// Multiple columns
   786  	slice = bigslice.Flatmap(slice, func(s string) ([]string, []int) {
   787  		return []string{s}, []int{len(s)}
   788  	})
   789  	assertEqual(t, slice, true,
   790  		[]string{"x", "x", "x", "y", "y", "y", "z"},
   791  		[]int{1, 1, 1, 1, 1, 1, 1},
   792  	)
   793  
   794  	// Filter everything
   795  	slice = bigslice.Flatmap(slice, func(s string, i int) []string {
   796  		return nil
   797  	})
   798  	assertEqual(t, slice, true, []string{})
   799  
   800  	// Partial filter
   801  	slice = bigslice.Const(1, []int{0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10})
   802  	slice = bigslice.Flatmap(slice, func(i int) []int {
   803  		if i%2 == 0 {
   804  			return []int{i}
   805  		}
   806  		return nil
   807  	})
   808  	assertEqual(t, slice, false, []int{0, 2, 4, 6, 8, 10})
   809  
   810  	// Large slices
   811  	input := make([]string, 1024*10)
   812  	for i := range input {
   813  		input[i] = fmt.Sprint(i)
   814  	}
   815  	slice = bigslice.Const(5, input)
   816  	slice = bigslice.Flatmap(slice, func(s string) []string {
   817  		switch s {
   818  		case "1024":
   819  			return []string{s}
   820  		case "5000":
   821  			return []string{s}
   822  		default:
   823  			return nil
   824  		}
   825  	})
   826  	assertEqual(t, slice, true, []string{"1024", "5000"})
   827  }
   828  
   829  func TestFlatmapBuffered(t *testing.T) {
   830  	zeros := make([]int, 1025)
   831  	slice := bigslice.Const(1, []int{0})
   832  	slice = bigslice.Flatmap(slice, func(i int) []int {
   833  		return zeros
   834  	})
   835  	// Drive it manually:
   836  	assertEqual(t, slice, false, zeros)
   837  }
   838  
// TestFlatmapError verifies the type errors reported by Flatmap for
// malformed flatmap functions: a non-function argument, argument types that
// do not match the input slice, a non-slice ("not vectorized") return value,
// and a wrong argument count.
//
// NOTE(review): expectTypeError is defined elsewhere in this file; it
// presumably inspects the source location attached to the error, which would
// explain why every call is kept on a single line. Confirm before reflowing.
func TestFlatmapError(t *testing.T) {
	input := bigslice.Const(1, []int{1, 2, 3})
	expectTypeError(t, "flatmap: invalid flatmap function int", func() { bigslice.Flatmap(input, 123) })
	expectTypeError(t, "flatmap: flatmap function func(string) []int does not match input slice type slice[1]int", func() { bigslice.Flatmap(input, func(s string) []int { return nil }) })
	expectTypeError(t, "flatmap: flatmap function func(int) int is not vectorized", func() { bigslice.Flatmap(input, func(i int) int { return 0 }) })
	expectTypeError(t, "flatmap: flatmap function func(int, int) []int does not match input slice type slice[1]int", func() { bigslice.Flatmap(input, func(i, j int) []int { return nil }) })

}
   847  
   848  func TestFold(t *testing.T) {
   849  	const N = 10000
   850  	fz := fuzz.New()
   851  	fz.NilChance(0)
   852  	fz.NumElements(N/2, N/2)
   853  	var (
   854  		keys   []string
   855  		values []int
   856  	)
   857  	fz.Fuzz(&keys)
   858  	fz.Fuzz(&values)
   859  	keys = append(keys, keys...)
   860  	values = append(values, values...)
   861  	slice := bigslice.Const(N/1000, keys, values)
   862  	slice = bigslice.Fold(slice, func(a, e int) int { return a + e })
   863  
   864  	expect := make(map[string]int)
   865  	for i, key := range keys {
   866  		expect[key] += values[i]
   867  	}
   868  	var (
   869  		expectKeys   []string
   870  		expectValues []int
   871  	)
   872  	for key, value := range expect {
   873  		expectKeys = append(expectKeys, key)
   874  		expectValues = append(expectValues, value)
   875  	}
   876  	assertEqual(t, slice, true, expectKeys, expectValues)
   877  
   878  	// Make sure we can partition other element types also.
   879  	slice = bigslice.Const(N/1000, values, keys)
   880  	slice = bigslice.Fold(slice, func(a int, e string) int { return a + len(e) })
   881  	slice = bigslice.Map(slice, func(key, count int) (int, int) { return 0, count })
   882  	slice = bigslice.Fold(slice, func(a, e int) int { return a + e })
   883  	var totalSize int
   884  	for _, key := range keys {
   885  		totalSize += len(key)
   886  	}
   887  	assertEqual(t, slice, false, []int{0}, []int{totalSize})
   888  }
   889  
// TestFoldError verifies the type errors reported by Fold: an unsupported
// key type, an input with fewer than two columns, fold functions whose
// arguments or return type do not have the func(acc, t2, ..., tn) acc shape,
// and fold functions that return more than one value.
//
// NOTE(review): expectTypeError is defined elsewhere in this file; it
// presumably inspects the source location attached to the error, which would
// explain why every call is kept on a single line. Confirm before reflowing.
func TestFoldError(t *testing.T) {
	input := bigslice.Const(1, []int{1, 2, 3})
	// Two-column inputs keyed by float64 and by int respectively.
	floatInput := bigslice.Map(input, func(x int) (float64, int) { return 0, 0 })
	intInput := bigslice.Map(input, func(x int) (int, int) { return 0, 0 })
	expectTypeError(t, "fold: key type float64 cannot be accumulated", func() { bigslice.Fold(floatInput, func(x int) int { return 0 }) })
	expectTypeError(t, "Fold can be applied only for slices with at least two columns; got 1", func() { bigslice.Fold(input, func(x int) int { return 0 }) })
	expectTypeError(t, "fold: expected func(acc, t2, t3, ..., tn), got func(int) int", func() { bigslice.Fold(intInput, func(x int) int { return 0 }) })
	expectTypeError(t, "fold: expected func(acc, t2, t3, ..., tn), got func(int, int) string", func() { bigslice.Fold(intInput, func(a, x int) string { return "" }) })
	expectTypeError(t, "fold: fold functions must return exactly one value", func() { bigslice.Fold(intInput, func(a, x int) (int, int) { return 0, 0 }) })
	expectTypeError(t, "fold: expected func(acc, t2, t3, ..., tn), got func(int, string) int", func() { bigslice.Fold(intInput, func(a int, x string) int { return 0 }) })
}
   901  
   902  func TestHead(t *testing.T) {
   903  	slice := bigslice.Head(bigslice.Const(2, []int{1, 2, 3, 4, 5, 6, 7, 8, 9, 0}), 2)
   904  	assertEqual(t, slice, false, []int{1, 2, 6, 7})
   905  }
   906  
   907  // TestPrefixedPragma verifies that Prefixed slices properly adopt pragmas from
   908  // their underlying slices.
   909  func TestPrefixedPragma(t *testing.T) {
   910  	slice := bigslice.Const(2, []int{0, 1, 2}, []string{"a", "b", "c"})
   911  	slice = bigslice.Map(slice, func(i int, s string) (int, string) {
   912  		return i, s
   913  	}, bigslice.Exclusive)
   914  	slice = bigslice.Prefixed(slice, 2)
   915  	pragma, ok := slice.(bigslice.Pragma)
   916  	if !ok {
   917  		t.Fatal("Prefixed does not implement Pragma")
   918  	}
   919  	if !pragma.Exclusive() {
   920  		t.Error("Prefixed not Exclusive")
   921  	}
   922  }
   923  
   924  func TestScan(t *testing.T) {
   925  	const (
   926  		N      = 10000
   927  		Nshard = 10
   928  	)
   929  	input := make([]int, N)
   930  	for i := range input {
   931  		input[i] = i
   932  	}
   933  	var mu sync.Mutex
   934  	output := make([]int, N)
   935  	shards := make([]int, Nshard)
   936  	slice := bigslice.Const(Nshard, input)
   937  	slice = bigslice.Scan(slice, func(shard int, scan *sliceio.Scanner) error {
   938  		mu.Lock()
   939  		defer mu.Unlock()
   940  		shards[shard]++
   941  		var elem int
   942  		ctx := context.Background()
   943  		for scan.Scan(ctx, &elem) {
   944  			output[elem]++
   945  		}
   946  		return scan.Err()
   947  	})
   948  	n := len(run(context.Background(), t, slice))
   949  	for i, got := range output {
   950  		if want := n; got != want {
   951  			t.Errorf("wrong count for output %d, got %v, want %v", i, got, want)
   952  		}
   953  	}
   954  	for i, got := range shards {
   955  		if want := n; got != want {
   956  			t.Errorf("wrong count for shard %d, got %v, want %v", i, got, want)
   957  		}
   958  	}
   959  }
   960  
   961  func TestPanic(t *testing.T) {
   962  	slice := bigslice.Const(1, []int{1, 2, 3})
   963  	slice = bigslice.Map(slice, func(i int) int {
   964  		panic(i)
   965  	})
   966  	fn := bigslice.Func(func() bigslice.Slice { return slice })
   967  	ctx := context.Background()
   968  	for name, opt := range executors {
   969  		sess := exec.Start(opt)
   970  		// TODO(marius): faster teardown in bigmachine so that we can call this here.
   971  		// defer sess.Shutdown()
   972  		_, err := sess.Run(ctx, fn)
   973  		if err == nil {
   974  			t.Errorf("executor %s: expected error", name)
   975  			continue
   976  		}
   977  		if msg := err.Error(); !strings.Contains(msg, "panic while evaluating slice") {
   978  			t.Errorf("wrong error message %q", msg)
   979  		}
   980  	}
   981  }
   982  
   983  func TestEncodingError(t *testing.T) {
   984  	type ungobable struct {
   985  		x int
   986  	}
   987  	slice := bigslice.Const(1, []int{1, 2, 3})
   988  	slice = bigslice.Map(slice, func(x int) (int, ungobable) { return x, ungobable{x} })
   989  	slice = bigslice.Reduce(slice, func(a, e ungobable) ungobable { return ungobable{a.x + e.x} })
   990  
   991  	scannerErrs := runError(context.Background(), t, slice)
   992  	for name, scannerErr := range scannerErrs {
   993  		// The local executor keeps things in memory by default.
   994  		// Note thaht while, currently the Bigmachine executors will by default
   995  		// run everything through gob, this is not at all a requirement. So this
   996  		// test may begin failing in the presence of future optimizatons.
   997  		if name == "Local" {
   998  			continue
   999  		}
  1000  		err := scannerErr.Err
  1001  		if err == nil {
  1002  			t.Errorf("%s: expected error", name)
  1003  			continue
  1004  		}
  1005  		expected := errors.E(errors.Remote, errors.Fatal)
  1006  		if !errors.Match(expected, err) {
  1007  			t.Errorf("error %s: expected Remote, Fatal", err)
  1008  		}
  1009  		if !strings.Contains(err.Error(), "gob: type bigslice_test.ungobable has no exported fields") {
  1010  			t.Errorf("error %s: expected gob error", err)
  1011  		}
  1012  	}
  1013  }
  1014  
  1015  func TestMetrics(t *testing.T) {
  1016  	counter := metrics.NewCounter()
  1017  	slice := bigslice.Const(1, []int{1, 2, 3})
  1018  	slice = bigslice.Map(slice, func(ctx context.Context, i int) int {
  1019  		counter.Incr(metrics.ContextScope(ctx), int64(i))
  1020  		return i
  1021  	})
  1022  	fn := bigslice.Func(func() bigslice.Slice { return slice })
  1023  	ctx := context.Background()
  1024  	for name, opt := range executors {
  1025  		sess := exec.Start(opt)
  1026  		res, err := sess.Run(ctx, fn)
  1027  		if err != nil {
  1028  			t.Errorf("executor %s: %v", name, err)
  1029  			continue
  1030  		}
  1031  		if got, want := counter.Value(res.Scope()), int64(6); got != want {
  1032  			t.Errorf("executor %s: got %v, want %v", name, got, want)
  1033  		}
  1034  	}
  1035  
  1036  }
  1037  
  1038  func ExampleConst() {
  1039  	slice := bigslice.Const(2,
  1040  		[]int{0, 1, 2, 3},
  1041  		[]string{"zero", "one", "two", "three"},
  1042  	)
  1043  	slicetest.Print(slice)
  1044  	// Output:
  1045  	// 0 zero
  1046  	// 1 one
  1047  	// 2 two
  1048  	// 3 three
  1049  }
  1050  
  1051  func ExampleFilter() {
  1052  	slice := bigslice.Const(2,
  1053  		[]int{0, 1, 2, 3, 4, 5},
  1054  		[]string{"zero", "one", "two", "three", "four", "five"},
  1055  	)
  1056  	slice = bigslice.Filter(slice, func(x int, s string) bool {
  1057  		return x%2 == 0
  1058  	})
  1059  	slicetest.Print(slice)
  1060  	// Output:
  1061  	// 0 zero
  1062  	// 2 two
  1063  	// 4 four
  1064  }
  1065  
  1066  func ExampleFlatmap() {
  1067  	// Flatmap to split strings into words using different separators. The input
  1068  	// is of type Slice<string, string>:
  1069  	// - col0: the string
  1070  	// - col1: the separator
  1071  	//
  1072  	// The output is of type Slice<string, int>:
  1073  	// - col0: a word from the input strings
  1074  	// - col1: the length of the word
  1075  	slice := bigslice.Const(2,
  1076  		[]string{
  1077  			"Lorem ipsum dolor sit amet",
  1078  			"consectetur:adipiscing",
  1079  			"elit",
  1080  			"sed.do.eiusmod.tempor.incididunt",
  1081  		},
  1082  		[]string{" ", ":", ";", "."}, // Separators.
  1083  	)
  1084  	slice = bigslice.Flatmap(slice, func(s, sep string) ([]string, []int) {
  1085  		split := strings.Split(s, sep)
  1086  		lengths := make([]int, len(split))
  1087  		for i := range lengths {
  1088  			lengths[i] = len(split[i])
  1089  		}
  1090  		return split, lengths
  1091  	})
  1092  	slicetest.Print(slice)
  1093  	// Output:
  1094  	// Lorem 5
  1095  	// adipiscing 10
  1096  	// amet 4
  1097  	// consectetur 11
  1098  	// do 2
  1099  	// dolor 5
  1100  	// eiusmod 7
  1101  	// elit 4
  1102  	// incididunt 10
  1103  	// ipsum 5
  1104  	// sed 3
  1105  	// sit 3
  1106  	// tempor 6
  1107  }
  1108  
  1109  func ExampleFold() {
  1110  	// Fold over the input Slice<string, int, string> to accumulate a struct
  1111  	// holding:
  1112  	// - the sum of the integers in col1.
  1113  	// - the product of the integers in col1.
  1114  	// - the longest string encountered in col2.
  1115  	slice := bigslice.Const(2,
  1116  		[]string{"c", "a", "b", "c", "c", "b", "a", "a", "a", "a", "c"},
  1117  		[]int{1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11},
  1118  		[]string{
  1119  			"Lorem",
  1120  			"ipsum",
  1121  			"dolor",
  1122  			"sit",
  1123  			"amet",
  1124  			"consectetur",
  1125  			"adipiscing",
  1126  			"elit",
  1127  			"sed",
  1128  			"do",
  1129  			"eiusmod",
  1130  		},
  1131  	)
  1132  	type accum struct {
  1133  		ready bool
  1134  		// sum is the sum of integers in the second column.
  1135  		sum int
  1136  		// product is the product of integers in the second column.
  1137  		product int
  1138  		// longest is the longest string in the third column.
  1139  		longest string
  1140  	}
  1141  	slice = bigslice.Fold(slice, func(acc accum, i int, s string) accum {
  1142  		if !acc.ready {
  1143  			// Initialize product as the multiplicative identity, 1.
  1144  			acc.product = 1
  1145  			acc.ready = true
  1146  		}
  1147  		acc.sum += i
  1148  		acc.product *= i
  1149  		if len(acc.longest) < len(s) {
  1150  			acc.longest = s
  1151  		}
  1152  		return acc
  1153  	})
  1154  	slicetest.Print(slice)
  1155  	// Output:
  1156  	// a {true 36 10080 adipiscing}
  1157  	// b {true 9 18 consectetur}
  1158  	// c {true 21 220 eiusmod}
  1159  }
  1160  
  1161  func ExampleHead() {
  1162  	// Use one shard, as Head operates per shard.
  1163  	slice := bigslice.Const(1,
  1164  		[]int{0, 1, 2, 3, 4, 5},
  1165  		[]string{"zero", "one", "two", "three", "four", "five"},
  1166  	)
  1167  	slice = bigslice.Head(slice, 3)
  1168  	slicetest.Print(slice)
  1169  	// Output:
  1170  	// 0 zero
  1171  	// 1 one
  1172  	// 2 two
  1173  }
  1174  
  1175  func ExampleMap() {
  1176  	// Map an input of Slice<int, string>...:
  1177  	// - col0: an integer
  1178  	// - col1: a label for that integer
  1179  	//
  1180  	// ... to a Slice<int, string, int, string>:
  1181  	// - col0: original integer
  1182  	// - col1: original label
  1183  	// - col2: square of original integer
  1184  	// - col3: original label with ".squared" appended
  1185  	slice := bigslice.Const(2,
  1186  		[]int{0, 1, 2, 3},
  1187  		[]string{"zero", "one", "two", "three"},
  1188  	)
  1189  	slice = bigslice.Map(slice, func(x int, s string) (int, string, int, string) {
  1190  		return x, s, x * x, s + ".squared"
  1191  	})
  1192  	slicetest.Print(slice)
  1193  	// Output:
  1194  	// 0 zero 0 zero.squared
  1195  	// 1 one 1 one.squared
  1196  	// 2 two 4 two.squared
  1197  	// 3 three 9 three.squared
  1198  }
  1199  
  1200  func ExamplePrefixed() {
  1201  	// Count the number of pets of the same type and name by using Prefixed to
  1202  	// make a slice with the type and name columns as the key, then using Reduce
  1203  	// to count the number of elements that have that key.
  1204  	slice := bigslice.Const(2,
  1205  		[]string{
  1206  			"dog",
  1207  			"dog",
  1208  			"cat",
  1209  			"cat",
  1210  			"cat",
  1211  			"fish",
  1212  			"dog",
  1213  			"dog",
  1214  			"cat",
  1215  			"fish",
  1216  			"fish",
  1217  		},
  1218  		[]string{
  1219  			"spot",
  1220  			"spot",
  1221  			"mittens",
  1222  			"socks",
  1223  			"socks",
  1224  			"nemo",
  1225  			"lassie",
  1226  			"spot",
  1227  			"mittens",
  1228  			"nemo",
  1229  			"dory",
  1230  		},
  1231  		[]int{1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1},
  1232  	)
  1233  	slice = bigslice.Prefixed(slice, 2)
  1234  	slice = bigslice.Reduce(slice, func(a, b int) int { return a + b })
  1235  	slicetest.Print(slice)
  1236  	// Output:
  1237  	// cat mittens 2
  1238  	// cat socks 2
  1239  	// dog lassie 1
  1240  	// dog spot 3
  1241  	// fish dory 1
  1242  	// fish nemo 2
  1243  }
  1244  
  1245  func ExampleReaderFunc() {
  1246  	// Use ReaderFunc to make an evenly sharded Slice<int, string> from the
  1247  	// alphabet:
  1248  	// - col0: the 1-indexed index of the letter in the alphabet
  1249  	// - col1: the letter
  1250  	const numShards = 6
  1251  	const alphabet = "abcdefghijklmnopqrstuvwxyz"
  1252  	type state struct {
  1253  		// next is the index of the next element of the alphabet to be read.
  1254  		next int
  1255  	}
  1256  	slice := bigslice.ReaderFunc(numShards,
  1257  		func(shard int, s *state, is []int, ss []string) (int, error) {
  1258  			// Each shard will handle a portion of the alphabet.
  1259  			// Shard 0 reads letters 1, 7, 13, ....
  1260  			// Shard 1 reads letters 2, 8, 14, ....
  1261  			// ...
  1262  			// Shard 5 reads letters 6, 12, 18, ....
  1263  			if s.next == 0 {
  1264  				// This is the first call, so we initialize our state.
  1265  				s.next = shard + 1
  1266  			}
  1267  			for n := 0; ; n++ {
  1268  				if len(alphabet) < s.next {
  1269  					// Our shard is complete, so return EOF.
  1270  					return n, sliceio.EOF
  1271  				}
  1272  				if n == len(is) {
  1273  					// We have filled the passed buffers, so there is nothing
  1274  					// left to do in this invocation.
  1275  					return n, nil
  1276  				}
  1277  				is[n] = s.next
  1278  				ss[n] = string(alphabet[s.next-1])
  1279  				s.next += numShards
  1280  			}
  1281  		})
  1282  	slicetest.Print(slice)
  1283  	// Output:
  1284  	// 1 a
  1285  	// 2 b
  1286  	// 3 c
  1287  	// 4 d
  1288  	// 5 e
  1289  	// 6 f
  1290  	// 7 g
  1291  	// 8 h
  1292  	// 9 i
  1293  	// 10 j
  1294  	// 11 k
  1295  	// 12 l
  1296  	// 13 m
  1297  	// 14 n
  1298  	// 15 o
  1299  	// 16 p
  1300  	// 17 q
  1301  	// 18 r
  1302  	// 19 s
  1303  	// 20 t
  1304  	// 21 u
  1305  	// 22 v
  1306  	// 23 w
  1307  	// 24 x
  1308  	// 25 y
  1309  	// 26 z
  1310  }
  1311  
  1312  func ExampleScan() {
  1313  	// Use Scan to write a file for each shard of the input. Each file will
  1314  	// contain a line for each row in the shard.
  1315  	const numShards = 2
  1316  	slice := bigslice.Const(numShards,
  1317  		[]string{"a", "b", "c", "a", "b", "c"},
  1318  		[]int{3, 3, 2, 2, 1, 1},
  1319  	)
  1320  	// For this simple example, use shared memory to store the paths to these
  1321  	// files so that we can easily aggregate the files for output. If we were
  1322  	// distributing this computation across machines without access to shared
  1323  	// memory, we'd need to use a different mechanism, e.g. write files to a
  1324  	// common backing store with a known prefix.
  1325  	shardPaths := make([]string, numShards)
  1326  	slice = bigslice.Scan(slice,
  1327  		func(shard int, scanner *sliceio.Scanner) error {
  1328  			file, err := ioutil.TempFile("", "example-scan")
  1329  			if err != nil {
  1330  				return fmt.Errorf("could not open temp file: %v", err)
  1331  			}
  1332  			shardPaths[shard] = file.Name()
  1333  			var (
  1334  				s string
  1335  				x int
  1336  			)
  1337  			for scanner.Scan(context.Background(), &s, &x) {
  1338  				// Write a line in the file with the labeled elements of the
  1339  				// row.
  1340  				line := fmt.Sprintf("s:%s x:%d\n", s, x)
  1341  				if _, err = file.WriteString(line); err != nil {
  1342  					return fmt.Errorf("error writing file %s: %v", file.Name(), err)
  1343  				}
  1344  			}
  1345  			if err = file.Close(); err != nil {
  1346  				return fmt.Errorf("error closing file: %v", err)
  1347  			}
  1348  			return scanner.Err()
  1349  		},
  1350  	)
  1351  	// Print the resulting slice. This forces evaluation of the slice. Notice
  1352  	// that this prints no output because slice is empty. Scanning consumes the
  1353  	// slice.
  1354  	fmt.Println("# slice")
  1355  	slicetest.Print(slice)
  1356  
  1357  	// slicetest.Print evaluates the slice, so we now make sure to clean up
  1358  	// after ourselves.
  1359  	for _, path := range shardPaths {
  1360  		defer os.Remove(path)
  1361  	}
  1362  	fmt.Println("# lines by shard")
  1363  	for shard, path := range shardPaths {
  1364  		fmt.Printf("## shard %d\n", shard)
  1365  		// Read and sort the lines for deterministic output.
  1366  		var lines []string
  1367  		file, err := os.Open(path)
  1368  		if err != nil {
  1369  			log.Fatalf("error opening %s for reading: %v", path, err)
  1370  		}
  1371  		scanner := bufio.NewScanner(file)
  1372  		for scanner.Scan() {
  1373  			lines = append(lines, scanner.Text())
  1374  		}
  1375  		if scannerErr := scanner.Err(); scannerErr != nil {
  1376  			log.Fatalf("error scanning %s: %v", path, scannerErr)
  1377  		}
  1378  		sort.Strings(lines)
  1379  		for _, line := range lines {
  1380  			fmt.Println(line)
  1381  		}
  1382  	}
  1383  	// Output:
  1384  	// # slice
  1385  	// # lines by shard
  1386  	// ## shard 0
  1387  	// s:a x:3
  1388  	// s:b x:3
  1389  	// s:c x:2
  1390  	// ## shard 1
  1391  	// s:a x:2
  1392  	// s:b x:1
  1393  	// s:c x:1
  1394  }
  1395  
  1396  func ExampleWriterFunc() {
  1397  	// Use WriterFunc to write a file for each shard of the input. Each file
  1398  	// will contain a line for each row in the shard.
  1399  	const numShards = 2
  1400  	slice := bigslice.Const(numShards,
  1401  		[]string{"a", "b", "c", "a", "b", "c"},
  1402  		[]int{3, 3, 2, 2, 1, 1},
  1403  	)
  1404  	// For this simple example, use shared memory to store the paths to these
  1405  	// files so that we can easily aggregate the files for output. If we were
  1406  	// distributing this computation across machines without access to shared
  1407  	// memory, we'd need to use a different mechanism, e.g. write files to a
  1408  	// common backing store with a known prefix.
  1409  	shardPaths := make([]string, numShards)
  1410  	type writeState struct {
  1411  		file *os.File
  1412  	}
  1413  	slice = bigslice.WriterFunc(slice,
  1414  		func(shard int, state *writeState, readErr error, ss []string, xs []int) error {
  1415  			if state.file == nil {
  1416  				// First call; initialize state.
  1417  				var err error
  1418  				if state.file, err = ioutil.TempFile("", "example-writer-func"); err != nil {
  1419  					return fmt.Errorf("could not open temp file: %v", err)
  1420  				}
  1421  				shardPaths[shard] = state.file.Name()
  1422  			}
  1423  			for i := range ss {
  1424  				// We can safely assume that ss and xs are of equal length.
  1425  				s := ss[i]
  1426  				x := xs[i]
  1427  				// Write a line in the file with the labeled elements of the
  1428  				// row.
  1429  				line := fmt.Sprintf("s:%s x:%d\n", s, x)
  1430  				if _, err := state.file.WriteString(line); err != nil {
  1431  					return fmt.Errorf("error writing file: %v", err)
  1432  				}
  1433  			}
  1434  			if readErr != nil {
  1435  				// No more data is coming, so we close our file.
  1436  				if err := state.file.Close(); err != nil {
  1437  					return fmt.Errorf("error closing file: %v", err)
  1438  				}
  1439  			}
  1440  			return nil
  1441  		},
  1442  	)
  1443  	// Note that the slice passes through unadulterated.
  1444  	fmt.Println("# slice")
  1445  	slicetest.Print(slice)
  1446  
  1447  	// slicetest.Print evaluates the slice, so we now make sure to clean up
  1448  	// after ourselves.
  1449  	for _, path := range shardPaths {
  1450  		defer os.Remove(path)
  1451  	}
  1452  	fmt.Println("# lines by shard")
  1453  	for shard, path := range shardPaths {
  1454  		fmt.Printf("## shard %d\n", shard)
  1455  		// Read and sort the lines for deterministic output.
  1456  		var lines []string
  1457  		file, err := os.Open(path)
  1458  		if err != nil {
  1459  			log.Fatalf("error opening %s for reading: %v", path, err)
  1460  		}
  1461  		scanner := bufio.NewScanner(file)
  1462  		for scanner.Scan() {
  1463  			lines = append(lines, scanner.Text())
  1464  		}
  1465  		if scannerErr := scanner.Err(); scannerErr != nil {
  1466  			log.Fatalf("error scanning %s: %v", path, scannerErr)
  1467  		}
  1468  		sort.Strings(lines)
  1469  		for _, line := range lines {
  1470  			fmt.Println(line)
  1471  		}
  1472  	}
  1473  	// Output:
  1474  	// # slice
  1475  	// a 2
  1476  	// a 3
  1477  	// b 1
  1478  	// b 3
  1479  	// c 1
  1480  	// c 2
  1481  	// # lines by shard
  1482  	// ## shard 0
  1483  	// s:a x:3
  1484  	// s:b x:3
  1485  	// s:c x:2
  1486  	// ## shard 1
  1487  	// s:a x:2
  1488  	// s:b x:1
  1489  	// s:c x:1
  1490  }