github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/cogroup_test.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package bigslice_test
     6  
     7  import (
     8  	"reflect"
     9  	"sort"
    10  	"testing"
    11  
    12  	"github.com/grailbio/bigslice"
    13  	"github.com/grailbio/bigslice/slicetest"
    14  	"github.com/grailbio/bigslice/slicetype"
    15  )
    16  
    17  // sortedCogroup returns a Cogroup slice that sorts resulting groups with
    18  // ordered elements.
    19  func sortedCogroup(slices ...bigslice.Slice) bigslice.Slice {
    20  	slice := bigslice.Cogroup(slices...)
    21  	return bigslice.Map(slice, makeSortSlices(slice))
    22  }
    23  
    24  // makeSortSlices returns a map function (to be used with bigslice.Map) that sorts
    25  // any slices with ordered elements.
    26  func makeSortSlices(typ slicetype.Type) interface{} {
    27  	in := make([]reflect.Type, typ.NumOut())
    28  	for i := 0; i < typ.NumOut(); i++ {
    29  		in[i] = typ.Out(i)
    30  	}
    31  	fTyp := reflect.FuncOf(in, in, false)
    32  	f := reflect.MakeFunc(fTyp, func(args []reflect.Value) []reflect.Value {
    33  		for _, arg := range args {
    34  			if arg.Kind() != reflect.Slice {
    35  				// Ignore anything that isn't a slice.
    36  				continue
    37  			}
    38  			switch arg.Type().Elem().Kind() {
    39  			case reflect.String:
    40  			case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
    41  			case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
    42  			case reflect.Float32, reflect.Float64:
    43  			default:
    44  				// Ignore anything that isn't ordered.
    45  				continue
    46  			}
    47  			sort.Slice(arg.Interface(), func(i, j int) bool {
    48  				ai := arg.Index(i)
    49  				aj := arg.Index(j)
    50  				switch arg.Type().Elem().Kind() {
    51  				case reflect.String:
    52  					return ai.String() < aj.String()
    53  				case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
    54  					return ai.Int() < aj.Int()
    55  				case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64:
    56  					return ai.Uint() < aj.Uint()
    57  				case reflect.Float32, reflect.Float64:
    58  					return ai.Float() < aj.Float()
    59  				default:
    60  					panic("unreachable")
    61  				}
    62  			})
    63  		}
    64  		return args
    65  	})
    66  	return f.Interface()
    67  }
    68  
    69  func TestCogroup(t *testing.T) {
    70  	data1 := []interface{}{
    71  		[]string{"z", "b", "d", "d"},
    72  		[]int{1, 2, 3, 4},
    73  	}
    74  	data2 := []interface{}{
    75  		[]string{"x", "y", "z", "d"},
    76  		[]string{"one", "two", "three", "four"},
    77  	}
    78  	sharding := [][]int{{1, 1}, {1, 4}, {2, 1}, {4, 4}}
    79  	for _, shard := range sharding {
    80  		slice1 := bigslice.Const(shard[0], data1...)
    81  		slice2 := bigslice.Const(shard[1], data2...)
    82  
    83  		assertEqual(t, sortedCogroup(slice1, slice2), true,
    84  			[]string{"b", "d", "x", "y", "z"},
    85  			[][]int{{2}, {3, 4}, nil, nil, {1}},
    86  			[][]string{nil, {"four"}, {"one"}, {"two"}, {"three"}},
    87  		)
    88  		assertEqual(t, sortedCogroup(slice2, slice1), true,
    89  			[]string{"b", "d", "x", "y", "z"},
    90  			[][]string{nil, {"four"}, {"one"}, {"two"}, {"three"}},
    91  			[][]int{{2}, {3, 4}, nil, nil, {1}},
    92  		)
    93  		// Should work equally well for one slice.
    94  		assertEqual(t, sortedCogroup(slice1), true,
    95  			[]string{"b", "d", "z"},
    96  			[][]int{{2}, {3, 4}, {1}},
    97  		)
    98  		if testing.Short() {
    99  			break
   100  		}
   101  	}
   102  }
   103  
   104  func TestCogroupPrefixed(t *testing.T) {
   105  	data1 := []interface{}{
   106  		[]string{"z", "a", "a", "b", "d"},
   107  		[]int{0, 0, 0, 2, 3},
   108  		[]string{"foo", "bar", "baz", "qux", "quux"},
   109  	}
   110  	data2 := []interface{}{
   111  		[]string{"d", "a", "a", "b", "c", "d", "b"},
   112  		[]int{3, 0, 1, 1, 2, 3, 1},
   113  		[]int{0, 1, 2, 3, 4, 5, 6},
   114  	}
   115  	sharding := [][]int{{1, 1}, {1, 4}, {2, 1}, {4, 4}}
   116  	for _, shard := range sharding {
   117  		slice1 := bigslice.Const(shard[0], data1...)
   118  		slice1 = bigslice.Prefixed(slice1, 2)
   119  		slice2 := bigslice.Const(shard[1], data2...)
   120  		slice2 = bigslice.Prefixed(slice2, 2)
   121  
   122  		assertEqual(t, sortedCogroup(slice1, slice2), true,
   123  			[]string{"a", "a", "b", "b", "c", "d", "z"},
   124  			[]int{0, 1, 1, 2, 2, 3, 0},
   125  			[][]string{{"bar", "baz"}, nil, nil, {"qux"}, nil, {"quux"}, {"foo"}},
   126  			[][]int{{1}, {2}, {3, 6}, nil, {4}, {0, 5}, nil},
   127  		)
   128  		assertEqual(t, sortedCogroup(slice2, slice1), true,
   129  			[]string{"a", "a", "b", "b", "c", "d", "z"},
   130  			[]int{0, 1, 1, 2, 2, 3, 0},
   131  			[][]int{{1}, {2}, {3, 6}, nil, {4}, {0, 5}, nil},
   132  			[][]string{{"bar", "baz"}, nil, nil, {"qux"}, nil, {"quux"}, {"foo"}},
   133  		)
   134  		// Should work equally well for one slice.
   135  		assertEqual(t, sortedCogroup(slice1), true,
   136  			[]string{"a", "b", "d", "z"},
   137  			[]int{0, 2, 3, 0},
   138  			[][]string{{"bar", "baz"}, {"qux"}, {"quux"}, {"foo"}},
   139  		)
   140  		if testing.Short() {
   141  			break
   142  		}
   143  	}
   144  }
   145  
   146  func ExampleCogroup() {
   147  	slice0 := bigslice.Const(2,
   148  		[]int{0, 1, 2, 3, 0, 1},
   149  		[]string{"zero", "one", "two", "three", "cero", "uno"},
   150  	)
   151  	slice1 := bigslice.Const(2,
   152  		[]int{0, 1, 2, 3, 4, 5, 6},
   153  		[]int{0, 1, 4, 9, 16, 25, 36},
   154  	)
   155  	slice := bigslice.Cogroup(slice0, slice1)
   156  	slicetest.Print(slice)
   157  	// Output:
   158  	// 0 [cero zero] [0]
   159  	// 1 [one uno] [1]
   160  	// 2 [two] [4]
   161  	// 3 [three] [9]
   162  	// 4 [] [16]
   163  	// 5 [] [25]
   164  	// 6 [] [36]
   165  }
   166  
   167  func ExampleCogroup_one() {
   168  	slice := bigslice.Const(2,
   169  		[]int{0, 1, 2, 3, 0, 1},
   170  		[]string{"zero", "one", "two", "three", "cero", "uno"},
   171  	)
   172  	slice = bigslice.Cogroup(slice)
   173  	slicetest.Print(slice)
   174  	// Output:
   175  	// 0 [cero zero]
   176  	// 1 [one uno]
   177  	// 2 [two]
   178  	// 3 [three]
   179  }