github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/cogroup_test.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package bigslice_test 6 7 import ( 8 "reflect" 9 "sort" 10 "testing" 11 12 "github.com/grailbio/bigslice" 13 "github.com/grailbio/bigslice/slicetest" 14 "github.com/grailbio/bigslice/slicetype" 15 ) 16 17 // sortedCogroup returns a Cogroup slice that sorts resulting groups with 18 // ordered elements. 19 func sortedCogroup(slices ...bigslice.Slice) bigslice.Slice { 20 slice := bigslice.Cogroup(slices...) 21 return bigslice.Map(slice, makeSortSlices(slice)) 22 } 23 24 // makeSortSlices returns a map function (to be used with bigslice.Map) that sorts 25 // any slices with ordered elements. 26 func makeSortSlices(typ slicetype.Type) interface{} { 27 in := make([]reflect.Type, typ.NumOut()) 28 for i := 0; i < typ.NumOut(); i++ { 29 in[i] = typ.Out(i) 30 } 31 fTyp := reflect.FuncOf(in, in, false) 32 f := reflect.MakeFunc(fTyp, func(args []reflect.Value) []reflect.Value { 33 for _, arg := range args { 34 if arg.Kind() != reflect.Slice { 35 // Ignore anything that isn't a slice. 36 continue 37 } 38 switch arg.Type().Elem().Kind() { 39 case reflect.String: 40 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 41 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: 42 case reflect.Float32, reflect.Float64: 43 default: 44 // Ignore anything that isn't ordered. 45 continue 46 } 47 sort.Slice(arg.Interface(), func(i, j int) bool { 48 ai := arg.Index(i) 49 aj := arg.Index(j) 50 switch arg.Type().Elem().Kind() { 51 case reflect.String: 52 return ai.String() < aj.String() 53 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64: 54 return ai.Int() < aj.Int() 55 case reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64: 56 return ai.Uint() < aj.Uint() 57 case reflect.Float32, reflect.Float64: 58 return ai.Float() < aj.Float() 59 default: 60 panic("unreachable") 61 } 62 }) 63 } 64 return args 65 }) 66 return f.Interface() 67 } 68 69 func TestCogroup(t *testing.T) { 70 data1 := []interface{}{ 71 []string{"z", "b", "d", "d"}, 72 []int{1, 2, 3, 4}, 73 } 74 data2 := []interface{}{ 75 []string{"x", "y", "z", "d"}, 76 []string{"one", "two", "three", "four"}, 77 } 78 sharding := [][]int{{1, 1}, {1, 4}, {2, 1}, {4, 4}} 79 for _, shard := range sharding { 80 slice1 := bigslice.Const(shard[0], data1...) 81 slice2 := bigslice.Const(shard[1], data2...) 82 83 assertEqual(t, sortedCogroup(slice1, slice2), true, 84 []string{"b", "d", "x", "y", "z"}, 85 [][]int{{2}, {3, 4}, nil, nil, {1}}, 86 [][]string{nil, {"four"}, {"one"}, {"two"}, {"three"}}, 87 ) 88 assertEqual(t, sortedCogroup(slice2, slice1), true, 89 []string{"b", "d", "x", "y", "z"}, 90 [][]string{nil, {"four"}, {"one"}, {"two"}, {"three"}}, 91 [][]int{{2}, {3, 4}, nil, nil, {1}}, 92 ) 93 // Should work equally well for one slice. 94 assertEqual(t, sortedCogroup(slice1), true, 95 []string{"b", "d", "z"}, 96 [][]int{{2}, {3, 4}, {1}}, 97 ) 98 if testing.Short() { 99 break 100 } 101 } 102 } 103 104 func TestCogroupPrefixed(t *testing.T) { 105 data1 := []interface{}{ 106 []string{"z", "a", "a", "b", "d"}, 107 []int{0, 0, 0, 2, 3}, 108 []string{"foo", "bar", "baz", "qux", "quux"}, 109 } 110 data2 := []interface{}{ 111 []string{"d", "a", "a", "b", "c", "d", "b"}, 112 []int{3, 0, 1, 1, 2, 3, 1}, 113 []int{0, 1, 2, 3, 4, 5, 6}, 114 } 115 sharding := [][]int{{1, 1}, {1, 4}, {2, 1}, {4, 4}} 116 for _, shard := range sharding { 117 slice1 := bigslice.Const(shard[0], data1...) 118 slice1 = bigslice.Prefixed(slice1, 2) 119 slice2 := bigslice.Const(shard[1], data2...) 120 slice2 = bigslice.Prefixed(slice2, 2) 121 122 assertEqual(t, sortedCogroup(slice1, slice2), true, 123 []string{"a", "a", "b", "b", "c", "d", "z"}, 124 []int{0, 1, 1, 2, 2, 3, 0}, 125 [][]string{{"bar", "baz"}, nil, nil, {"qux"}, nil, {"quux"}, {"foo"}}, 126 [][]int{{1}, {2}, {3, 6}, nil, {4}, {0, 5}, nil}, 127 ) 128 assertEqual(t, sortedCogroup(slice2, slice1), true, 129 []string{"a", "a", "b", "b", "c", "d", "z"}, 130 []int{0, 1, 1, 2, 2, 3, 0}, 131 [][]int{{1}, {2}, {3, 6}, nil, {4}, {0, 5}, nil}, 132 [][]string{{"bar", "baz"}, nil, nil, {"qux"}, nil, {"quux"}, {"foo"}}, 133 ) 134 // Should work equally well for one slice. 135 assertEqual(t, sortedCogroup(slice1), true, 136 []string{"a", "b", "d", "z"}, 137 []int{0, 2, 3, 0}, 138 [][]string{{"bar", "baz"}, {"qux"}, {"quux"}, {"foo"}}, 139 ) 140 if testing.Short() { 141 break 142 } 143 } 144 } 145 146 func ExampleCogroup() { 147 slice0 := bigslice.Const(2, 148 []int{0, 1, 2, 3, 0, 1}, 149 []string{"zero", "one", "two", "three", "cero", "uno"}, 150 ) 151 slice1 := bigslice.Const(2, 152 []int{0, 1, 2, 3, 4, 5, 6}, 153 []int{0, 1, 4, 9, 16, 25, 36}, 154 ) 155 slice := bigslice.Cogroup(slice0, slice1) 156 slicetest.Print(slice) 157 // Output: 158 // 0 [cero zero] [0] 159 // 1 [one uno] [1] 160 // 2 [two] [4] 161 // 3 [three] [9] 162 // 4 [] [16] 163 // 5 [] [25] 164 // 6 [] [36] 165 } 166 167 func ExampleCogroup_one() { 168 slice := bigslice.Const(2, 169 []int{0, 1, 2, 3, 0, 1}, 170 []string{"zero", "one", "two", "three", "cero", "uno"}, 171 ) 172 slice = bigslice.Cogroup(slice) 173 slicetest.Print(slice) 174 // Output: 175 // 0 [cero zero] 176 // 1 [one uno] 177 // 2 [two] 178 // 3 [three] 179 }