github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/sortio/sort_test.go (about) 1 // Copyright 2018 GRAIL, Inc. All rights reserved. 2 // Use of this source code is governed by the Apache 2.0 3 // license that can be found in the LICENSE file. 4 5 package sortio 6 7 import ( 8 "context" 9 "reflect" 10 "sort" 11 "testing" 12 13 fuzz "github.com/google/gofuzz" 14 "github.com/grailbio/bigslice/frame" 15 "github.com/grailbio/bigslice/slicefunc" 16 "github.com/grailbio/bigslice/sliceio" 17 "github.com/grailbio/bigslice/slicetype" 18 ) 19 20 var ( 21 typeOfString = reflect.TypeOf("") 22 typeOfInt = reflect.TypeOf(0) 23 typeOfSliceOfString = reflect.SliceOf(typeOfString) 24 typeOfSliceOfInt = reflect.SliceOf(typeOfInt) 25 ) 26 27 // FuzzFrame creates a fuzzed frame of length n, where columns 28 // have the provided types. 29 func fuzzFrame(fz *fuzz.Fuzzer, n int, types ...reflect.Type) frame.Frame { 30 f := frame.Make(slicetype.New(types...), n, n) 31 for i := 0; i < f.NumOut(); i++ { 32 vp := reflect.New(types[i]) 33 for j := 0; j < n; j++ { 34 fz.Fuzz(vp.Interface()) 35 f.Index(i, j).Set(vp.Elem()) 36 } 37 } 38 return f 39 } 40 41 type fuzzReader struct { 42 Fuzz *fuzz.Fuzzer 43 N int 44 All frame.Frame 45 } 46 47 func (f *fuzzReader) Read(ctx context.Context, out frame.Frame) (int, error) { 48 if f.N == 0 { 49 return 0, sliceio.EOF 50 } 51 n := out.Len() 52 if f.N < n { 53 n = f.N 54 } 55 f.N -= n 56 for i := 0; i < out.NumOut(); i++ { 57 vp := reflect.New(out.Out(i)) 58 for j := 0; j < n; j++ { 59 f.Fuzz.Fuzz(vp.Interface()) 60 out.Index(i, j).Set(vp.Elem()) 61 } 62 } 63 f.All = frame.AppendFrame(f.All, out.Slice(0, n)) 64 return n, nil 65 } 66 67 func TestSort(t *testing.T) { 68 fz := fuzz.NewWithSeed(31415) 69 70 f := fuzzFrame(fz, 1000, typeOfString, typeOfString, typeOfString) 71 // Replace the third column with the concatenation of the two first 72 // columns so we can verify that the full rows are swapped. 73 for i := 0; i < f.Len(); i++ { 74 f.Index(2, i).SetString(f.Index(0, i).String() + f.Index(1, i).String()) 75 } 76 if sort.IsSorted(f) { 77 t.Fatal("unlikely") 78 } 79 sort.Sort(f) 80 if !sort.IsSorted(f) { 81 t.Fatal("frame did not sort") 82 } 83 // Make sure that the full rows are swapped. 84 for i := 0; i < f.Len(); i++ { 85 if got, want := f.Index(2, i).String(), f.Index(0, i).String()+f.Index(1, i).String(); got != want { 86 t.Errorf("row %d: got %v, want %v", i, got, want) 87 } 88 } 89 } 90 91 func TestMergeReader(t *testing.T) { 92 fz := fuzz.NewWithSeed(12345) 93 const ( 94 N = 1000 95 M = 100 96 ) 97 98 var ( 99 frames = make([]frame.Frame, M) 100 readers = make([]sliceio.Reader, M) 101 ) 102 for i := range frames { 103 f := fuzzFrame(fz, N, typeOfString, typeOfString, typeOfSliceOfString) 104 // Replace the third column with a slice of of the two first 105 // columns so we can verify that the full rows are swapped. 106 for j := 0; j < f.Len(); j++ { 107 s := reflect.MakeSlice(typeOfSliceOfString, 2, 2) 108 s.Index(0).Set(f.Index(0, j)) 109 s.Index(1).Set(f.Index(1, j)) 110 f.Index(2, j).Set(s) 111 } 112 sort.Sort(f) 113 frames[i] = f 114 readers[i] = sliceio.FrameReader(f) 115 } 116 117 ctx := context.Background() 118 m, err := NewMergeReader(ctx, frames[0], readers) 119 if err != nil { 120 t.Fatal(err) 121 } 122 123 out := frame.Make(frames[0], N*M, N*M) 124 n, err := sliceio.ReadFull(ctx, m, out) 125 if err != nil && err != sliceio.EOF { 126 t.Fatal(err) 127 } 128 if got, want := n, N*M; got != want { 129 t.Errorf("got %v, want %v", got, want) 130 } 131 if !sort.IsSorted(out) { 132 t.Error("frame not sorted") 133 } 134 var ( 135 a = out.Interface(0).([]string) 136 b = out.Interface(1).([]string) 137 c = out.Interface(2).([][]string) 138 ) 139 for i := range a { 140 if got, want := len(c[i]), 2; got != want { 141 t.Errorf("got %v, want %v for key %v", got, want, i) 142 continue 143 } 144 if got, want := c[i][0], a[i]; got != want { 145 t.Errorf("got %v, want %v for key %v", got, want, i) 146 } 147 if got, want := c[i][1], b[i]; got != want { 148 t.Errorf("got %v, want %v for key %v", got, want, i) 149 } 150 } 151 n, err = sliceio.ReadFull(ctx, m, out) 152 if got, want := err, sliceio.EOF; got != want { 153 t.Errorf("got %v, want %v", got, want) 154 } 155 if got, want := n, 0; got != want { 156 t.Errorf("got %v, want %v", got, want) 157 } 158 } 159 160 func TestSortReader(t *testing.T) { 161 const N = 1 << 20 162 var ( 163 fz = fuzz.NewWithSeed(123456) 164 r = &fuzzReader{fz, N, frame.Frame{}} 165 ctx = context.Background() 166 typ = slicetype.New(typeOfString, typeOfInt, typeOfSliceOfInt) 167 ) 168 sorted, err := SortReader(ctx, 1<<19, typ, r) 169 if err != nil { 170 t.Fatal(err) 171 } 172 out := frame.Make(typ, N, N) 173 n, err := sliceio.ReadFull(ctx, sorted, out) 174 if err != nil && err != sliceio.EOF { 175 t.Fatal(err) 176 } 177 if got, want := n, N; got != want { 178 t.Errorf("got %v, want %v", got, want) 179 } 180 if err == nil { 181 n, err = sliceio.ReadFull(ctx, sorted, frame.Make(typ, 1, 1)) 182 if got, want := err, sliceio.EOF; got != want { 183 t.Errorf("got %v, want %v", got, want) 184 } 185 if got, want := n, 0; got != want { 186 t.Errorf("got %v, want %v", got, want) 187 } 188 } 189 if !sort.IsSorted(out) { 190 t.Error("output not sorted") 191 } 192 sort.Sort(r.All) 193 // Just find unique keys since the sort is not stable. 194 keys := out.Interface(0).([]string) 195 if !reflect.DeepEqual(r.All.Interface(0), keys) { 196 t.Fatal("keys not equal") 197 } 198 keyCount := make(map[string]int) 199 for _, k := range keys { 200 keyCount[k]++ 201 } 202 var ( 203 outInts = out.Interface(1).([]int) 204 allInts = r.All.Interface(1).([]int) 205 outSlices = out.Interface(2).([][]int) 206 allSlices = r.All.Interface(2).([][]int) 207 ) 208 for i, k := range keys { 209 if keyCount[k] > 1 { 210 continue 211 } 212 if got, want := outInts[i], allInts[i]; got != want { 213 t.Errorf("got %v, want %v for %d", got, want, i) 214 } 215 if got, want := outSlices[i], allSlices[i]; !reflect.DeepEqual(got, want) { 216 t.Errorf("got %v, want %v for %d", got, want, i) 217 } 218 } 219 } 220 221 func TestReduceReader(t *testing.T) { 222 const ( 223 N = 1000 224 M = 10 225 ) 226 var ( 227 ints = make([]int, N) 228 strs = make([]string, N) 229 ) 230 for i := range ints { 231 ints[i] = i 232 strs[i] = "x" 233 } 234 f := frame.Slices(ints, strs, ints) 235 f = f.Prefixed(2) 236 readers := make([]sliceio.Reader, M) 237 for i := range readers { 238 readers[i] = sliceio.FrameReader(f) 239 } 240 fn, ok := slicefunc.Of(func(x, y int) int { return x + y }) 241 if !ok { 242 t.Fatal("unexpected bad func") 243 } 244 reducer := Reduce(f, "testreduce", readers, fn) 245 var ( 246 outIntsKey []int 247 outStrsKey []string 248 outIntsVal []int 249 ) 250 if err := sliceio.ReadAll(context.Background(), reducer, &outIntsKey, &outStrsKey, &outIntsVal); err != nil { 251 t.Fatal(err) 252 } 253 if got, want := len(outIntsKey), N; got != want { 254 t.Fatalf("got %v, want %v", got, want) 255 } 256 for i := range outIntsKey { 257 if got, want := outIntsKey[i], i; got != want { 258 t.Errorf("index %d: got %v, want %v", i, got, want) 259 } 260 if got, want := outStrsKey[i], "x"; got != want { 261 t.Errorf("index %d: got %v, want %v", i, got, want) 262 } 263 if got, want := outIntsVal[i], i*M; got != want { 264 t.Errorf("index %d: got %v, want %v", i, got, want) 265 } 266 } 267 }