github.com/grailbio/bigslice@v0.0.0-20230519005545-30c4c12152ad/sortio/sort_test.go (about)

     1  // Copyright 2018 GRAIL, Inc. All rights reserved.
     2  // Use of this source code is governed by the Apache 2.0
     3  // license that can be found in the LICENSE file.
     4  
     5  package sortio
     6  
     7  import (
     8  	"context"
     9  	"reflect"
    10  	"sort"
    11  	"testing"
    12  
    13  	fuzz "github.com/google/gofuzz"
    14  	"github.com/grailbio/bigslice/frame"
    15  	"github.com/grailbio/bigslice/slicefunc"
    16  	"github.com/grailbio/bigslice/sliceio"
    17  	"github.com/grailbio/bigslice/slicetype"
    18  )
    19  
    20  var (
    21  	typeOfString        = reflect.TypeOf("")
    22  	typeOfInt           = reflect.TypeOf(0)
    23  	typeOfSliceOfString = reflect.SliceOf(typeOfString)
    24  	typeOfSliceOfInt    = reflect.SliceOf(typeOfInt)
    25  )
    26  
    27  // FuzzFrame creates a fuzzed frame of length n, where columns
    28  // have the provided types.
    29  func fuzzFrame(fz *fuzz.Fuzzer, n int, types ...reflect.Type) frame.Frame {
    30  	f := frame.Make(slicetype.New(types...), n, n)
    31  	for i := 0; i < f.NumOut(); i++ {
    32  		vp := reflect.New(types[i])
    33  		for j := 0; j < n; j++ {
    34  			fz.Fuzz(vp.Interface())
    35  			f.Index(i, j).Set(vp.Elem())
    36  		}
    37  	}
    38  	return f
    39  }
    40  
// fuzzReader is a reader that fills the frames passed to its Read
// method with fuzzed values. Fuzz produces the values, N is the
// number of rows still to be produced (Read returns sliceio.EOF once
// it reaches 0), and All accumulates every row that Read has
// returned.
type fuzzReader struct {
	Fuzz *fuzz.Fuzzer
	N    int
	All  frame.Frame
}
    46  
    47  func (f *fuzzReader) Read(ctx context.Context, out frame.Frame) (int, error) {
    48  	if f.N == 0 {
    49  		return 0, sliceio.EOF
    50  	}
    51  	n := out.Len()
    52  	if f.N < n {
    53  		n = f.N
    54  	}
    55  	f.N -= n
    56  	for i := 0; i < out.NumOut(); i++ {
    57  		vp := reflect.New(out.Out(i))
    58  		for j := 0; j < n; j++ {
    59  			f.Fuzz.Fuzz(vp.Interface())
    60  			out.Index(i, j).Set(vp.Elem())
    61  		}
    62  	}
    63  	f.All = frame.AppendFrame(f.All, out.Slice(0, n))
    64  	return n, nil
    65  }
    66  
    67  func TestSort(t *testing.T) {
    68  	fz := fuzz.NewWithSeed(31415)
    69  
    70  	f := fuzzFrame(fz, 1000, typeOfString, typeOfString, typeOfString)
    71  	// Replace the third column with the concatenation of the two first
    72  	// columns so we can verify that the full rows are swapped.
    73  	for i := 0; i < f.Len(); i++ {
    74  		f.Index(2, i).SetString(f.Index(0, i).String() + f.Index(1, i).String())
    75  	}
    76  	if sort.IsSorted(f) {
    77  		t.Fatal("unlikely")
    78  	}
    79  	sort.Sort(f)
    80  	if !sort.IsSorted(f) {
    81  		t.Fatal("frame did not sort")
    82  	}
    83  	// Make sure that the full rows are swapped.
    84  	for i := 0; i < f.Len(); i++ {
    85  		if got, want := f.Index(2, i).String(), f.Index(0, i).String()+f.Index(1, i).String(); got != want {
    86  			t.Errorf("row %d: got %v, want %v", i, got, want)
    87  		}
    88  	}
    89  }
    90  
    91  func TestMergeReader(t *testing.T) {
    92  	fz := fuzz.NewWithSeed(12345)
    93  	const (
    94  		N = 1000
    95  		M = 100
    96  	)
    97  
    98  	var (
    99  		frames  = make([]frame.Frame, M)
   100  		readers = make([]sliceio.Reader, M)
   101  	)
   102  	for i := range frames {
   103  		f := fuzzFrame(fz, N, typeOfString, typeOfString, typeOfSliceOfString)
   104  		// Replace the third column with a slice of of the two first
   105  		// columns so we can verify that the full rows are swapped.
   106  		for j := 0; j < f.Len(); j++ {
   107  			s := reflect.MakeSlice(typeOfSliceOfString, 2, 2)
   108  			s.Index(0).Set(f.Index(0, j))
   109  			s.Index(1).Set(f.Index(1, j))
   110  			f.Index(2, j).Set(s)
   111  		}
   112  		sort.Sort(f)
   113  		frames[i] = f
   114  		readers[i] = sliceio.FrameReader(f)
   115  	}
   116  
   117  	ctx := context.Background()
   118  	m, err := NewMergeReader(ctx, frames[0], readers)
   119  	if err != nil {
   120  		t.Fatal(err)
   121  	}
   122  
   123  	out := frame.Make(frames[0], N*M, N*M)
   124  	n, err := sliceio.ReadFull(ctx, m, out)
   125  	if err != nil && err != sliceio.EOF {
   126  		t.Fatal(err)
   127  	}
   128  	if got, want := n, N*M; got != want {
   129  		t.Errorf("got %v, want %v", got, want)
   130  	}
   131  	if !sort.IsSorted(out) {
   132  		t.Error("frame not sorted")
   133  	}
   134  	var (
   135  		a = out.Interface(0).([]string)
   136  		b = out.Interface(1).([]string)
   137  		c = out.Interface(2).([][]string)
   138  	)
   139  	for i := range a {
   140  		if got, want := len(c[i]), 2; got != want {
   141  			t.Errorf("got %v, want %v for key %v", got, want, i)
   142  			continue
   143  		}
   144  		if got, want := c[i][0], a[i]; got != want {
   145  			t.Errorf("got %v, want %v for key %v", got, want, i)
   146  		}
   147  		if got, want := c[i][1], b[i]; got != want {
   148  			t.Errorf("got %v, want %v for key %v", got, want, i)
   149  		}
   150  	}
   151  	n, err = sliceio.ReadFull(ctx, m, out)
   152  	if got, want := err, sliceio.EOF; got != want {
   153  		t.Errorf("got %v, want %v", got, want)
   154  	}
   155  	if got, want := n, 0; got != want {
   156  		t.Errorf("got %v, want %v", got, want)
   157  	}
   158  }
   159  
   160  func TestSortReader(t *testing.T) {
   161  	const N = 1 << 20
   162  	var (
   163  		fz  = fuzz.NewWithSeed(123456)
   164  		r   = &fuzzReader{fz, N, frame.Frame{}}
   165  		ctx = context.Background()
   166  		typ = slicetype.New(typeOfString, typeOfInt, typeOfSliceOfInt)
   167  	)
   168  	sorted, err := SortReader(ctx, 1<<19, typ, r)
   169  	if err != nil {
   170  		t.Fatal(err)
   171  	}
   172  	out := frame.Make(typ, N, N)
   173  	n, err := sliceio.ReadFull(ctx, sorted, out)
   174  	if err != nil && err != sliceio.EOF {
   175  		t.Fatal(err)
   176  	}
   177  	if got, want := n, N; got != want {
   178  		t.Errorf("got %v, want %v", got, want)
   179  	}
   180  	if err == nil {
   181  		n, err = sliceio.ReadFull(ctx, sorted, frame.Make(typ, 1, 1))
   182  		if got, want := err, sliceio.EOF; got != want {
   183  			t.Errorf("got %v, want %v", got, want)
   184  		}
   185  		if got, want := n, 0; got != want {
   186  			t.Errorf("got %v, want %v", got, want)
   187  		}
   188  	}
   189  	if !sort.IsSorted(out) {
   190  		t.Error("output not sorted")
   191  	}
   192  	sort.Sort(r.All)
   193  	// Just find unique keys since the sort is not stable.
   194  	keys := out.Interface(0).([]string)
   195  	if !reflect.DeepEqual(r.All.Interface(0), keys) {
   196  		t.Fatal("keys not equal")
   197  	}
   198  	keyCount := make(map[string]int)
   199  	for _, k := range keys {
   200  		keyCount[k]++
   201  	}
   202  	var (
   203  		outInts   = out.Interface(1).([]int)
   204  		allInts   = r.All.Interface(1).([]int)
   205  		outSlices = out.Interface(2).([][]int)
   206  		allSlices = r.All.Interface(2).([][]int)
   207  	)
   208  	for i, k := range keys {
   209  		if keyCount[k] > 1 {
   210  			continue
   211  		}
   212  		if got, want := outInts[i], allInts[i]; got != want {
   213  			t.Errorf("got %v, want %v for %d", got, want, i)
   214  		}
   215  		if got, want := outSlices[i], allSlices[i]; !reflect.DeepEqual(got, want) {
   216  			t.Errorf("got %v, want %v for %d", got, want, i)
   217  		}
   218  	}
   219  }
   220  
   221  func TestReduceReader(t *testing.T) {
   222  	const (
   223  		N = 1000
   224  		M = 10
   225  	)
   226  	var (
   227  		ints = make([]int, N)
   228  		strs = make([]string, N)
   229  	)
   230  	for i := range ints {
   231  		ints[i] = i
   232  		strs[i] = "x"
   233  	}
   234  	f := frame.Slices(ints, strs, ints)
   235  	f = f.Prefixed(2)
   236  	readers := make([]sliceio.Reader, M)
   237  	for i := range readers {
   238  		readers[i] = sliceio.FrameReader(f)
   239  	}
   240  	fn, ok := slicefunc.Of(func(x, y int) int { return x + y })
   241  	if !ok {
   242  		t.Fatal("unexpected bad func")
   243  	}
   244  	reducer := Reduce(f, "testreduce", readers, fn)
   245  	var (
   246  		outIntsKey []int
   247  		outStrsKey []string
   248  		outIntsVal []int
   249  	)
   250  	if err := sliceio.ReadAll(context.Background(), reducer, &outIntsKey, &outStrsKey, &outIntsVal); err != nil {
   251  		t.Fatal(err)
   252  	}
   253  	if got, want := len(outIntsKey), N; got != want {
   254  		t.Fatalf("got %v, want %v", got, want)
   255  	}
   256  	for i := range outIntsKey {
   257  		if got, want := outIntsKey[i], i; got != want {
   258  			t.Errorf("index %d: got %v, want %v", i, got, want)
   259  		}
   260  		if got, want := outStrsKey[i], "x"; got != want {
   261  			t.Errorf("index %d: got %v, want %v", i, got, want)
   262  		}
   263  		if got, want := outIntsVal[i], i*M; got != want {
   264  			t.Errorf("index %d: got %v, want %v", i, got, want)
   265  		}
   266  	}
   267  }