github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/algo/uidlist_test.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package algo
    18  
    19  import (
    20  	"fmt"
    21  	"math/rand"
    22  	"sort"
    23  	"testing"
    24  	"time"
    25  
    26  	"github.com/dgraph-io/dgraph/codec"
    27  	"github.com/dgraph-io/dgraph/protos/pb"
    28  	"github.com/stretchr/testify/require"
    29  )
    30  
    31  func newList(data []uint64) *pb.List {
    32  	return &pb.List{Uids: data}
    33  }
    34  
    35  func TestMergeSorted1(t *testing.T) {
    36  	input := []*pb.List{
    37  		newList([]uint64{55}),
    38  	}
    39  	require.Equal(t, MergeSorted(input).Uids, []uint64{55})
    40  }
    41  
    42  func TestMergeSorted2(t *testing.T) {
    43  	input := []*pb.List{
    44  		newList([]uint64{1, 3, 6, 8, 10}),
    45  		newList([]uint64{2, 4, 5, 7, 15}),
    46  	}
    47  	require.Equal(t, MergeSorted(input).Uids,
    48  		[]uint64{1, 2, 3, 4, 5, 6, 7, 8, 10, 15})
    49  }
    50  
    51  func TestMergeSorted3(t *testing.T) {
    52  	input := []*pb.List{
    53  		newList([]uint64{1, 3, 6, 8, 10}),
    54  		newList([]uint64{}),
    55  	}
    56  	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
    57  }
    58  
    59  func TestMergeSorted4(t *testing.T) {
    60  	input := []*pb.List{
    61  		newList([]uint64{}),
    62  		newList([]uint64{1, 3, 6, 8, 10}),
    63  	}
    64  	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10})
    65  }
    66  
    67  func TestMergeSorted5(t *testing.T) {
    68  	input := []*pb.List{
    69  		newList([]uint64{}),
    70  		newList([]uint64{}),
    71  	}
    72  	require.Empty(t, MergeSorted(input).Uids)
    73  }
    74  
    75  func TestMergeSorted6(t *testing.T) {
    76  	input := []*pb.List{
    77  		newList([]uint64{11, 13, 16, 18, 20}),
    78  		newList([]uint64{12, 14, 15, 15, 16, 16, 17, 25}),
    79  		newList([]uint64{1, 2}),
    80  	}
    81  	require.Equal(t, MergeSorted(input).Uids,
    82  		[]uint64{1, 2, 11, 12, 13, 14, 15, 16, 17, 18, 20, 25})
    83  }
    84  
    85  func TestMergeSorted7(t *testing.T) {
    86  	input := []*pb.List{
    87  		newList([]uint64{5, 6, 7}),
    88  		newList([]uint64{3, 4}),
    89  		newList([]uint64{1, 2}),
    90  		newList([]uint64{}),
    91  	}
    92  	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 5, 6, 7})
    93  }
    94  
    95  func TestMergeSorted8(t *testing.T) {
    96  	input := []*pb.List{}
    97  	require.Empty(t, MergeSorted(input).Uids)
    98  }
    99  
   100  func TestMergeSorted9(t *testing.T) {
   101  	input := []*pb.List{
   102  		newList([]uint64{1, 1, 1}),
   103  	}
   104  	require.Equal(t, MergeSorted(input).Uids, []uint64{1})
   105  }
   106  
   107  func TestMergeSorted10(t *testing.T) {
   108  	input := []*pb.List{
   109  		newList([]uint64{1, 2, 3, 3, 6}),
   110  		newList([]uint64{4, 8, 9}),
   111  	}
   112  	require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 6, 8, 9})
   113  }
   114  
   115  func TestIntersectSorted1(t *testing.T) {
   116  	input := []*pb.List{
   117  		newList([]uint64{1, 2, 3}),
   118  		newList([]uint64{2, 3, 4, 5}),
   119  	}
   120  	require.Equal(t, []uint64{2, 3}, IntersectSorted(input).Uids)
   121  }
   122  
   123  func TestIntersectSorted2(t *testing.T) {
   124  	input := []*pb.List{
   125  		newList([]uint64{1, 2, 3}),
   126  	}
   127  	require.Equal(t, IntersectSorted(input).Uids, []uint64{1, 2, 3})
   128  }
   129  
   130  func TestIntersectSorted3(t *testing.T) {
   131  	input := []*pb.List{}
   132  	require.Empty(t, IntersectSorted(input).Uids)
   133  }
   134  
   135  func TestIntersectSorted4(t *testing.T) {
   136  	input := []*pb.List{
   137  		newList([]uint64{100, 101}),
   138  	}
   139  	require.Equal(t, IntersectSorted(input).Uids, []uint64{100, 101})
   140  }
   141  
   142  func TestIntersectSorted5(t *testing.T) {
   143  	input := []*pb.List{
   144  		newList([]uint64{1, 2, 3}),
   145  		newList([]uint64{2, 3, 4, 5}),
   146  		newList([]uint64{4, 5, 6}),
   147  	}
   148  	require.Empty(t, IntersectSorted(input).Uids)
   149  }
   150  
   151  func TestIntersectSorted6(t *testing.T) {
   152  	input := []*pb.List{
   153  		newList([]uint64{10, 12, 13}),
   154  		newList([]uint64{2, 3, 4, 13}),
   155  		newList([]uint64{4, 5, 6}),
   156  	}
   157  	require.Empty(t, IntersectSorted(input).Uids)
   158  }
   159  
   160  func TestDiffSorted1(t *testing.T) {
   161  	input := []*pb.List{
   162  		newList([]uint64{1, 2, 3}),
   163  		newList([]uint64{1}),
   164  	}
   165  	output := Difference(input[0], input[1])
   166  	require.Equal(t, []uint64{2, 3}, output.Uids)
   167  }
   168  
   169  func TestDiffSorted2(t *testing.T) {
   170  	input := []*pb.List{
   171  		newList([]uint64{1, 2, 3}),
   172  		newList([]uint64{2}),
   173  	}
   174  	output := Difference(input[0], input[1])
   175  	require.Equal(t, []uint64{1, 3}, output.Uids)
   176  }
   177  
   178  func TestDiffSorted3(t *testing.T) {
   179  	input := []*pb.List{
   180  		newList([]uint64{1, 2, 3}),
   181  		newList([]uint64{3}),
   182  	}
   183  	output := Difference(input[0], input[1])
   184  	require.Equal(t, []uint64{1, 2}, output.Uids)
   185  }
   186  
   187  func TestDiffSorted4(t *testing.T) {
   188  	input := []*pb.List{
   189  		newList([]uint64{1, 2, 3}),
   190  		newList([]uint64{}),
   191  	}
   192  	output := Difference(input[0], input[1])
   193  	require.Equal(t, []uint64{1, 2, 3}, output.Uids)
   194  }
   195  
   196  func TestDiffSorted5(t *testing.T) {
   197  	input := []*pb.List{
   198  		newList([]uint64{}),
   199  		newList([]uint64{1, 2}),
   200  	}
   201  	output := Difference(input[0], input[1])
   202  	require.Equal(t, []uint64{}, output.Uids)
   203  }
   204  
   205  func TestSubSorted1(t *testing.T) {
   206  	input := []*pb.List{
   207  		newList([]uint64{1, 2, 3}),
   208  		newList([]uint64{2, 3, 4, 5}),
   209  	}
   210  	output := Difference(input[0], input[1])
   211  	require.Equal(t, []uint64{1}, output.Uids)
   212  }
   213  
   214  func TestSubSorted6(t *testing.T) {
   215  	input := []*pb.List{
   216  		newList([]uint64{10, 12, 13}),
   217  		newList([]uint64{2, 3, 4, 13}),
   218  	}
   219  	output := Difference(input[0], input[1])
   220  	require.Equal(t, []uint64{10, 12}, output.Uids)
   221  }
   222  
   223  func TestUIDListIntersect1(t *testing.T) {
   224  	u := newList([]uint64{1, 2, 3})
   225  	v := newList([]uint64{})
   226  	IntersectWith(u, v, u)
   227  	require.Empty(t, u.Uids)
   228  }
   229  
   230  func TestUIDListIntersect2(t *testing.T) {
   231  	u := newList([]uint64{1, 2, 3})
   232  	v := newList([]uint64{1, 2, 3, 4, 5})
   233  	IntersectWith(u, v, u)
   234  	require.Equal(t, []uint64{1, 2, 3}, u.Uids)
   235  	require.Equal(t, []uint64{1, 2, 3, 4, 5}, v.Uids)
   236  }
   237  
   238  func TestUIDListIntersect3(t *testing.T) {
   239  	u := newList([]uint64{1, 2, 3})
   240  	v := newList([]uint64{2})
   241  	IntersectWith(u, v, u)
   242  	require.Equal(t, []uint64{2}, u.Uids)
   243  	require.Equal(t, []uint64{2}, v.Uids)
   244  }
   245  
   246  func TestUIDListIntersect4(t *testing.T) {
   247  	u := newList([]uint64{1, 2, 3})
   248  	v := newList([]uint64{0, 5})
   249  	IntersectWith(u, v, u)
   250  	require.Empty(t, u.Uids)
   251  	require.Equal(t, []uint64{0, 5}, v.Uids)
   252  }
   253  
   254  func TestUIDListIntersect5(t *testing.T) {
   255  	u := newList([]uint64{1, 2, 3})
   256  	v := newList([]uint64{3, 5})
   257  	IntersectWith(u, v, u)
   258  	require.Equal(t, []uint64{3}, u.Uids)
   259  }
   260  
   261  func TestUIDListIntersectDupFirst(t *testing.T) {
   262  	u := newList([]uint64{1, 1, 2, 3})
   263  	v := newList([]uint64{1, 2})
   264  	IntersectWith(u, v, u)
   265  	require.Equal(t, []uint64{1, 2}, u.Uids)
   266  }
   267  
   268  func TestUIDListIntersectDupBoth(t *testing.T) {
   269  	u := newList([]uint64{1, 1, 2, 3, 5})
   270  	v := newList([]uint64{1, 1, 2, 4})
   271  	IntersectWith(u, v, u)
   272  	require.Equal(t, []uint64{1, 1, 2}, u.Uids)
   273  }
   274  
   275  func TestUIDListIntersectDupSecond(t *testing.T) {
   276  	u := newList([]uint64{1, 2, 3, 5})
   277  	v := newList([]uint64{1, 1, 2, 4})
   278  	IntersectWith(u, v, u)
   279  	require.Equal(t, []uint64{1, 2}, u.Uids)
   280  }
   281  
   282  func TestApplyFilterUint(t *testing.T) {
   283  	l := []uint64{1, 2, 3, 4, 5}
   284  	u := newList(l)
   285  	ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 })
   286  	require.Equal(t, []uint64{1, 3, 5}, u.Uids)
   287  }
   288  
   289  // Benchmarks for IntersectWith
   290  func BenchmarkListIntersectRandom(b *testing.B) {
   291  	randomTests := func(arrSz int, overlap float64) {
   292  		limit := int64(float64(arrSz) / overlap)
   293  		u1, v1 := make([]uint64, arrSz, arrSz), make([]uint64, arrSz, arrSz)
   294  		for i := 0; i < arrSz; i++ {
   295  			u1[i] = uint64(rand.Int63n(limit))
   296  			v1[i] = uint64(rand.Int63n(limit))
   297  		}
   298  		sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
   299  		sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
   300  
   301  		u := newList(u1)
   302  		v := newList(v1)
   303  		dst1 := &pb.List{}
   304  		dst2 := &pb.List{}
   305  		compressedUids := codec.Encode(u1, 256)
   306  
   307  		b.Run(fmt.Sprintf(":size=%d:overlap=%.2f:", arrSz, overlap),
   308  			func(b *testing.B) {
   309  				for k := 0; k < b.N; k++ {
   310  					IntersectWith(u, v, dst1)
   311  				}
   312  			})
   313  
   314  		b.Run(fmt.Sprintf(":compressed:size=%d:overlap=%.2f:", arrSz, overlap),
   315  			func(b *testing.B) {
   316  				for k := 0; k < b.N; k++ {
   317  					IntersectCompressedWith(compressedUids, 0, v, dst2)
   318  				}
   319  			})
   320  		i := 0
   321  		j := 0
   322  		for i < len(dst1.Uids) {
   323  			if dst1.Uids[i] != dst2.Uids[j] {
   324  				b.Errorf("Unexpected error in intersection")
   325  			}
   326  			// Behaviour of bin intersect is not defined when duplicates are present
   327  			i = skipDuplicate(dst1.Uids, i)
   328  			j = skipDuplicate(dst2.Uids, j)
   329  		}
   330  		if j < len(dst2.Uids) {
   331  			b.Errorf("Unexpected error in intersection")
   332  		}
   333  	}
   334  
   335  	randomTests(10240, 0.3)
   336  	randomTests(1024000, 0.3)
   337  	randomTests(10240, 0.1)
   338  	randomTests(1024000, 0.1)
   339  	randomTests(10240, 0.01)
   340  	randomTests(1024000, 0.01)
   341  }
   342  
   343  func BenchmarkListIntersectRatio(b *testing.B) {
   344  	randomTests := func(sz int, overlap float64) {
   345  		rs := []int{1, 10, 50, 100, 500, 1000, 10000, 100000, 1000000}
   346  		for _, r := range rs {
   347  			sz1 := sz
   348  			sz2 := sz * r
   349  			if sz2 > 1000000 {
   350  				break
   351  			}
   352  
   353  			u1, v1 := make([]uint64, sz1, sz1), make([]uint64, sz2, sz2)
   354  			limit := int64(float64(sz) / overlap)
   355  			for i := 0; i < sz1; i++ {
   356  				u1[i] = uint64(rand.Int63n(limit))
   357  			}
   358  			for i := 0; i < sz2; i++ {
   359  				v1[i] = uint64(rand.Int63n(limit))
   360  			}
   361  			sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] })
   362  			sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] })
   363  
   364  			u := &pb.List{Uids: u1}
   365  			v := &pb.List{Uids: v1}
   366  			dst1 := &pb.List{}
   367  			dst2 := &pb.List{}
   368  			compressedUids := codec.Encode(v1, 256)
   369  
   370  			fmt.Printf("len: %d, compressed: %d, bytes/int: %f\n",
   371  				len(v1), compressedUids.Size(), float64(compressedUids.Size())/float64(len(v1)))
   372  			b.Run(fmt.Sprintf(":IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
   373  				func(b *testing.B) {
   374  					for k := 0; k < b.N; k++ {
   375  						IntersectWith(u, v, dst1)
   376  					}
   377  				})
   378  			b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap),
   379  				func(b *testing.B) {
   380  					for k := 0; k < b.N; k++ {
   381  						IntersectCompressedWith(compressedUids, 0, u, dst2)
   382  					}
   383  				})
   384  			fmt.Println()
   385  			i := 0
   386  			j := 0
   387  			for i < len(dst1.Uids) {
   388  				if dst1.Uids[i] != dst2.Uids[j] {
   389  					b.Errorf("Unexpected error in intersection")
   390  				}
   391  				// Behaviour of bin intersect is not defined when duplicates are present
   392  				i = skipDuplicate(dst1.Uids, i)
   393  				j = skipDuplicate(dst2.Uids, j)
   394  			}
   395  			if j < len(dst2.Uids) {
   396  				b.Errorf("Unexpected error in intersection")
   397  			}
   398  		}
   399  	}
   400  
   401  	randomTests(10, 0.01)
   402  	randomTests(100, 0.01)
   403  	randomTests(1000, 0.01)
   404  	randomTests(10000, 0.01)
   405  	randomTests(100000, 0.01)
   406  	randomTests(1000000, 0.01)
   407  }
   408  
   409  func skipDuplicate(in []uint64, idx int) int {
   410  	i := idx + 1
   411  	for i < len(in) && in[i] == in[idx] {
   412  		i++
   413  	}
   414  	return i
   415  }
   416  
   417  func sortUint64(nums []uint64) {
   418  	sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] })
   419  }
   420  
   421  func fillNums(N1, N2 int) ([]uint64, []uint64, []uint64) {
   422  	rand.Seed(time.Now().UnixNano())
   423  
   424  	commonNums := make([]uint64, N1)
   425  	blockNums := make([]uint64, N1+N2)
   426  	otherNums := make([]uint64, N1+N2)
   427  
   428  	for i := 0; i < N1; i++ {
   429  		val := rand.Uint64()
   430  		commonNums[i] = val
   431  		blockNums[i] = val
   432  		otherNums[i] = val
   433  	}
   434  
   435  	for i := N1; i < N1+N2; i++ {
   436  		blockNums[i] = rand.Uint64()
   437  		otherNums[i] = rand.Uint64()
   438  	}
   439  
   440  	sortUint64(commonNums)
   441  	sortUint64(blockNums)
   442  	sortUint64(otherNums)
   443  
   444  	return commonNums, blockNums, otherNums
   445  }
   446  
   447  func TestIntersectCompressedWithLinJump(t *testing.T) {
   448  	lengths := []int{0, 1, 3, 11, 100}
   449  
   450  	for _, N1 := range lengths {
   451  		for _, N2 := range lengths {
   452  			// Intersection of blockNums and otherNums is commonNums.
   453  			commonNums, blockNums, otherNums := fillNums(N1, N2)
   454  
   455  			enc := codec.Encoder{BlockSize: 10}
   456  			for _, num := range blockNums {
   457  				enc.Add(num)
   458  			}
   459  
   460  			pack := enc.Done()
   461  			dec := codec.Decoder{Pack: pack}
   462  
   463  			actual := make([]uint64, 0)
   464  			IntersectCompressedWithLinJump(&dec, otherNums, &actual)
   465  			require.Equal(t, commonNums, actual)
   466  		}
   467  	}
   468  }