github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/algo/uidlist_test.go (about) 1 /* 2 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package algo 18 19 import ( 20 "fmt" 21 "math/rand" 22 "sort" 23 "testing" 24 "time" 25 26 "github.com/dgraph-io/dgraph/codec" 27 "github.com/dgraph-io/dgraph/protos/pb" 28 "github.com/stretchr/testify/require" 29 ) 30 31 func newList(data []uint64) *pb.List { 32 return &pb.List{Uids: data} 33 } 34 35 func TestMergeSorted1(t *testing.T) { 36 input := []*pb.List{ 37 newList([]uint64{55}), 38 } 39 require.Equal(t, MergeSorted(input).Uids, []uint64{55}) 40 } 41 42 func TestMergeSorted2(t *testing.T) { 43 input := []*pb.List{ 44 newList([]uint64{1, 3, 6, 8, 10}), 45 newList([]uint64{2, 4, 5, 7, 15}), 46 } 47 require.Equal(t, MergeSorted(input).Uids, 48 []uint64{1, 2, 3, 4, 5, 6, 7, 8, 10, 15}) 49 } 50 51 func TestMergeSorted3(t *testing.T) { 52 input := []*pb.List{ 53 newList([]uint64{1, 3, 6, 8, 10}), 54 newList([]uint64{}), 55 } 56 require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10}) 57 } 58 59 func TestMergeSorted4(t *testing.T) { 60 input := []*pb.List{ 61 newList([]uint64{}), 62 newList([]uint64{1, 3, 6, 8, 10}), 63 } 64 require.Equal(t, MergeSorted(input).Uids, []uint64{1, 3, 6, 8, 10}) 65 } 66 67 func TestMergeSorted5(t *testing.T) { 68 input := []*pb.List{ 69 newList([]uint64{}), 70 newList([]uint64{}), 71 } 72 require.Empty(t, MergeSorted(input).Uids) 73 } 74 75 func TestMergeSorted6(t *testing.T) { 76 input := []*pb.List{ 77 newList([]uint64{11, 13, 16, 18, 20}), 78 newList([]uint64{12, 14, 15, 15, 16, 16, 17, 25}), 79 newList([]uint64{1, 2}), 80 } 81 require.Equal(t, MergeSorted(input).Uids, 82 []uint64{1, 2, 11, 12, 13, 14, 15, 16, 17, 18, 20, 25}) 83 } 84 85 func TestMergeSorted7(t *testing.T) { 86 input := []*pb.List{ 87 newList([]uint64{5, 6, 7}), 88 newList([]uint64{3, 4}), 89 newList([]uint64{1, 2}), 90 newList([]uint64{}), 91 } 92 require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 5, 6, 7}) 93 } 94 95 func TestMergeSorted8(t *testing.T) { 96 input := []*pb.List{} 97 require.Empty(t, MergeSorted(input).Uids) 98 } 99 100 func TestMergeSorted9(t *testing.T) { 101 input := []*pb.List{ 102 newList([]uint64{1, 1, 1}), 103 } 104 require.Equal(t, MergeSorted(input).Uids, []uint64{1}) 105 } 106 107 func TestMergeSorted10(t *testing.T) { 108 input := []*pb.List{ 109 newList([]uint64{1, 2, 3, 3, 6}), 110 newList([]uint64{4, 8, 9}), 111 } 112 require.Equal(t, MergeSorted(input).Uids, []uint64{1, 2, 3, 4, 6, 8, 9}) 113 } 114 115 func TestIntersectSorted1(t *testing.T) { 116 input := []*pb.List{ 117 newList([]uint64{1, 2, 3}), 118 newList([]uint64{2, 3, 4, 5}), 119 } 120 require.Equal(t, []uint64{2, 3}, IntersectSorted(input).Uids) 121 } 122 123 func TestIntersectSorted2(t *testing.T) { 124 input := []*pb.List{ 125 newList([]uint64{1, 2, 3}), 126 } 127 require.Equal(t, IntersectSorted(input).Uids, []uint64{1, 2, 3}) 128 } 129 130 func TestIntersectSorted3(t *testing.T) { 131 input := []*pb.List{} 132 require.Empty(t, IntersectSorted(input).Uids) 133 } 134 135 func TestIntersectSorted4(t *testing.T) { 136 input := []*pb.List{ 137 newList([]uint64{100, 101}), 138 } 139 require.Equal(t, IntersectSorted(input).Uids, []uint64{100, 101}) 140 } 141 142 func TestIntersectSorted5(t *testing.T) { 143 input := []*pb.List{ 144 newList([]uint64{1, 2, 3}), 145 newList([]uint64{2, 3, 4, 5}), 146 newList([]uint64{4, 5, 6}), 147 } 148 require.Empty(t, IntersectSorted(input).Uids) 149 } 150 151 func TestIntersectSorted6(t *testing.T) { 152 input := []*pb.List{ 153 newList([]uint64{10, 12, 13}), 154 newList([]uint64{2, 3, 4, 13}), 155 newList([]uint64{4, 5, 6}), 156 } 157 require.Empty(t, IntersectSorted(input).Uids) 158 } 159 160 func TestDiffSorted1(t *testing.T) { 161 input := []*pb.List{ 162 newList([]uint64{1, 2, 3}), 163 newList([]uint64{1}), 164 } 165 output := Difference(input[0], input[1]) 166 require.Equal(t, []uint64{2, 3}, output.Uids) 167 } 168 169 func TestDiffSorted2(t *testing.T) { 170 input := []*pb.List{ 171 newList([]uint64{1, 2, 3}), 172 newList([]uint64{2}), 173 } 174 output := Difference(input[0], input[1]) 175 require.Equal(t, []uint64{1, 3}, output.Uids) 176 } 177 178 func TestDiffSorted3(t *testing.T) { 179 input := []*pb.List{ 180 newList([]uint64{1, 2, 3}), 181 newList([]uint64{3}), 182 } 183 output := Difference(input[0], input[1]) 184 require.Equal(t, []uint64{1, 2}, output.Uids) 185 } 186 187 func TestDiffSorted4(t *testing.T) { 188 input := []*pb.List{ 189 newList([]uint64{1, 2, 3}), 190 newList([]uint64{}), 191 } 192 output := Difference(input[0], input[1]) 193 require.Equal(t, []uint64{1, 2, 3}, output.Uids) 194 } 195 196 func TestDiffSorted5(t *testing.T) { 197 input := []*pb.List{ 198 newList([]uint64{}), 199 newList([]uint64{1, 2}), 200 } 201 output := Difference(input[0], input[1]) 202 require.Equal(t, []uint64{}, output.Uids) 203 } 204 205 func TestSubSorted1(t *testing.T) { 206 input := []*pb.List{ 207 newList([]uint64{1, 2, 3}), 208 newList([]uint64{2, 3, 4, 5}), 209 } 210 output := Difference(input[0], input[1]) 211 require.Equal(t, []uint64{1}, output.Uids) 212 } 213 214 func TestSubSorted6(t *testing.T) { 215 input := []*pb.List{ 216 newList([]uint64{10, 12, 13}), 217 newList([]uint64{2, 3, 4, 13}), 218 } 219 output := Difference(input[0], input[1]) 220 require.Equal(t, []uint64{10, 12}, output.Uids) 221 } 222 223 func TestUIDListIntersect1(t *testing.T) { 224 u := newList([]uint64{1, 2, 3}) 225 v := newList([]uint64{}) 226 IntersectWith(u, v, u) 227 require.Empty(t, u.Uids) 228 } 229 230 func TestUIDListIntersect2(t *testing.T) { 231 u := newList([]uint64{1, 2, 3}) 232 v := newList([]uint64{1, 2, 3, 4, 5}) 233 IntersectWith(u, v, u) 234 require.Equal(t, []uint64{1, 2, 3}, u.Uids) 235 require.Equal(t, []uint64{1, 2, 3, 4, 5}, v.Uids) 236 } 237 238 func TestUIDListIntersect3(t *testing.T) { 239 u := newList([]uint64{1, 2, 3}) 240 v := newList([]uint64{2}) 241 IntersectWith(u, v, u) 242 require.Equal(t, []uint64{2}, u.Uids) 243 require.Equal(t, []uint64{2}, v.Uids) 244 } 245 246 func TestUIDListIntersect4(t *testing.T) { 247 u := newList([]uint64{1, 2, 3}) 248 v := newList([]uint64{0, 5}) 249 IntersectWith(u, v, u) 250 require.Empty(t, u.Uids) 251 require.Equal(t, []uint64{0, 5}, v.Uids) 252 } 253 254 func TestUIDListIntersect5(t *testing.T) { 255 u := newList([]uint64{1, 2, 3}) 256 v := newList([]uint64{3, 5}) 257 IntersectWith(u, v, u) 258 require.Equal(t, []uint64{3}, u.Uids) 259 } 260 261 func TestUIDListIntersectDupFirst(t *testing.T) { 262 u := newList([]uint64{1, 1, 2, 3}) 263 v := newList([]uint64{1, 2}) 264 IntersectWith(u, v, u) 265 require.Equal(t, []uint64{1, 2}, u.Uids) 266 } 267 268 func TestUIDListIntersectDupBoth(t *testing.T) { 269 u := newList([]uint64{1, 1, 2, 3, 5}) 270 v := newList([]uint64{1, 1, 2, 4}) 271 IntersectWith(u, v, u) 272 require.Equal(t, []uint64{1, 1, 2}, u.Uids) 273 } 274 275 func TestUIDListIntersectDupSecond(t *testing.T) { 276 u := newList([]uint64{1, 2, 3, 5}) 277 v := newList([]uint64{1, 1, 2, 4}) 278 IntersectWith(u, v, u) 279 require.Equal(t, []uint64{1, 2}, u.Uids) 280 } 281 282 func TestApplyFilterUint(t *testing.T) { 283 l := []uint64{1, 2, 3, 4, 5} 284 u := newList(l) 285 ApplyFilter(u, func(a uint64, idx int) bool { return (l[idx] % 2) == 1 }) 286 require.Equal(t, []uint64{1, 3, 5}, u.Uids) 287 } 288 289 // Benchmarks for IntersectWith 290 func BenchmarkListIntersectRandom(b *testing.B) { 291 randomTests := func(arrSz int, overlap float64) { 292 limit := int64(float64(arrSz) / overlap) 293 u1, v1 := make([]uint64, arrSz, arrSz), make([]uint64, arrSz, arrSz) 294 for i := 0; i < arrSz; i++ { 295 u1[i] = uint64(rand.Int63n(limit)) 296 v1[i] = uint64(rand.Int63n(limit)) 297 } 298 sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] }) 299 sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] }) 300 301 u := newList(u1) 302 v := newList(v1) 303 dst1 := &pb.List{} 304 dst2 := &pb.List{} 305 compressedUids := codec.Encode(u1, 256) 306 307 b.Run(fmt.Sprintf(":size=%d:overlap=%.2f:", arrSz, overlap), 308 func(b *testing.B) { 309 for k := 0; k < b.N; k++ { 310 IntersectWith(u, v, dst1) 311 } 312 }) 313 314 b.Run(fmt.Sprintf(":compressed:size=%d:overlap=%.2f:", arrSz, overlap), 315 func(b *testing.B) { 316 for k := 0; k < b.N; k++ { 317 IntersectCompressedWith(compressedUids, 0, v, dst2) 318 } 319 }) 320 i := 0 321 j := 0 322 for i < len(dst1.Uids) { 323 if dst1.Uids[i] != dst2.Uids[j] { 324 b.Errorf("Unexpected error in intersection") 325 } 326 // Behaviour of bin intersect is not defined when duplicates are present 327 i = skipDuplicate(dst1.Uids, i) 328 j = skipDuplicate(dst2.Uids, j) 329 } 330 if j < len(dst2.Uids) { 331 b.Errorf("Unexpected error in intersection") 332 } 333 } 334 335 randomTests(10240, 0.3) 336 randomTests(1024000, 0.3) 337 randomTests(10240, 0.1) 338 randomTests(1024000, 0.1) 339 randomTests(10240, 0.01) 340 randomTests(1024000, 0.01) 341 } 342 343 func BenchmarkListIntersectRatio(b *testing.B) { 344 randomTests := func(sz int, overlap float64) { 345 rs := []int{1, 10, 50, 100, 500, 1000, 10000, 100000, 1000000} 346 for _, r := range rs { 347 sz1 := sz 348 sz2 := sz * r 349 if sz2 > 1000000 { 350 break 351 } 352 353 u1, v1 := make([]uint64, sz1, sz1), make([]uint64, sz2, sz2) 354 limit := int64(float64(sz) / overlap) 355 for i := 0; i < sz1; i++ { 356 u1[i] = uint64(rand.Int63n(limit)) 357 } 358 for i := 0; i < sz2; i++ { 359 v1[i] = uint64(rand.Int63n(limit)) 360 } 361 sort.Slice(u1, func(i, j int) bool { return u1[i] < u1[j] }) 362 sort.Slice(v1, func(i, j int) bool { return v1[i] < v1[j] }) 363 364 u := &pb.List{Uids: u1} 365 v := &pb.List{Uids: v1} 366 dst1 := &pb.List{} 367 dst2 := &pb.List{} 368 compressedUids := codec.Encode(v1, 256) 369 370 fmt.Printf("len: %d, compressed: %d, bytes/int: %f\n", 371 len(v1), compressedUids.Size(), float64(compressedUids.Size())/float64(len(v1))) 372 b.Run(fmt.Sprintf(":IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap), 373 func(b *testing.B) { 374 for k := 0; k < b.N; k++ { 375 IntersectWith(u, v, dst1) 376 } 377 }) 378 b.Run(fmt.Sprintf("compressed:IntersectWith:ratio=%d:size=%d:overlap=%.2f:", r, sz, overlap), 379 func(b *testing.B) { 380 for k := 0; k < b.N; k++ { 381 IntersectCompressedWith(compressedUids, 0, u, dst2) 382 } 383 }) 384 fmt.Println() 385 i := 0 386 j := 0 387 for i < len(dst1.Uids) { 388 if dst1.Uids[i] != dst2.Uids[j] { 389 b.Errorf("Unexpected error in intersection") 390 } 391 // Behaviour of bin intersect is not defined when duplicates are present 392 i = skipDuplicate(dst1.Uids, i) 393 j = skipDuplicate(dst2.Uids, j) 394 } 395 if j < len(dst2.Uids) { 396 b.Errorf("Unexpected error in intersection") 397 } 398 } 399 } 400 401 randomTests(10, 0.01) 402 randomTests(100, 0.01) 403 randomTests(1000, 0.01) 404 randomTests(10000, 0.01) 405 randomTests(100000, 0.01) 406 randomTests(1000000, 0.01) 407 } 408 409 func skipDuplicate(in []uint64, idx int) int { 410 i := idx + 1 411 for i < len(in) && in[i] == in[idx] { 412 i++ 413 } 414 return i 415 } 416 417 func sortUint64(nums []uint64) { 418 sort.Slice(nums, func(i, j int) bool { return nums[i] < nums[j] }) 419 } 420 421 func fillNums(N1, N2 int) ([]uint64, []uint64, []uint64) { 422 rand.Seed(time.Now().UnixNano()) 423 424 commonNums := make([]uint64, N1) 425 blockNums := make([]uint64, N1+N2) 426 otherNums := make([]uint64, N1+N2) 427 428 for i := 0; i < N1; i++ { 429 val := rand.Uint64() 430 commonNums[i] = val 431 blockNums[i] = val 432 otherNums[i] = val 433 } 434 435 for i := N1; i < N1+N2; i++ { 436 blockNums[i] = rand.Uint64() 437 otherNums[i] = rand.Uint64() 438 } 439 440 sortUint64(commonNums) 441 sortUint64(blockNums) 442 sortUint64(otherNums) 443 444 return commonNums, blockNums, otherNums 445 } 446 447 func TestIntersectCompressedWithLinJump(t *testing.T) { 448 lengths := []int{0, 1, 3, 11, 100} 449 450 for _, N1 := range lengths { 451 for _, N2 := range lengths { 452 // Intersection of blockNums and otherNums is commonNums. 453 commonNums, blockNums, otherNums := fillNums(N1, N2) 454 455 enc := codec.Encoder{BlockSize: 10} 456 for _, num := range blockNums { 457 enc.Add(num) 458 } 459 460 pack := enc.Done() 461 dec := codec.Decoder{Pack: pack} 462 463 actual := make([]uint64, 0) 464 IntersectCompressedWithLinJump(&dec, otherNums, &actual) 465 require.Equal(t, commonNums, actual) 466 } 467 } 468 }