github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/algo/uidlist.go (about) 1 /* 2 * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors 3 * 4 * Licensed under the Apache License, Version 2.0 (the "License"); 5 * you may not use this file except in compliance with the License. 6 * You may obtain a copy of the License at 7 * 8 * http://www.apache.org/licenses/LICENSE-2.0 9 * 10 * Unless required by applicable law or agreed to in writing, software 11 * distributed under the License is distributed on an "AS IS" BASIS, 12 * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 13 * See the License for the specific language governing permissions and 14 * limitations under the License. 15 */ 16 17 package algo 18 19 import ( 20 "container/heap" 21 "sort" 22 23 "github.com/dgraph-io/dgraph/codec" 24 "github.com/dgraph-io/dgraph/protos/pb" 25 ) 26 27 const jump = 32 // Jump size in InsersectWithJump. 28 29 // ApplyFilter applies a filter to our UIDList. 30 func ApplyFilter(u *pb.List, f func(uint64, int) bool) { 31 out := u.Uids[:0] 32 for i, uid := range u.Uids { 33 if f(uid, i) { 34 out = append(out, uid) 35 } 36 } 37 u.Uids = out 38 } 39 40 // IntersectCompressedWith intersects a packed list of UIDs with another list 41 // and writes the output to o. 42 func IntersectCompressedWith(pack *pb.UidPack, afterUID uint64, v, o *pb.List) { 43 if pack == nil { 44 return 45 } 46 dec := codec.Decoder{Pack: pack} 47 dec.Seek(afterUID, codec.SeekStart) 48 n := dec.ApproxLen() 49 m := len(v.Uids) 50 51 if n > m { 52 n, m = m, n 53 } 54 dst := o.Uids[:0] 55 56 // If n equals 0, set it to 1 to avoid division by zero. 57 if n == 0 { 58 n = 1 59 } 60 61 // Select appropriate function based on heuristics. 62 ratio := float64(m) / float64(n) 63 if ratio < 500 { 64 IntersectCompressedWithLinJump(&dec, v.Uids, &dst) 65 } else { 66 IntersectCompressedWithBin(&dec, v.Uids, &dst) 67 } 68 o.Uids = dst 69 } 70 71 // IntersectCompressedWithLinJump performs the intersection linearly. 72 func IntersectCompressedWithLinJump(dec *codec.Decoder, v []uint64, o *[]uint64) { 73 m := len(v) 74 k := 0 75 _, off := IntersectWithLin(dec.Uids(), v[k:], o) 76 k += off 77 78 for k < m { 79 u := dec.LinearSeek(v[k]) 80 if len(u) == 0 { 81 break 82 } 83 _, off := IntersectWithLin(u, v[k:], o) 84 if off == 0 { 85 off = 1 // If v[k] isn't in u, move forward. 86 } 87 88 k += off 89 } 90 } 91 92 // IntersectCompressedWithBin is based on the paper 93 // "Fast Intersection Algorithms for Sorted Sequences" 94 // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3 95 func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) { 96 ld := dec.ApproxLen() 97 lq := len(q) 98 99 if ld == 0 || lq == 0 { 100 return 101 } 102 // Pick the shorter list and do binary search 103 if ld < lq { 104 uids := dec.Uids() 105 for len(uids) > 0 { 106 for _, u := range uids { 107 qidx := sort.Search(len(q), func(idx int) bool { 108 return q[idx] >= u 109 }) 110 if qidx >= len(q) { 111 return 112 } 113 if q[qidx] == u { 114 *o = append(*o, u) 115 qidx++ 116 } 117 q = q[qidx:] 118 } 119 uids = dec.Next() 120 } 121 return 122 } 123 124 for _, u := range q { 125 uids := dec.Seek(u, codec.SeekStart) 126 if len(uids) == 0 { 127 return 128 } 129 if uids[0] == u { 130 *o = append(*o, u) 131 } 132 } 133 } 134 135 // IntersectWith intersects u with v. The update is made to o. 136 // u, v should be sorted. 137 func IntersectWith(u, v, o *pb.List) { 138 n := len(u.Uids) 139 m := len(v.Uids) 140 141 if n > m { 142 n, m = m, n 143 } 144 if o.Uids == nil { 145 o.Uids = make([]uint64, 0, n) 146 } 147 dst := o.Uids[:0] 148 if n == 0 { 149 n = 1 150 } 151 // Select appropriate function based on heuristics. 152 ratio := float64(m) / float64(n) 153 if ratio < 100 { 154 IntersectWithLin(u.Uids, v.Uids, &dst) 155 } else if ratio < 500 { 156 IntersectWithJump(u.Uids, v.Uids, &dst) 157 } else { 158 IntersectWithBin(u.Uids, v.Uids, &dst) 159 } 160 o.Uids = dst 161 } 162 163 // IntersectWithLin performs the intersection linearly. 164 func IntersectWithLin(u, v []uint64, o *[]uint64) (int, int) { 165 n := len(u) 166 m := len(v) 167 i, k := 0, 0 168 for i < n && k < m { 169 uid := u[i] 170 vid := v[k] 171 if uid > vid { 172 for k = k + 1; k < m && v[k] < uid; k++ { 173 } 174 } else if uid == vid { 175 *o = append(*o, uid) 176 k++ 177 i++ 178 } else { 179 for i = i + 1; i < n && u[i] < vid; i++ { 180 } 181 } 182 } 183 return i, k 184 } 185 186 // IntersectWithJump performs the intersection linearly but jumping jump steps 187 // between iterations. 188 func IntersectWithJump(u, v []uint64, o *[]uint64) (int, int) { 189 n := len(u) 190 m := len(v) 191 i, k := 0, 0 192 for i < n && k < m { 193 uid := u[i] 194 vid := v[k] 195 if uid == vid { 196 *o = append(*o, uid) 197 k++ 198 i++ 199 } else if k+jump < m && uid > v[k+jump] { 200 k += jump 201 } else if i+jump < n && vid > u[i+jump] { 202 i += jump 203 } else if uid > vid { 204 for k = k + 1; k < m && v[k] < uid; k++ { 205 } 206 } else { 207 for i = i + 1; i < n && u[i] < vid; i++ { 208 } 209 } 210 } 211 return i, k 212 } 213 214 // IntersectWithBin is based on the paper 215 // "Fast Intersection Algorithms for Sorted Sequences" 216 // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3 217 func IntersectWithBin(d, q []uint64, o *[]uint64) { 218 ld := len(d) 219 lq := len(q) 220 221 if ld < lq { 222 ld, lq = lq, ld 223 d, q = q, d 224 } 225 if ld == 0 || lq == 0 || d[ld-1] < q[0] || q[lq-1] < d[0] { 226 return 227 } 228 229 val := d[0] 230 minq := sort.Search(len(q), func(i int) bool { 231 return q[i] >= val 232 }) 233 234 val = d[len(d)-1] 235 maxq := sort.Search(len(q), func(i int) bool { 236 return q[i] > val 237 }) 238 239 binIntersect(d, q[minq:maxq], o) 240 } 241 242 // binIntersect is the recursive function used. 243 // NOTE: len(d) >= len(q) (Must hold) 244 func binIntersect(d, q []uint64, final *[]uint64) { 245 if len(d) == 0 || len(q) == 0 { 246 return 247 } 248 midq := len(q) / 2 249 qval := q[midq] 250 midd := sort.Search(len(d), func(i int) bool { 251 return d[i] >= qval 252 }) 253 254 dd := d[0:midd] 255 qq := q[0:midq] 256 if len(dd) > len(qq) { // D > Q 257 binIntersect(dd, qq, final) 258 } else { 259 binIntersect(qq, dd, final) 260 } 261 262 if midd >= len(d) { 263 return 264 } 265 if d[midd] == qval { 266 *final = append(*final, qval) 267 } else { 268 midd-- 269 } 270 271 dd = d[midd+1:] 272 qq = q[midq+1:] 273 if len(dd) > len(qq) { // D > Q 274 binIntersect(dd, qq, final) 275 } else { 276 binIntersect(qq, dd, final) 277 } 278 } 279 280 type listInfo struct { 281 l *pb.List 282 length int 283 } 284 285 // IntersectSorted calculates the intersection of multiple lists and performs 286 // the intersections from the smallest to the largest list. 287 func IntersectSorted(lists []*pb.List) *pb.List { 288 if len(lists) == 0 { 289 return &pb.List{} 290 } 291 ls := make([]listInfo, 0, len(lists)) 292 for _, list := range lists { 293 ls = append(ls, listInfo{ 294 l: list, 295 length: len(list.Uids), 296 }) 297 } 298 // Sort the lists based on length. 299 sort.Slice(ls, func(i, j int) bool { 300 return ls[i].length < ls[j].length 301 }) 302 out := &pb.List{Uids: make([]uint64, ls[0].length)} 303 if len(ls) == 1 { 304 copy(out.Uids, ls[0].l.Uids) 305 return out 306 } 307 308 IntersectWith(ls[0].l, ls[1].l, out) 309 // Intersect from smallest to largest. 310 for i := 2; i < len(ls); i++ { 311 IntersectWith(out, ls[i].l, out) 312 // Break if we reach size 0 as we can no longer 313 // add any element. 314 if len(out.Uids) == 0 { 315 break 316 } 317 } 318 return out 319 } 320 321 // Difference returns the difference of two lists. 322 func Difference(u, v *pb.List) *pb.List { 323 if u == nil || v == nil { 324 return &pb.List{Uids: make([]uint64, 0)} 325 } 326 n := len(u.Uids) 327 m := len(v.Uids) 328 out := make([]uint64, 0, n/2) 329 i, k := 0, 0 330 for i < n && k < m { 331 uid := u.Uids[i] 332 vid := v.Uids[k] 333 if uid < vid { 334 for i < n && u.Uids[i] < vid { 335 out = append(out, u.Uids[i]) 336 i++ 337 } 338 } else if uid == vid { 339 i++ 340 k++ 341 } else { 342 for k = k + 1; k < m && v.Uids[k] < uid; k++ { 343 } 344 } 345 } 346 for i < n && k >= m { 347 out = append(out, u.Uids[i]) 348 i++ 349 } 350 return &pb.List{Uids: out} 351 } 352 353 // MergeSorted merges sorted lists. 354 func MergeSorted(lists []*pb.List) *pb.List { 355 if len(lists) == 0 { 356 return new(pb.List) 357 } 358 359 h := &uint64Heap{} 360 heap.Init(h) 361 maxSz := 0 362 363 for i, l := range lists { 364 if l == nil { 365 continue 366 } 367 lenList := len(l.Uids) 368 if lenList > 0 { 369 heap.Push(h, elem{ 370 val: l.Uids[0], 371 listIdx: i, 372 }) 373 if lenList > maxSz { 374 maxSz = lenList 375 } 376 } 377 } 378 379 // Our final output. Give it an approximate capacity as copies are expensive. 380 output := make([]uint64, 0, maxSz) 381 // idx[i] is the element we are looking at for lists[i]. 382 idx := make([]int, len(lists)) 383 var last uint64 // Last element added to sorted / final output. 384 for h.Len() > 0 { // While heap is not empty. 385 me := (*h)[0] // Peek at the top element in heap. 386 if len(output) == 0 || me.val != last { 387 output = append(output, me.val) // Add if unique. 388 last = me.val 389 } 390 l := lists[me.listIdx] 391 if idx[me.listIdx] >= len(l.Uids)-1 { 392 heap.Pop(h) 393 } else { 394 idx[me.listIdx]++ 395 val := l.Uids[idx[me.listIdx]] 396 (*h)[0].val = val 397 heap.Fix(h, 0) // Faster than Pop() followed by Push(). 398 } 399 } 400 return &pb.List{Uids: output} 401 } 402 403 // IndexOf performs a binary search on the uids slice and returns the index at 404 // which it finds the uid, else returns -1 405 func IndexOf(u *pb.List, uid uint64) int { 406 i := sort.Search(len(u.Uids), func(i int) bool { return u.Uids[i] >= uid }) 407 if i < len(u.Uids) && u.Uids[i] == uid { 408 return i 409 } 410 return -1 411 } 412 413 // ToUintsListForTest converts to list of uints for testing purpose only. 414 func ToUintsListForTest(ul []*pb.List) [][]uint64 { 415 out := make([][]uint64, 0, len(ul)) 416 for _, u := range ul { 417 out = append(out, u.Uids) 418 } 419 return out 420 }