github.com/unigraph-dev/dgraph@v1.1.1-0.20200923154953-8b52b426f765/algo/uidlist.go (about)

     1  /*
     2   * Copyright 2016-2018 Dgraph Labs, Inc. and Contributors
     3   *
     4   * Licensed under the Apache License, Version 2.0 (the "License");
     5   * you may not use this file except in compliance with the License.
     6   * You may obtain a copy of the License at
     7   *
     8   *     http://www.apache.org/licenses/LICENSE-2.0
     9   *
    10   * Unless required by applicable law or agreed to in writing, software
    11   * distributed under the License is distributed on an "AS IS" BASIS,
    12   * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    13   * See the License for the specific language governing permissions and
    14   * limitations under the License.
    15   */
    16  
    17  package algo
    18  
    19  import (
    20  	"container/heap"
    21  	"sort"
    22  
    23  	"github.com/dgraph-io/dgraph/codec"
    24  	"github.com/dgraph-io/dgraph/protos/pb"
    25  )
    26  
// jump is the number of elements IntersectWithJump skips ahead at a time when
// one list is far behind the other.
const jump = 32
    28  
    29  // ApplyFilter applies a filter to our UIDList.
    30  func ApplyFilter(u *pb.List, f func(uint64, int) bool) {
    31  	out := u.Uids[:0]
    32  	for i, uid := range u.Uids {
    33  		if f(uid, i) {
    34  			out = append(out, uid)
    35  		}
    36  	}
    37  	u.Uids = out
    38  }
    39  
    40  // IntersectCompressedWith intersects a packed list of UIDs with another list
    41  // and writes the output to o.
    42  func IntersectCompressedWith(pack *pb.UidPack, afterUID uint64, v, o *pb.List) {
    43  	if pack == nil {
    44  		return
    45  	}
    46  	dec := codec.Decoder{Pack: pack}
    47  	dec.Seek(afterUID, codec.SeekStart)
    48  	n := dec.ApproxLen()
    49  	m := len(v.Uids)
    50  
    51  	if n > m {
    52  		n, m = m, n
    53  	}
    54  	dst := o.Uids[:0]
    55  
    56  	// If n equals 0, set it to 1 to avoid division by zero.
    57  	if n == 0 {
    58  		n = 1
    59  	}
    60  
    61  	// Select appropriate function based on heuristics.
    62  	ratio := float64(m) / float64(n)
    63  	if ratio < 500 {
    64  		IntersectCompressedWithLinJump(&dec, v.Uids, &dst)
    65  	} else {
    66  		IntersectCompressedWithBin(&dec, v.Uids, &dst)
    67  	}
    68  	o.Uids = dst
    69  }
    70  
    71  // IntersectCompressedWithLinJump performs the intersection linearly.
    72  func IntersectCompressedWithLinJump(dec *codec.Decoder, v []uint64, o *[]uint64) {
    73  	m := len(v)
    74  	k := 0
    75  	_, off := IntersectWithLin(dec.Uids(), v[k:], o)
    76  	k += off
    77  
    78  	for k < m {
    79  		u := dec.LinearSeek(v[k])
    80  		if len(u) == 0 {
    81  			break
    82  		}
    83  		_, off := IntersectWithLin(u, v[k:], o)
    84  		if off == 0 {
    85  			off = 1 // If v[k] isn't in u, move forward.
    86  		}
    87  
    88  		k += off
    89  	}
    90  }
    91  
    92  // IntersectCompressedWithBin is based on the paper
    93  // "Fast Intersection Algorithms for Sorted Sequences"
    94  // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3
    95  func IntersectCompressedWithBin(dec *codec.Decoder, q []uint64, o *[]uint64) {
    96  	ld := dec.ApproxLen()
    97  	lq := len(q)
    98  
    99  	if ld == 0 || lq == 0 {
   100  		return
   101  	}
   102  	// Pick the shorter list and do binary search
   103  	if ld < lq {
   104  		uids := dec.Uids()
   105  		for len(uids) > 0 {
   106  			for _, u := range uids {
   107  				qidx := sort.Search(len(q), func(idx int) bool {
   108  					return q[idx] >= u
   109  				})
   110  				if qidx >= len(q) {
   111  					return
   112  				}
   113  				if q[qidx] == u {
   114  					*o = append(*o, u)
   115  					qidx++
   116  				}
   117  				q = q[qidx:]
   118  			}
   119  			uids = dec.Next()
   120  		}
   121  		return
   122  	}
   123  
   124  	for _, u := range q {
   125  		uids := dec.Seek(u, codec.SeekStart)
   126  		if len(uids) == 0 {
   127  			return
   128  		}
   129  		if uids[0] == u {
   130  			*o = append(*o, u)
   131  		}
   132  	}
   133  }
   134  
   135  // IntersectWith intersects u with v. The update is made to o.
   136  // u, v should be sorted.
   137  func IntersectWith(u, v, o *pb.List) {
   138  	n := len(u.Uids)
   139  	m := len(v.Uids)
   140  
   141  	if n > m {
   142  		n, m = m, n
   143  	}
   144  	if o.Uids == nil {
   145  		o.Uids = make([]uint64, 0, n)
   146  	}
   147  	dst := o.Uids[:0]
   148  	if n == 0 {
   149  		n = 1
   150  	}
   151  	// Select appropriate function based on heuristics.
   152  	ratio := float64(m) / float64(n)
   153  	if ratio < 100 {
   154  		IntersectWithLin(u.Uids, v.Uids, &dst)
   155  	} else if ratio < 500 {
   156  		IntersectWithJump(u.Uids, v.Uids, &dst)
   157  	} else {
   158  		IntersectWithBin(u.Uids, v.Uids, &dst)
   159  	}
   160  	o.Uids = dst
   161  }
   162  
   163  // IntersectWithLin performs the intersection linearly.
   164  func IntersectWithLin(u, v []uint64, o *[]uint64) (int, int) {
   165  	n := len(u)
   166  	m := len(v)
   167  	i, k := 0, 0
   168  	for i < n && k < m {
   169  		uid := u[i]
   170  		vid := v[k]
   171  		if uid > vid {
   172  			for k = k + 1; k < m && v[k] < uid; k++ {
   173  			}
   174  		} else if uid == vid {
   175  			*o = append(*o, uid)
   176  			k++
   177  			i++
   178  		} else {
   179  			for i = i + 1; i < n && u[i] < vid; i++ {
   180  			}
   181  		}
   182  	}
   183  	return i, k
   184  }
   185  
   186  // IntersectWithJump performs the intersection linearly but jumping jump steps
   187  // between iterations.
   188  func IntersectWithJump(u, v []uint64, o *[]uint64) (int, int) {
   189  	n := len(u)
   190  	m := len(v)
   191  	i, k := 0, 0
   192  	for i < n && k < m {
   193  		uid := u[i]
   194  		vid := v[k]
   195  		if uid == vid {
   196  			*o = append(*o, uid)
   197  			k++
   198  			i++
   199  		} else if k+jump < m && uid > v[k+jump] {
   200  			k += jump
   201  		} else if i+jump < n && vid > u[i+jump] {
   202  			i += jump
   203  		} else if uid > vid {
   204  			for k = k + 1; k < m && v[k] < uid; k++ {
   205  			}
   206  		} else {
   207  			for i = i + 1; i < n && u[i] < vid; i++ {
   208  			}
   209  		}
   210  	}
   211  	return i, k
   212  }
   213  
   214  // IntersectWithBin is based on the paper
   215  // "Fast Intersection Algorithms for Sorted Sequences"
   216  // https://link.springer.com/chapter/10.1007/978-3-642-12476-1_3
   217  func IntersectWithBin(d, q []uint64, o *[]uint64) {
   218  	ld := len(d)
   219  	lq := len(q)
   220  
   221  	if ld < lq {
   222  		ld, lq = lq, ld
   223  		d, q = q, d
   224  	}
   225  	if ld == 0 || lq == 0 || d[ld-1] < q[0] || q[lq-1] < d[0] {
   226  		return
   227  	}
   228  
   229  	val := d[0]
   230  	minq := sort.Search(len(q), func(i int) bool {
   231  		return q[i] >= val
   232  	})
   233  
   234  	val = d[len(d)-1]
   235  	maxq := sort.Search(len(q), func(i int) bool {
   236  		return q[i] > val
   237  	})
   238  
   239  	binIntersect(d, q[minq:maxq], o)
   240  }
   241  
   242  // binIntersect is the recursive function used.
   243  // NOTE: len(d) >= len(q) (Must hold)
   244  func binIntersect(d, q []uint64, final *[]uint64) {
   245  	if len(d) == 0 || len(q) == 0 {
   246  		return
   247  	}
   248  	midq := len(q) / 2
   249  	qval := q[midq]
   250  	midd := sort.Search(len(d), func(i int) bool {
   251  		return d[i] >= qval
   252  	})
   253  
   254  	dd := d[0:midd]
   255  	qq := q[0:midq]
   256  	if len(dd) > len(qq) { // D > Q
   257  		binIntersect(dd, qq, final)
   258  	} else {
   259  		binIntersect(qq, dd, final)
   260  	}
   261  
   262  	if midd >= len(d) {
   263  		return
   264  	}
   265  	if d[midd] == qval {
   266  		*final = append(*final, qval)
   267  	} else {
   268  		midd--
   269  	}
   270  
   271  	dd = d[midd+1:]
   272  	qq = q[midq+1:]
   273  	if len(dd) > len(qq) { // D > Q
   274  		binIntersect(dd, qq, final)
   275  	} else {
   276  		binIntersect(qq, dd, final)
   277  	}
   278  }
   279  
// listInfo pairs a list with its length so that IntersectSorted can order
// lists by size.
type listInfo struct {
	l      *pb.List // the list being described
	length int      // number of UIDs in l, i.e. len(l.Uids)
}
   284  
   285  // IntersectSorted calculates the intersection of multiple lists and performs
   286  // the intersections from the smallest to the largest list.
   287  func IntersectSorted(lists []*pb.List) *pb.List {
   288  	if len(lists) == 0 {
   289  		return &pb.List{}
   290  	}
   291  	ls := make([]listInfo, 0, len(lists))
   292  	for _, list := range lists {
   293  		ls = append(ls, listInfo{
   294  			l:      list,
   295  			length: len(list.Uids),
   296  		})
   297  	}
   298  	// Sort the lists based on length.
   299  	sort.Slice(ls, func(i, j int) bool {
   300  		return ls[i].length < ls[j].length
   301  	})
   302  	out := &pb.List{Uids: make([]uint64, ls[0].length)}
   303  	if len(ls) == 1 {
   304  		copy(out.Uids, ls[0].l.Uids)
   305  		return out
   306  	}
   307  
   308  	IntersectWith(ls[0].l, ls[1].l, out)
   309  	// Intersect from smallest to largest.
   310  	for i := 2; i < len(ls); i++ {
   311  		IntersectWith(out, ls[i].l, out)
   312  		// Break if we reach size 0 as we can no longer
   313  		// add any element.
   314  		if len(out.Uids) == 0 {
   315  			break
   316  		}
   317  	}
   318  	return out
   319  }
   320  
   321  // Difference returns the difference of two lists.
   322  func Difference(u, v *pb.List) *pb.List {
   323  	if u == nil || v == nil {
   324  		return &pb.List{Uids: make([]uint64, 0)}
   325  	}
   326  	n := len(u.Uids)
   327  	m := len(v.Uids)
   328  	out := make([]uint64, 0, n/2)
   329  	i, k := 0, 0
   330  	for i < n && k < m {
   331  		uid := u.Uids[i]
   332  		vid := v.Uids[k]
   333  		if uid < vid {
   334  			for i < n && u.Uids[i] < vid {
   335  				out = append(out, u.Uids[i])
   336  				i++
   337  			}
   338  		} else if uid == vid {
   339  			i++
   340  			k++
   341  		} else {
   342  			for k = k + 1; k < m && v.Uids[k] < uid; k++ {
   343  			}
   344  		}
   345  	}
   346  	for i < n && k >= m {
   347  		out = append(out, u.Uids[i])
   348  		i++
   349  	}
   350  	return &pb.List{Uids: out}
   351  }
   352  
   353  // MergeSorted merges sorted lists.
   354  func MergeSorted(lists []*pb.List) *pb.List {
   355  	if len(lists) == 0 {
   356  		return new(pb.List)
   357  	}
   358  
   359  	h := &uint64Heap{}
   360  	heap.Init(h)
   361  	maxSz := 0
   362  
   363  	for i, l := range lists {
   364  		if l == nil {
   365  			continue
   366  		}
   367  		lenList := len(l.Uids)
   368  		if lenList > 0 {
   369  			heap.Push(h, elem{
   370  				val:     l.Uids[0],
   371  				listIdx: i,
   372  			})
   373  			if lenList > maxSz {
   374  				maxSz = lenList
   375  			}
   376  		}
   377  	}
   378  
   379  	// Our final output. Give it an approximate capacity as copies are expensive.
   380  	output := make([]uint64, 0, maxSz)
   381  	// idx[i] is the element we are looking at for lists[i].
   382  	idx := make([]int, len(lists))
   383  	var last uint64   // Last element added to sorted / final output.
   384  	for h.Len() > 0 { // While heap is not empty.
   385  		me := (*h)[0] // Peek at the top element in heap.
   386  		if len(output) == 0 || me.val != last {
   387  			output = append(output, me.val) // Add if unique.
   388  			last = me.val
   389  		}
   390  		l := lists[me.listIdx]
   391  		if idx[me.listIdx] >= len(l.Uids)-1 {
   392  			heap.Pop(h)
   393  		} else {
   394  			idx[me.listIdx]++
   395  			val := l.Uids[idx[me.listIdx]]
   396  			(*h)[0].val = val
   397  			heap.Fix(h, 0) // Faster than Pop() followed by Push().
   398  		}
   399  	}
   400  	return &pb.List{Uids: output}
   401  }
   402  
   403  // IndexOf performs a binary search on the uids slice and returns the index at
   404  // which it finds the uid, else returns -1
   405  func IndexOf(u *pb.List, uid uint64) int {
   406  	i := sort.Search(len(u.Uids), func(i int) bool { return u.Uids[i] >= uid })
   407  	if i < len(u.Uids) && u.Uids[i] == uid {
   408  		return i
   409  	}
   410  	return -1
   411  }
   412  
   413  // ToUintsListForTest converts to list of uints for testing purpose only.
   414  func ToUintsListForTest(ul []*pb.List) [][]uint64 {
   415  	out := make([][]uint64, 0, len(ul))
   416  	for _, u := range ul {
   417  		out = append(out, u.Uids)
   418  	}
   419  	return out
   420  }