github.com/weaviate/sroar@v0.0.0-20230210105426-26108af5465d/setutil.go (about)

     1  // Copyright 2016 by the roaring authors.
     2  // Licensed under the Apache License, Version 2.0.
     3  // Full version of the license is here:
     4  // https://github.com/RoaringBitmap/roaring/blob/master/LICENSE
     5  
     6  package sroar
     7  
     8  // TODO: Add license from roaring bitmap library.
     9  
    10  func min(a, b int) int {
    11  	if a < b {
    12  		return a
    13  	}
    14  	return b
    15  }
    16  func max(a, b int) int {
    17  	if a > b {
    18  		return a
    19  	}
    20  	return b
    21  }
    22  
    23  func equal(a, b []uint16) bool {
    24  	if len(a) != len(b) {
    25  		return false
    26  	}
    27  	for i := range a {
    28  		if a[i] != b[i] {
    29  			return false
    30  		}
    31  	}
    32  	return true
    33  }
    34  
    35  func difference(set1 []uint16, set2 []uint16, buffer []uint16) int {
    36  	if 0 == len(set2) {
    37  		buffer = buffer[:len(set1)]
    38  		for k := 0; k < len(set1); k++ {
    39  			buffer[k] = set1[k]
    40  		}
    41  		return len(set1)
    42  	}
    43  	if 0 == len(set1) {
    44  		return 0
    45  	}
    46  	pos := 0
    47  	k1 := 0
    48  	k2 := 0
    49  	buffer = buffer[:cap(buffer)]
    50  	s1 := set1[k1]
    51  	s2 := set2[k2]
    52  	for {
    53  		if s1 < s2 {
    54  			buffer[pos] = s1
    55  			pos++
    56  			k1++
    57  			if k1 >= len(set1) {
    58  				break
    59  			}
    60  			s1 = set1[k1]
    61  		} else if s1 == s2 {
    62  			k1++
    63  			k2++
    64  			if k1 >= len(set1) {
    65  				break
    66  			}
    67  			s1 = set1[k1]
    68  			if k2 >= len(set2) {
    69  				for ; k1 < len(set1); k1++ {
    70  					buffer[pos] = set1[k1]
    71  					pos++
    72  				}
    73  				break
    74  			}
    75  			s2 = set2[k2]
    76  		} else { // if (val1>val2)
    77  			k2++
    78  			if k2 >= len(set2) {
    79  				for ; k1 < len(set1); k1++ {
    80  					buffer[pos] = set1[k1]
    81  					pos++
    82  				}
    83  				break
    84  			}
    85  			s2 = set2[k2]
    86  		}
    87  	}
    88  	return pos
    89  
    90  }
    91  
    92  func exclusiveUnion2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
    93  	if 0 == len(set2) {
    94  		buffer = buffer[:len(set1)]
    95  		copy(buffer, set1[:])
    96  		return len(set1)
    97  	}
    98  	if 0 == len(set1) {
    99  		buffer = buffer[:len(set2)]
   100  		copy(buffer, set2[:])
   101  		return len(set2)
   102  	}
   103  	pos := 0
   104  	k1 := 0
   105  	k2 := 0
   106  	s1 := set1[k1]
   107  	s2 := set2[k2]
   108  	buffer = buffer[:cap(buffer)]
   109  	for {
   110  		if s1 < s2 {
   111  			buffer[pos] = s1
   112  			pos++
   113  			k1++
   114  			if k1 >= len(set1) {
   115  				for ; k2 < len(set2); k2++ {
   116  					buffer[pos] = set2[k2]
   117  					pos++
   118  				}
   119  				break
   120  			}
   121  			s1 = set1[k1]
   122  		} else if s1 == s2 {
   123  			k1++
   124  			k2++
   125  			if k1 >= len(set1) {
   126  				for ; k2 < len(set2); k2++ {
   127  					buffer[pos] = set2[k2]
   128  					pos++
   129  				}
   130  				break
   131  			}
   132  			if k2 >= len(set2) {
   133  				for ; k1 < len(set1); k1++ {
   134  					buffer[pos] = set1[k1]
   135  					pos++
   136  				}
   137  				break
   138  			}
   139  			s1 = set1[k1]
   140  			s2 = set2[k2]
   141  		} else { // if (val1>val2)
   142  			buffer[pos] = s2
   143  			pos++
   144  			k2++
   145  			if k2 >= len(set2) {
   146  				for ; k1 < len(set1); k1++ {
   147  					buffer[pos] = set1[k1]
   148  					pos++
   149  				}
   150  				break
   151  			}
   152  			s2 = set2[k2]
   153  		}
   154  	}
   155  	return pos
   156  }
   157  
   158  func union2by2Cardinality(set1 []uint16, set2 []uint16) int {
   159  	pos := 0
   160  	k1 := 0
   161  	k2 := 0
   162  	if 0 == len(set2) {
   163  		return len(set1)
   164  	}
   165  	if 0 == len(set1) {
   166  		return len(set2)
   167  	}
   168  	s1 := set1[k1]
   169  	s2 := set2[k2]
   170  	for {
   171  		if s1 < s2 {
   172  			pos++
   173  			k1++
   174  			if k1 >= len(set1) {
   175  				pos += len(set2) - k2
   176  				break
   177  			}
   178  			s1 = set1[k1]
   179  		} else if s1 == s2 {
   180  			pos++
   181  			k1++
   182  			k2++
   183  			if k1 >= len(set1) {
   184  				pos += len(set2) - k2
   185  				break
   186  			}
   187  			if k2 >= len(set2) {
   188  				pos += len(set1) - k1
   189  				break
   190  			}
   191  			s1 = set1[k1]
   192  			s2 = set2[k2]
   193  		} else { // if (set1[k1]>set2[k2])
   194  			pos++
   195  			k2++
   196  			if k2 >= len(set2) {
   197  				pos += len(set1) - k1
   198  				break
   199  			}
   200  			s2 = set2[k2]
   201  		}
   202  	}
   203  	return pos
   204  }
   205  
   206  func intersection2by2(
   207  	set1 []uint16,
   208  	set2 []uint16,
   209  	buffer []uint16) int {
   210  
   211  	if len(set1)*64 < len(set2) {
   212  		return onesidedgallopingintersect2by2(set1, set2, buffer)
   213  	} else if len(set2)*64 < len(set1) {
   214  		return onesidedgallopingintersect2by2(set2, set1, buffer)
   215  	} else {
   216  		return localintersect2by2(set1, set2, buffer)
   217  	}
   218  }
   219  
   220  func intersection2by2Cardinality(
   221  	set1 []uint16,
   222  	set2 []uint16) int {
   223  
   224  	if len(set1)*64 < len(set2) {
   225  		return onesidedgallopingintersect2by2Cardinality(set1, set2)
   226  	} else if len(set2)*64 < len(set1) {
   227  		return onesidedgallopingintersect2by2Cardinality(set2, set1)
   228  	} else {
   229  		return localintersect2by2Cardinality(set1, set2)
   230  	}
   231  }
   232  
   233  func intersects2by2(
   234  	set1 []uint16,
   235  	set2 []uint16) bool {
   236  	// could be optimized if one set is much larger than the other one
   237  	if (0 == len(set1)) || (0 == len(set2)) {
   238  		return false
   239  	}
   240  	k1 := 0
   241  	k2 := 0
   242  	s1 := set1[k1]
   243  	s2 := set2[k2]
   244  mainwhile:
   245  	for {
   246  
   247  		if s2 < s1 {
   248  			for {
   249  				k2++
   250  				if k2 == len(set2) {
   251  					break mainwhile
   252  				}
   253  				s2 = set2[k2]
   254  				if s2 >= s1 {
   255  					break
   256  				}
   257  			}
   258  		}
   259  		if s1 < s2 {
   260  			for {
   261  				k1++
   262  				if k1 == len(set1) {
   263  					break mainwhile
   264  				}
   265  				s1 = set1[k1]
   266  				if s1 >= s2 {
   267  					break
   268  				}
   269  			}
   270  
   271  		} else {
   272  			// (set2[k2] == set1[k1])
   273  			return true
   274  		}
   275  	}
   276  	return false
   277  }
   278  
   279  func localintersect2by2(
   280  	set1 []uint16,
   281  	set2 []uint16,
   282  	buffer []uint16) int {
   283  
   284  	if (0 == len(set1)) || (0 == len(set2)) {
   285  		return 0
   286  	}
   287  	k1 := 0
   288  	k2 := 0
   289  	pos := 0
   290  	buffer = buffer[:cap(buffer)]
   291  	s1 := set1[k1]
   292  	s2 := set2[k2]
   293  mainwhile:
   294  	for {
   295  		if s2 < s1 {
   296  			for {
   297  				k2++
   298  				if k2 == len(set2) {
   299  					break mainwhile
   300  				}
   301  				s2 = set2[k2]
   302  				if s2 >= s1 {
   303  					break
   304  				}
   305  			}
   306  		}
   307  		if s1 < s2 {
   308  			for {
   309  				k1++
   310  				if k1 == len(set1) {
   311  					break mainwhile
   312  				}
   313  				s1 = set1[k1]
   314  				if s1 >= s2 {
   315  					break
   316  				}
   317  			}
   318  
   319  		} else {
   320  			// (set2[k2] == set1[k1])
   321  			buffer[pos] = s1
   322  			pos++
   323  			k1++
   324  			if k1 == len(set1) {
   325  				break
   326  			}
   327  			s1 = set1[k1]
   328  			k2++
   329  			if k2 == len(set2) {
   330  				break
   331  			}
   332  			s2 = set2[k2]
   333  		}
   334  	}
   335  	return pos
   336  }
   337  
   338  func localintersect2by2Cardinality(
   339  	set1 []uint16,
   340  	set2 []uint16) int {
   341  
   342  	if (0 == len(set1)) || (0 == len(set2)) {
   343  		return 0
   344  	}
   345  	k1 := 0
   346  	k2 := 0
   347  	pos := 0
   348  	s1 := set1[k1]
   349  	s2 := set2[k2]
   350  mainwhile:
   351  	for {
   352  		if s2 < s1 {
   353  			for {
   354  				k2++
   355  				if k2 == len(set2) {
   356  					break mainwhile
   357  				}
   358  				s2 = set2[k2]
   359  				if s2 >= s1 {
   360  					break
   361  				}
   362  			}
   363  		}
   364  		if s1 < s2 {
   365  			for {
   366  				k1++
   367  				if k1 == len(set1) {
   368  					break mainwhile
   369  				}
   370  				s1 = set1[k1]
   371  				if s1 >= s2 {
   372  					break
   373  				}
   374  			}
   375  
   376  		} else {
   377  			// (set2[k2] == set1[k1])
   378  			pos++
   379  			k1++
   380  			if k1 == len(set1) {
   381  				break
   382  			}
   383  			s1 = set1[k1]
   384  			k2++
   385  			if k2 == len(set2) {
   386  				break
   387  			}
   388  			s2 = set2[k2]
   389  		}
   390  	}
   391  	return pos
   392  }
   393  
   394  func advanceUntil(
   395  	array []uint16,
   396  	pos int,
   397  	length int,
   398  	min uint16) int {
   399  	lower := pos + 1
   400  
   401  	if lower >= length || array[lower] >= min {
   402  		return lower
   403  	}
   404  
   405  	spansize := 1
   406  
   407  	for lower+spansize < length && array[lower+spansize] < min {
   408  		spansize *= 2
   409  	}
   410  	var upper int
   411  	if lower+spansize < length {
   412  		upper = lower + spansize
   413  	} else {
   414  		upper = length - 1
   415  	}
   416  
   417  	if array[upper] == min {
   418  		return upper
   419  	}
   420  
   421  	if array[upper] < min {
   422  		// means
   423  		// array
   424  		// has no
   425  		// item
   426  		// >= min
   427  		// pos = array.length;
   428  		return length
   429  	}
   430  
   431  	// we know that the next-smallest span was too small
   432  	lower += (spansize >> 1)
   433  
   434  	mid := 0
   435  	for lower+1 != upper {
   436  		mid = (lower + upper) >> 1
   437  		if array[mid] == min {
   438  			return mid
   439  		} else if array[mid] < min {
   440  			lower = mid
   441  		} else {
   442  			upper = mid
   443  		}
   444  	}
   445  	return upper
   446  
   447  }
   448  
   449  func onesidedgallopingintersect2by2(
   450  	smallset []uint16,
   451  	largeset []uint16,
   452  	buffer []uint16) int {
   453  
   454  	if 0 == len(smallset) {
   455  		return 0
   456  	}
   457  	buffer = buffer[:cap(buffer)]
   458  	k1 := 0
   459  	k2 := 0
   460  	pos := 0
   461  	s1 := largeset[k1]
   462  	s2 := smallset[k2]
   463  mainwhile:
   464  
   465  	for {
   466  		if s1 < s2 {
   467  			k1 = advanceUntil(largeset, k1, len(largeset), s2)
   468  			if k1 == len(largeset) {
   469  				break mainwhile
   470  			}
   471  			s1 = largeset[k1]
   472  		}
   473  		if s2 < s1 {
   474  			k2++
   475  			if k2 == len(smallset) {
   476  				break mainwhile
   477  			}
   478  			s2 = smallset[k2]
   479  		} else {
   480  
   481  			buffer[pos] = s2
   482  			pos++
   483  			k2++
   484  			if k2 == len(smallset) {
   485  				break
   486  			}
   487  			s2 = smallset[k2]
   488  			k1 = advanceUntil(largeset, k1, len(largeset), s2)
   489  			if k1 == len(largeset) {
   490  				break mainwhile
   491  			}
   492  			s1 = largeset[k1]
   493  		}
   494  
   495  	}
   496  	return pos
   497  }
   498  
   499  func onesidedgallopingintersect2by2Cardinality(
   500  	smallset []uint16,
   501  	largeset []uint16) int {
   502  
   503  	if 0 == len(smallset) {
   504  		return 0
   505  	}
   506  	k1 := 0
   507  	k2 := 0
   508  	pos := 0
   509  	s1 := largeset[k1]
   510  	s2 := smallset[k2]
   511  mainwhile:
   512  
   513  	for {
   514  		if s1 < s2 {
   515  			k1 = advanceUntil(largeset, k1, len(largeset), s2)
   516  			if k1 == len(largeset) {
   517  				break mainwhile
   518  			}
   519  			s1 = largeset[k1]
   520  		}
   521  		if s2 < s1 {
   522  			k2++
   523  			if k2 == len(smallset) {
   524  				break mainwhile
   525  			}
   526  			s2 = smallset[k2]
   527  		} else {
   528  
   529  			pos++
   530  			k2++
   531  			if k2 == len(smallset) {
   532  				break
   533  			}
   534  			s2 = smallset[k2]
   535  			k1 = advanceUntil(largeset, k1, len(largeset), s2)
   536  			if k1 == len(largeset) {
   537  				break mainwhile
   538  			}
   539  			s1 = largeset[k1]
   540  		}
   541  
   542  	}
   543  	return pos
   544  }
   545  
   546  func binarySearch(array []uint16, ikey uint16) int {
   547  	low := 0
   548  	high := len(array) - 1
   549  	for low+16 <= high {
   550  		middleIndex := int(uint32(low+high) >> 1)
   551  		middleValue := array[middleIndex]
   552  		if middleValue < ikey {
   553  			low = middleIndex + 1
   554  		} else if middleValue > ikey {
   555  			high = middleIndex - 1
   556  		} else {
   557  			return middleIndex
   558  		}
   559  	}
   560  	for ; low <= high; low++ {
   561  		val := array[low]
   562  		if val >= ikey {
   563  			if val == ikey {
   564  				return low
   565  			}
   566  			break
   567  		}
   568  	}
   569  	return -(low + 1)
   570  }
   571  
   572  func union2by2(set1 []uint16, set2 []uint16, buffer []uint16) int {
   573  	pos := 0
   574  	k1 := 0
   575  	k2 := 0
   576  	if 0 == len(set2) {
   577  		buffer = buffer[:len(set1)]
   578  		copy(buffer, set1[:])
   579  		return len(set1)
   580  	}
   581  	if 0 == len(set1) {
   582  		buffer = buffer[:len(set2)]
   583  		copy(buffer, set2[:])
   584  		return len(set2)
   585  	}
   586  	s1 := set1[k1]
   587  	s2 := set2[k2]
   588  	buffer = buffer[:cap(buffer)]
   589  	for {
   590  		if s1 < s2 {
   591  			buffer[pos] = s1
   592  			pos++
   593  			k1++
   594  			if k1 >= len(set1) {
   595  				copy(buffer[pos:], set2[k2:])
   596  				pos += len(set2) - k2
   597  				break
   598  			}
   599  			s1 = set1[k1]
   600  		} else if s1 == s2 {
   601  			buffer[pos] = s1
   602  			pos++
   603  			k1++
   604  			k2++
   605  			if k1 >= len(set1) {
   606  				copy(buffer[pos:], set2[k2:])
   607  				pos += len(set2) - k2
   608  				break
   609  			}
   610  			if k2 >= len(set2) {
   611  				copy(buffer[pos:], set1[k1:])
   612  				pos += len(set1) - k1
   613  				break
   614  			}
   615  			s1 = set1[k1]
   616  			s2 = set2[k2]
   617  		} else { // if (set1[k1]>set2[k2])
   618  			buffer[pos] = s2
   619  			pos++
   620  			k2++
   621  			if k2 >= len(set2) {
   622  				copy(buffer[pos:], set1[k1:])
   623  				pos += len(set1) - k1
   624  				break
   625  			}
   626  			s2 = set2[k2]
   627  		}
   628  	}
   629  	return pos
   630  }