github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/delta_analyzer.go (about)

     1  //                           _       _
     2  // __      _____  __ ___   ___  __ _| |_ ___
     3  // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \
     4  //  \ V  V /  __/ (_| |\ V /| | (_| | ||  __/
     5  //   \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___|
     6  //
     7  //  Copyright © 2016 - 2024 Weaviate B.V. All rights reserved.
     8  //
     9  //  CONTACT: hello@weaviate.io
    10  //
    11  
    12  package inverted
    13  
    14  import "bytes"
    15  
    16  type DeltaResults struct {
    17  	ToDelete []Property
    18  	ToAdd    []Property
    19  }
    20  
    21  func Delta(previous, next []Property) DeltaResults {
    22  	out := DeltaResults{}
    23  
    24  	previous = DedupItems(previous)
    25  	next = DedupItems(next)
    26  
    27  	if previous == nil {
    28  		out.ToAdd = next
    29  		return out
    30  	}
    31  
    32  	previousByProp := map[string]Property{}
    33  	for _, prevProp := range previous {
    34  		previousByProp[prevProp.Name] = prevProp
    35  	}
    36  
    37  	for _, nextProp := range next {
    38  		prevProp, ok := previousByProp[nextProp.Name]
    39  		if !ok {
    40  			// this prop didn't exist before so we can add all of it
    41  			out.ToAdd = append(out.ToAdd, nextProp)
    42  			continue
    43  		}
    44  		delete(previousByProp, nextProp.Name)
    45  
    46  		// there is a chance they're identical, such a check is pretty cheap and
    47  		// it could prevent us from running an expensive merge, so let's try our
    48  		// luck
    49  		if listsIdentical(prevProp.Items, nextProp.Items) {
    50  			// then we don't need to do anything about this prop
    51  			continue
    52  		}
    53  
    54  		toAdd, toDelete := countableDelta(prevProp.Items, nextProp.Items)
    55  		if len(toAdd) > 0 {
    56  			out.ToAdd = append(out.ToAdd, Property{
    57  				Name:               nextProp.Name,
    58  				Items:              toAdd,
    59  				Length:             nextProp.Length,
    60  				HasFilterableIndex: nextProp.HasFilterableIndex,
    61  				HasSearchableIndex: nextProp.HasSearchableIndex,
    62  			})
    63  		}
    64  		if len(toDelete) > 0 {
    65  			out.ToDelete = append(out.ToDelete, Property{
    66  				Name:               nextProp.Name,
    67  				Items:              toDelete,
    68  				Length:             prevProp.Length,
    69  				HasFilterableIndex: nextProp.HasFilterableIndex,
    70  				HasSearchableIndex: nextProp.HasSearchableIndex,
    71  			})
    72  		}
    73  		// special case to update optional length/nil indexes on
    74  		// all values removed
    75  		if len(toAdd) == 0 && len(toDelete) > 0 &&
    76  			nextProp.Length == 0 && prevProp.Length > 0 {
    77  			out.ToAdd = append(out.ToAdd, Property{
    78  				Name:               nextProp.Name,
    79  				Items:              []Countable{},
    80  				Length:             0,
    81  				HasFilterableIndex: nextProp.HasFilterableIndex,
    82  				HasSearchableIndex: nextProp.HasSearchableIndex,
    83  			})
    84  		}
    85  	}
    86  
    87  	// extend ToDelete with props from previous missing in next
    88  	for _, prevProp := range previous {
    89  		if _, ok := previousByProp[prevProp.Name]; ok {
    90  			out.ToDelete = append(out.ToDelete, prevProp)
    91  		}
    92  	}
    93  
    94  	return out
    95  }
    96  
    97  func countableDelta(prev, next []Countable) ([]Countable, []Countable) {
    98  	var (
    99  		add []Countable
   100  		del []Countable
   101  	)
   102  
   103  	seenInPrev := map[string]Countable{}
   104  
   105  	for _, prevItem := range prev {
   106  		seenInPrev[string(prevItem.Data)] = prevItem
   107  	}
   108  
   109  	for _, nextItem := range next {
   110  		prev, ok := seenInPrev[string(nextItem.Data)]
   111  		if ok && prev.TermFrequency == nextItem.TermFrequency {
   112  			// we have an identical overlap, delete from old list
   113  			delete(seenInPrev, string(nextItem.Data))
   114  			// don't add to new list
   115  			continue
   116  		}
   117  
   118  		add = append(add, nextItem)
   119  	}
   120  
   121  	// anything that's now left on the seenInPrev map must be deleted because
   122  	// it either
   123  	// - is no longer present
   124  	// - is still present, but with updated values
   125  	for _, prevItem := range prev {
   126  		if _, ok := seenInPrev[string(prevItem.Data)]; ok {
   127  			del = append(del, prevItem)
   128  		}
   129  	}
   130  
   131  	return add, del
   132  }
   133  
   134  func listsIdentical(a []Countable, b []Countable) bool {
   135  	if len(a) != len(b) {
   136  		// can't possibly be identical if they have different lengths, exit early
   137  		return false
   138  	}
   139  
   140  	for i := range a {
   141  		if !bytes.Equal(a[i].Data, b[i].Data) ||
   142  			a[i].TermFrequency != b[i].TermFrequency {
   143  			// return as soon as an item didn't match
   144  			return false
   145  		}
   146  	}
   147  
   148  	// we have proven in O(n) time that both lists are identical
   149  	// while O(n) is the worst case for this check it prevents us from running a
   150  	// considerably more expensive merge
   151  	return true
   152  }
   153  
   154  type DeltaNilResults struct {
   155  	ToDelete []NilProperty
   156  	ToAdd    []NilProperty
   157  }
   158  
   159  func DeltaNil(previous, next []NilProperty) DeltaNilResults {
   160  	out := DeltaNilResults{}
   161  
   162  	if previous == nil {
   163  		out.ToAdd = next
   164  		return out
   165  	}
   166  
   167  	previousByProp := map[string]NilProperty{}
   168  	for _, prevProp := range previous {
   169  		previousByProp[prevProp.Name] = prevProp
   170  	}
   171  
   172  	for _, nextProp := range next {
   173  		if _, ok := previousByProp[nextProp.Name]; !ok {
   174  			out.ToAdd = append(out.ToAdd, nextProp)
   175  			continue
   176  		}
   177  		delete(previousByProp, nextProp.Name)
   178  	}
   179  
   180  	// extend ToDelete with props from previous missing in next
   181  	for _, prevProp := range previous {
   182  		if _, ok := previousByProp[prevProp.Name]; ok {
   183  			out.ToDelete = append(out.ToDelete, prevProp)
   184  		}
   185  	}
   186  
   187  	return out
   188  }