github.com/weaviate/weaviate@v1.24.6/adapters/repos/db/inverted/delta_analyzer.go (about) 1 // _ _ 2 // __ _____ __ ___ ___ __ _| |_ ___ 3 // \ \ /\ / / _ \/ _` \ \ / / |/ _` | __/ _ \ 4 // \ V V / __/ (_| |\ V /| | (_| | || __/ 5 // \_/\_/ \___|\__,_| \_/ |_|\__,_|\__\___| 6 // 7 // Copyright © 2016 - 2024 Weaviate B.V. All rights reserved. 8 // 9 // CONTACT: hello@weaviate.io 10 // 11 12 package inverted 13 14 import "bytes" 15 16 type DeltaResults struct { 17 ToDelete []Property 18 ToAdd []Property 19 } 20 21 func Delta(previous, next []Property) DeltaResults { 22 out := DeltaResults{} 23 24 previous = DedupItems(previous) 25 next = DedupItems(next) 26 27 if previous == nil { 28 out.ToAdd = next 29 return out 30 } 31 32 previousByProp := map[string]Property{} 33 for _, prevProp := range previous { 34 previousByProp[prevProp.Name] = prevProp 35 } 36 37 for _, nextProp := range next { 38 prevProp, ok := previousByProp[nextProp.Name] 39 if !ok { 40 // this prop didn't exist before so we can add all of it 41 out.ToAdd = append(out.ToAdd, nextProp) 42 continue 43 } 44 delete(previousByProp, nextProp.Name) 45 46 // there is a chance they're identical, such a check is pretty cheap and 47 // it could prevent us from running an expensive merge, so let's try our 48 // luck 49 if listsIdentical(prevProp.Items, nextProp.Items) { 50 // then we don't need to do anything about this prop 51 continue 52 } 53 54 toAdd, toDelete := countableDelta(prevProp.Items, nextProp.Items) 55 if len(toAdd) > 0 { 56 out.ToAdd = append(out.ToAdd, Property{ 57 Name: nextProp.Name, 58 Items: toAdd, 59 Length: nextProp.Length, 60 HasFilterableIndex: nextProp.HasFilterableIndex, 61 HasSearchableIndex: nextProp.HasSearchableIndex, 62 }) 63 } 64 if len(toDelete) > 0 { 65 out.ToDelete = append(out.ToDelete, Property{ 66 Name: nextProp.Name, 67 Items: toDelete, 68 Length: prevProp.Length, 69 HasFilterableIndex: nextProp.HasFilterableIndex, 70 HasSearchableIndex: nextProp.HasSearchableIndex, 71 }) 72 } 73 // special case to update optional length/nil indexes on 74 // all values removed 75 if len(toAdd) == 0 && len(toDelete) > 0 && 76 nextProp.Length == 0 && prevProp.Length > 0 { 77 out.ToAdd = append(out.ToAdd, Property{ 78 Name: nextProp.Name, 79 Items: []Countable{}, 80 Length: 0, 81 HasFilterableIndex: nextProp.HasFilterableIndex, 82 HasSearchableIndex: nextProp.HasSearchableIndex, 83 }) 84 } 85 } 86 87 // extend ToDelete with props from previous missing in next 88 for _, prevProp := range previous { 89 if _, ok := previousByProp[prevProp.Name]; ok { 90 out.ToDelete = append(out.ToDelete, prevProp) 91 } 92 } 93 94 return out 95 } 96 97 func countableDelta(prev, next []Countable) ([]Countable, []Countable) { 98 var ( 99 add []Countable 100 del []Countable 101 ) 102 103 seenInPrev := map[string]Countable{} 104 105 for _, prevItem := range prev { 106 seenInPrev[string(prevItem.Data)] = prevItem 107 } 108 109 for _, nextItem := range next { 110 prev, ok := seenInPrev[string(nextItem.Data)] 111 if ok && prev.TermFrequency == nextItem.TermFrequency { 112 // we have an identical overlap, delete from old list 113 delete(seenInPrev, string(nextItem.Data)) 114 // don't add to new list 115 continue 116 } 117 118 add = append(add, nextItem) 119 } 120 121 // anything that's now left on the seenInPrev map must be deleted because 122 // it either 123 // - is no longer present 124 // - is still present, but with updated values 125 for _, prevItem := range prev { 126 if _, ok := seenInPrev[string(prevItem.Data)]; ok { 127 del = append(del, prevItem) 128 } 129 } 130 131 return add, del 132 } 133 134 func listsIdentical(a []Countable, b []Countable) bool { 135 if len(a) != len(b) { 136 // can't possibly be identical if they have different lengths, exit early 137 return false 138 } 139 140 for i := range a { 141 if !bytes.Equal(a[i].Data, b[i].Data) || 142 a[i].TermFrequency != b[i].TermFrequency { 143 // return as soon as an item didn't match 144 return false 145 } 146 } 147 148 // we have proven in O(n) time that both lists are identical 149 // while O(n) is the worst case for this check it prevents us from running a 150 // considerably more expensive merge 151 return true 152 } 153 154 type DeltaNilResults struct { 155 ToDelete []NilProperty 156 ToAdd []NilProperty 157 } 158 159 func DeltaNil(previous, next []NilProperty) DeltaNilResults { 160 out := DeltaNilResults{} 161 162 if previous == nil { 163 out.ToAdd = next 164 return out 165 } 166 167 previousByProp := map[string]NilProperty{} 168 for _, prevProp := range previous { 169 previousByProp[prevProp.Name] = prevProp 170 } 171 172 for _, nextProp := range next { 173 if _, ok := previousByProp[nextProp.Name]; !ok { 174 out.ToAdd = append(out.ToAdd, nextProp) 175 continue 176 } 177 delete(previousByProp, nextProp.Name) 178 } 179 180 // extend ToDelete with props from previous missing in next 181 for _, prevProp := range previous { 182 if _, ok := previousByProp[prevProp.Name]; ok { 183 out.ToDelete = append(out.ToDelete, prevProp) 184 } 185 } 186 187 return out 188 }