github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/store/prolly/tree/diff.go (about) 1 // Copyright 2022 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package tree 16 17 import ( 18 "bytes" 19 "context" 20 "io" 21 ) 22 23 type DiffType byte 24 25 const ( 26 AddedDiff DiffType = 0 27 ModifiedDiff DiffType = 1 28 RemovedDiff DiffType = 2 29 ) 30 31 type Diff struct { 32 Key Item 33 From, To Item 34 Type DiffType 35 } 36 37 type DiffFn func(context.Context, Diff) error 38 39 // Differ computes the diff between two prolly trees. 40 // If `considerAllRowsModified` is true, it will consider every leaf to be modified and generate a diff for every leaf. (This 41 // is useful in cases where the schema has changed and we want to consider a leaf changed even if the byte representation 42 // of the leaf is the same. 43 type Differ[K ~[]byte, O Ordering[K]] struct { 44 from, to *cursor 45 fromStop, toStop *cursor 46 order O 47 considerAllRowsModified bool 48 } 49 50 func DifferFromRoots[K ~[]byte, O Ordering[K]]( 51 ctx context.Context, 52 fromNs NodeStore, toNs NodeStore, 53 from, to Node, 54 order O, 55 considerAllRowsModified bool, 56 ) (Differ[K, O], error) { 57 var fc, tc *cursor 58 var err error 59 60 if !from.empty() { 61 fc, err = newCursorAtStart(ctx, fromNs, from) 62 if err != nil { 63 return Differ[K, O]{}, err 64 } 65 } else { 66 fc = &cursor{} 67 } 68 69 if !to.empty() { 70 tc, err = newCursorAtStart(ctx, toNs, to) 71 if err != nil { 72 return Differ[K, O]{}, err 73 } 74 } else { 75 tc = &cursor{} 76 } 77 78 fs, err := newCursorPastEnd(ctx, fromNs, from) 79 if err != nil { 80 return Differ[K, O]{}, err 81 } 82 83 ts, err := newCursorPastEnd(ctx, toNs, to) 84 if err != nil { 85 return Differ[K, O]{}, err 86 } 87 88 return Differ[K, O]{ 89 from: fc, 90 to: tc, 91 fromStop: fs, 92 toStop: ts, 93 order: order, 94 considerAllRowsModified: considerAllRowsModified, 95 }, nil 96 } 97 98 func DifferFromCursors[K ~[]byte, O Ordering[K]]( 99 ctx context.Context, 100 fromRoot, toRoot Node, 101 findStart, findStop SearchFn, 102 fromStore, toStore NodeStore, 103 order O, 104 ) (Differ[K, O], error) { 105 fromStart, err := newCursorFromSearchFn(ctx, fromStore, fromRoot, findStart) 106 if err != nil { 107 return Differ[K, O]{}, err 108 } 109 toStart, err := newCursorFromSearchFn(ctx, toStore, toRoot, findStart) 110 if err != nil { 111 return Differ[K, O]{}, err 112 } 113 fromStop, err := newCursorFromSearchFn(ctx, fromStore, fromRoot, findStop) 114 if err != nil { 115 return Differ[K, O]{}, err 116 } 117 toStop, err := newCursorFromSearchFn(ctx, toStore, toRoot, findStop) 118 if err != nil { 119 return Differ[K, O]{}, err 120 } 121 return Differ[K, O]{ 122 from: fromStart, 123 to: toStart, 124 fromStop: fromStop, 125 toStop: toStop, 126 order: order, 127 }, nil 128 } 129 130 func (td Differ[K, O]) Next(ctx context.Context) (diff Diff, err error) { 131 for td.from.Valid() && td.from.compare(td.fromStop) < 0 && td.to.Valid() && td.to.compare(td.toStop) < 0 { 132 133 f := td.from.CurrentKey() 134 t := td.to.CurrentKey() 135 cmp := td.order.Compare(K(f), K(t)) 136 137 switch { 138 case cmp < 0: 139 return sendRemoved(ctx, td.from) 140 141 case cmp > 0: 142 return sendAdded(ctx, td.to) 143 144 case cmp == 0: 145 // If the cursor schema has changed, then all rows should be considered modified. 146 // If the cursor schema hasn't changed, rows are modified iff their bytes have changed. 147 if td.considerAllRowsModified || !equalcursorValues(td.from, td.to) { 148 return sendModified(ctx, td.from, td.to) 149 } 150 151 // advance both cursors since we have already determined that they are equal. This needs to be done because 152 // skipCommon will not advance the cursors if they are equal in a collation sensitive comparison but differ 153 // in a byte comparison. 154 if err = td.from.advance(ctx); err != nil { 155 return Diff{}, err 156 } 157 if err = td.to.advance(ctx); err != nil { 158 return Diff{}, err 159 } 160 161 // seek ahead to the next diff and loop again 162 if err = skipCommon(ctx, td.from, td.to); err != nil { 163 return Diff{}, err 164 } 165 } 166 } 167 168 if td.from.Valid() && td.from.compare(td.fromStop) < 0 { 169 return sendRemoved(ctx, td.from) 170 } 171 if td.to.Valid() && td.to.compare(td.toStop) < 0 { 172 return sendAdded(ctx, td.to) 173 } 174 175 return Diff{}, io.EOF 176 } 177 178 func sendRemoved(ctx context.Context, from *cursor) (diff Diff, err error) { 179 diff = Diff{ 180 Type: RemovedDiff, 181 Key: from.CurrentKey(), 182 From: from.currentValue(), 183 } 184 185 if err = from.advance(ctx); err != nil { 186 return Diff{}, err 187 } 188 return 189 } 190 191 func sendAdded(ctx context.Context, to *cursor) (diff Diff, err error) { 192 diff = Diff{ 193 Type: AddedDiff, 194 Key: to.CurrentKey(), 195 To: to.currentValue(), 196 } 197 198 if err = to.advance(ctx); err != nil { 199 return Diff{}, err 200 } 201 return 202 } 203 204 func sendModified(ctx context.Context, from, to *cursor) (diff Diff, err error) { 205 diff = Diff{ 206 Type: ModifiedDiff, 207 Key: from.CurrentKey(), 208 From: from.currentValue(), 209 To: to.currentValue(), 210 } 211 212 if err = from.advance(ctx); err != nil { 213 return Diff{}, err 214 } 215 if err = to.advance(ctx); err != nil { 216 return Diff{}, err 217 } 218 return 219 } 220 221 func skipCommon(ctx context.Context, from, to *cursor) (err error) { 222 // track when |from.parent| and |to.parent| change 223 // to avoid unnecessary comparisons. 224 parentsAreNew := true 225 226 for from.Valid() && to.Valid() { 227 if !equalItems(from, to) { 228 // found the next difference 229 return nil 230 } 231 232 if parentsAreNew { 233 if equalParents(from, to) { 234 // if our parents are equal, we can search for differences 235 // faster at the next highest tree Level. 236 if err = skipCommonParents(ctx, from, to); err != nil { 237 return err 238 } 239 continue 240 } 241 parentsAreNew = false 242 } 243 244 // if one of the cursors is at the end of its node, it will 245 // need to Advance its parent and fetch a new node. In this 246 // case we need to Compare parents again. 247 parentsAreNew = from.atNodeEnd() || to.atNodeEnd() 248 249 if err = from.advance(ctx); err != nil { 250 return err 251 } 252 if err = to.advance(ctx); err != nil { 253 return err 254 } 255 } 256 257 return err 258 } 259 260 func skipCommonParents(ctx context.Context, from, to *cursor) (err error) { 261 err = skipCommon(ctx, from.parent, to.parent) 262 if err != nil { 263 return err 264 } 265 266 if from.parent.Valid() { 267 if err = from.fetchNode(ctx); err != nil { 268 return err 269 } 270 from.skipToNodeStart() 271 } else { 272 from.invalidateAtEnd() 273 } 274 275 if to.parent.Valid() { 276 if err = to.fetchNode(ctx); err != nil { 277 return err 278 } 279 to.skipToNodeStart() 280 } else { 281 to.invalidateAtEnd() 282 } 283 284 return 285 } 286 287 // todo(andy): assumes equal byte representations 288 func equalItems(from, to *cursor) bool { 289 return bytes.Equal(from.CurrentKey(), to.CurrentKey()) && 290 bytes.Equal(from.currentValue(), to.currentValue()) 291 } 292 293 func equalParents(from, to *cursor) (eq bool) { 294 if from.parent != nil && to.parent != nil { 295 eq = equalItems(from.parent, to.parent) 296 } 297 return 298 } 299 300 func equalcursorValues(from, to *cursor) bool { 301 return bytes.Equal(from.currentValue(), to.currentValue()) 302 }