github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/diff/async_differ.go (about) 1 // Copyright 2019 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package diff 16 17 import ( 18 "context" 19 "fmt" 20 "time" 21 22 "golang.org/x/sync/errgroup" 23 24 "github.com/dolthub/dolt/go/libraries/doltcore/row" 25 "github.com/dolthub/dolt/go/libraries/doltcore/schema" 26 "github.com/dolthub/dolt/go/libraries/utils/async" 27 "github.com/dolthub/dolt/go/store/diff" 28 "github.com/dolthub/dolt/go/store/types" 29 ) 30 31 func NewRowDiffer(ctx context.Context, format *types.NomsBinFormat, fromSch, toSch schema.Schema, buf int) RowDiffer { 32 ad := NewAsyncDiffer(buf) 33 34 // Returns an EmptyRowDiffer if the two schemas are not diffable. 35 if !schema.ArePrimaryKeySetsDiffable(format, fromSch, toSch) { 36 return &EmptyRowDiffer{} 37 } 38 39 if schema.IsKeyless(fromSch) || schema.IsKeyless(toSch) { 40 return &keylessDiffer{AsyncDiffer: ad} 41 } 42 43 return ad 44 } 45 46 // todo: make package private 47 type AsyncDiffer struct { 48 diffChan chan diff.Difference 49 bufferSize int 50 51 eg *errgroup.Group 52 egCtx context.Context 53 egCancel func() 54 55 diffStats map[types.DiffChangeType]uint64 56 } 57 58 var _ RowDiffer = &AsyncDiffer{} 59 60 // todo: make package private once dolthub is migrated 61 func NewAsyncDiffer(bufferedDiffs int) *AsyncDiffer { 62 return &AsyncDiffer{ 63 diffChan: make(chan diff.Difference, bufferedDiffs), 64 bufferSize: bufferedDiffs, 65 egCtx: context.Background(), 66 egCancel: func() {}, 67 diffStats: make(map[types.DiffChangeType]uint64), 68 } 69 } 70 71 func tableDontDescendLists(v1, v2 types.Value) bool { 72 kind := v1.Kind() 73 return !types.IsPrimitiveKind(kind) && kind != types.TupleKind && kind == v2.Kind() && kind != types.RefKind 74 } 75 76 func (ad *AsyncDiffer) Start(ctx context.Context, from, to types.Map) { 77 ad.start(ctx, func(ctx context.Context) error { 78 return diff.Diff(ctx, from, to, ad.diffChan, true, tableDontDescendLists) 79 }) 80 } 81 82 func (ad *AsyncDiffer) StartWithRange(ctx context.Context, from, to types.Map, start types.Value, inRange types.ValueInRange) { 83 ad.start(ctx, func(ctx context.Context) error { 84 return diff.DiffMapRange(ctx, from, to, start, inRange, ad.diffChan, true, tableDontDescendLists) 85 }) 86 } 87 88 func (ad *AsyncDiffer) start(ctx context.Context, diffFunc func(ctx context.Context) error) { 89 ad.eg, ad.egCtx = errgroup.WithContext(ctx) 90 ad.egCancel = async.GoWithCancel(ad.egCtx, ad.eg, func(ctx context.Context) (err error) { 91 defer close(ad.diffChan) 92 defer func() { 93 if r := recover(); r != nil { 94 err = fmt.Errorf("panic in diff.Diff: %v", r) 95 } 96 }() 97 return diffFunc(ctx) 98 }) 99 } 100 101 func (ad *AsyncDiffer) Close() error { 102 ad.egCancel() 103 return ad.eg.Wait() 104 } 105 106 func (ad *AsyncDiffer) getDiffs(numDiffs int, timeoutChan <-chan time.Time, pred diffPredicate) ([]*diff.Difference, bool, error) { 107 diffs := make([]*diff.Difference, 0, numDiffs) 108 for { 109 select { 110 case d, more := <-ad.diffChan: 111 if more { 112 if pred(&d) { 113 ad.diffStats[d.ChangeType]++ 114 diffs = append(diffs, &d) 115 } 116 if numDiffs != 0 && numDiffs == len(diffs) { 117 return diffs, true, nil 118 } 119 } else { 120 return diffs, false, ad.eg.Wait() 121 } 122 case <-timeoutChan: 123 return diffs, true, nil 124 case <-ad.egCtx.Done(): 125 return nil, false, ad.eg.Wait() 126 } 127 } 128 } 129 130 var forever <-chan time.Time = make(chan time.Time) 131 132 type diffPredicate func(*diff.Difference) bool 133 134 var alwaysTruePredicate diffPredicate = func(*diff.Difference) bool { 135 return true 136 } 137 138 func hasChangeTypePredicate(changeType types.DiffChangeType) diffPredicate { 139 return func(d *diff.Difference) bool { 140 return d.ChangeType == changeType 141 } 142 } 143 144 func (ad *AsyncDiffer) GetDiffs(numDiffs int, timeout time.Duration) ([]*diff.Difference, bool, error) { 145 if timeout < 0 { 146 return ad.GetDiffsWithoutTimeout(numDiffs) 147 } 148 return ad.getDiffs(numDiffs, time.After(timeout), alwaysTruePredicate) 149 } 150 151 func (ad *AsyncDiffer) GetDiffsWithFilter(numDiffs int, timeout time.Duration, filterByChangeType types.DiffChangeType) ([]*diff.Difference, bool, error) { 152 if timeout < 0 { 153 return ad.GetDiffsWithoutTimeoutWithFilter(numDiffs, filterByChangeType) 154 } 155 return ad.getDiffs(numDiffs, time.After(timeout), hasChangeTypePredicate(filterByChangeType)) 156 } 157 158 func (ad *AsyncDiffer) GetDiffsWithoutTimeoutWithFilter(numDiffs int, filterByChangeType types.DiffChangeType) ([]*diff.Difference, bool, error) { 159 return ad.getDiffs(numDiffs, forever, hasChangeTypePredicate(filterByChangeType)) 160 } 161 162 func (ad *AsyncDiffer) GetDiffsWithoutTimeout(numDiffs int) ([]*diff.Difference, bool, error) { 163 return ad.getDiffs(numDiffs, forever, alwaysTruePredicate) 164 } 165 166 type keylessDiffer struct { 167 *AsyncDiffer 168 169 df diff.Difference 170 copiesLeft uint64 171 } 172 173 var _ RowDiffer = &keylessDiffer{} 174 175 func (kd *keylessDiffer) getDiffs(numDiffs int, timeoutChan <-chan time.Time, pred diffPredicate) ([]*diff.Difference, bool, error) { 176 diffs := make([]*diff.Difference, numDiffs) 177 idx := 0 178 179 for { 180 // first populate |diffs| with copies of |kd.df| 181 182 cpy := kd.df // save a copy of kd.df to reference 183 for (idx < numDiffs) && (kd.copiesLeft > 0) { 184 diffs[idx] = &cpy 185 idx++ 186 kd.copiesLeft-- 187 } 188 if idx == numDiffs { 189 return diffs, true, nil 190 } 191 192 // then find the next Difference the satisfies |pred| 193 match := false 194 for !match { 195 select { 196 case <-timeoutChan: 197 return diffs, true, nil 198 199 case <-kd.egCtx.Done(): 200 return nil, false, kd.eg.Wait() 201 202 case d, more := <-kd.diffChan: 203 if !more { 204 return diffs[:idx], more, nil 205 } 206 207 var err error 208 kd.df, kd.copiesLeft, err = convertDiff(d) 209 if err != nil { 210 return nil, false, err 211 } 212 213 match = pred(&kd.df) 214 } 215 } 216 } 217 } 218 219 func (kd *keylessDiffer) GetDiffs(numDiffs int, timeout time.Duration) ([]*diff.Difference, bool, error) { 220 if timeout < 0 { 221 return kd.getDiffs(numDiffs, forever, alwaysTruePredicate) 222 } 223 return kd.getDiffs(numDiffs, time.After(timeout), alwaysTruePredicate) 224 } 225 226 func (kd *keylessDiffer) GetDiffsWithFilter(numDiffs int, timeout time.Duration, filterByChangeType types.DiffChangeType) ([]*diff.Difference, bool, error) { 227 if timeout < 0 { 228 return kd.getDiffs(numDiffs, forever, hasChangeTypePredicate(filterByChangeType)) 229 } 230 return kd.getDiffs(numDiffs, time.After(timeout), hasChangeTypePredicate(filterByChangeType)) 231 } 232 233 // convertDiff reports the cardinality of a change, 234 // and converts updates to adds or deletes 235 func convertDiff(df diff.Difference) (diff.Difference, uint64, error) { 236 var oldCard uint64 237 if df.OldValue != nil { 238 v, err := df.OldValue.(types.Tuple).Get(row.KeylessCardinalityValIdx) 239 if err != nil { 240 return df, 0, err 241 } 242 oldCard = uint64(v.(types.Uint)) 243 } 244 245 var newCard uint64 246 if df.NewValue != nil { 247 v, err := df.NewValue.(types.Tuple).Get(row.KeylessCardinalityValIdx) 248 if err != nil { 249 return df, 0, err 250 } 251 newCard = uint64(v.(types.Uint)) 252 } 253 254 switch df.ChangeType { 255 case types.DiffChangeRemoved: 256 return df, oldCard, nil 257 258 case types.DiffChangeAdded: 259 return df, newCard, nil 260 261 case types.DiffChangeModified: 262 delta := int64(newCard) - int64(oldCard) 263 if delta > 0 { 264 df.ChangeType = types.DiffChangeAdded 265 df.OldValue = nil 266 return df, uint64(delta), nil 267 } else if delta < 0 { 268 df.ChangeType = types.DiffChangeRemoved 269 df.NewValue = nil 270 return df, uint64(-delta), nil 271 } else { 272 panic(fmt.Sprintf("diff with delta = 0 for key: %s", df.KeyValue.HumanReadableString())) 273 } 274 default: 275 return df, 0, fmt.Errorf("unexpected DiffChange type %d", df.ChangeType) 276 } 277 } 278 279 type EmptyRowDiffer struct { 280 } 281 282 var _ RowDiffer = &EmptyRowDiffer{} 283 284 func (e EmptyRowDiffer) Start(ctx context.Context, from, to types.Map) { 285 } 286 287 func (e EmptyRowDiffer) StartWithRange(ctx context.Context, from, to types.Map, start types.Value, inRange types.ValueInRange) { 288 289 } 290 291 func (e EmptyRowDiffer) GetDiffs(numDiffs int, timeout time.Duration) ([]*diff.Difference, bool, error) { 292 return nil, false, nil 293 } 294 295 func (e EmptyRowDiffer) GetDiffsWithFilter(numDiffs int, timeout time.Duration, filterByChangeType types.DiffChangeType) ([]*diff.Difference, bool, error) { 296 return nil, false, nil 297 } 298 299 func (e EmptyRowDiffer) Close() error { 300 return nil 301 }