github.com/hasnat/dolt/go@v0.0.0-20210628190320-9eb5d843fbb7/libraries/doltcore/diff/diff_summary.go (about)

     1  // Copyright 2019 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package diff
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"time"
    22  
    23  	"github.com/dolthub/dolt/go/libraries/doltcore/row"
    24  
    25  	"github.com/dolthub/dolt/go/store/diff"
    26  	"github.com/dolthub/dolt/go/store/types"
    27  )
    28  
    29  type DiffSummaryProgress struct {
    30  	Adds, Removes, Changes, CellChanges, NewSize, OldSize uint64
    31  }
    32  
// reporter converts a single difference into a DiffSummaryProgress and sends
// it on ch. Implementations in this file return ctx.Err() if ctx is done
// before the send completes.
type reporter func(ctx context.Context, change *diff.Difference, ch chan<- DiffSummaryProgress) error
    34  
    35  // todo: make package private once dolthub is migrated
    36  // Summary reports a summary of diff changes between two values
    37  // Summary reports a summary of diff changes between two values
    38  func Summary(ctx context.Context, ch chan DiffSummaryProgress, from, to types.Map) (err error) {
    39  	ad := NewAsyncDiffer(1024)
    40  	ad.Start(ctx, from, to)
    41  	defer func() {
    42  		if cerr := ad.Close(); cerr != nil && err == nil {
    43  			err = cerr
    44  		}
    45  	}()
    46  
    47  	ch <- DiffSummaryProgress{OldSize: from.Len(), NewSize: to.Len()}
    48  
    49  	hasMore := true
    50  	var diffs []*diff.Difference
    51  	for hasMore {
    52  		diffs, hasMore, err = ad.GetDiffs(100, time.Millisecond)
    53  		if err != nil {
    54  			return err
    55  		}
    56  
    57  		for i := range diffs {
    58  			curr := diffs[i]
    59  			err := reportPkChanges(ctx, curr, ch)
    60  			if err != nil {
    61  				return err
    62  			}
    63  		}
    64  	}
    65  
    66  	return nil
    67  }
    68  
    69  func SummaryForTableDelta(ctx context.Context, ch chan DiffSummaryProgress, td TableDelta) error {
    70  	keyless, err := td.IsKeyless(ctx)
    71  	if err != nil {
    72  		return err
    73  	}
    74  
    75  	fromRows, toRows, err := td.GetMaps(ctx)
    76  	if err != nil {
    77  		return err
    78  	}
    79  
    80  	var rpr reporter
    81  	if keyless {
    82  		rpr = reportKeylessChanges
    83  	} else {
    84  		rpr = reportPkChanges
    85  		ch <- DiffSummaryProgress{
    86  			OldSize: fromRows.Len(),
    87  			NewSize: toRows.Len(),
    88  		}
    89  	}
    90  
    91  	return summaryWithReporter(ctx, ch, fromRows, toRows, rpr)
    92  }
    93  
    94  func summaryWithReporter(ctx context.Context, ch chan DiffSummaryProgress, from, to types.Map, rpr reporter) (err error) {
    95  	ad := NewAsyncDiffer(1024)
    96  	ad.Start(ctx, from, to)
    97  	defer func() {
    98  		if cerr := ad.Close(); cerr != nil && err == nil {
    99  			err = cerr
   100  		}
   101  	}()
   102  
   103  	var more bool
   104  	var diffs []*diff.Difference
   105  	for {
   106  		diffs, more, err = ad.GetDiffs(100, time.Millisecond)
   107  		if err != nil {
   108  			return err
   109  		}
   110  
   111  		for _, df := range diffs {
   112  			err = rpr(ctx, df, ch)
   113  			if err != nil {
   114  				return err
   115  			}
   116  		}
   117  
   118  		if !more {
   119  			break
   120  		}
   121  	}
   122  
   123  	return nil
   124  }
   125  
   126  func reportPkChanges(ctx context.Context, change *diff.Difference, ch chan<- DiffSummaryProgress) error {
   127  	var summary DiffSummaryProgress
   128  	switch change.ChangeType {
   129  	case types.DiffChangeAdded:
   130  		summary = DiffSummaryProgress{Adds: 1}
   131  	case types.DiffChangeRemoved:
   132  		summary = DiffSummaryProgress{Removes: 1}
   133  	case types.DiffChangeModified:
   134  		oldTuple := change.OldValue.(types.Tuple)
   135  		newTuple := change.NewValue.(types.Tuple)
   136  		cellChanges, err := row.CountCellDiffs(oldTuple, newTuple)
   137  		if err != nil {
   138  			return err
   139  		}
   140  		summary = DiffSummaryProgress{Changes: 1, CellChanges: cellChanges}
   141  	default:
   142  		return errors.New("unknown change type")
   143  	}
   144  	select {
   145  	case ch <- summary:
   146  		return nil
   147  	case <-ctx.Done():
   148  		return ctx.Err()
   149  	}
   150  }
   151  
   152  func reportKeylessChanges(ctx context.Context, change *diff.Difference, ch chan<- DiffSummaryProgress) error {
   153  	var oldCard uint64
   154  	if change.OldValue != nil {
   155  		v, err := change.OldValue.(types.Tuple).Get(row.KeylessCardinalityValIdx)
   156  		if err != nil {
   157  			return err
   158  		}
   159  		oldCard = uint64(v.(types.Uint))
   160  	}
   161  
   162  	var newCard uint64
   163  	if change.NewValue != nil {
   164  		v, err := change.NewValue.(types.Tuple).Get(row.KeylessCardinalityValIdx)
   165  		if err != nil {
   166  			return err
   167  		}
   168  		newCard = uint64(v.(types.Uint))
   169  	}
   170  
   171  	var summary DiffSummaryProgress
   172  	delta := int64(newCard) - int64(oldCard)
   173  	if delta > 0 {
   174  		summary = DiffSummaryProgress{Adds: uint64(delta)}
   175  	} else if delta < 0 {
   176  		summary = DiffSummaryProgress{Removes: uint64(-delta)}
   177  	} else {
   178  		return fmt.Errorf("diff with delta = 0 for key: %s", change.KeyValue.HumanReadableString())
   179  	}
   180  
   181  	select {
   182  	case ch <- summary:
   183  		return nil
   184  	case <-ctx.Done():
   185  		return ctx.Err()
   186  	}
   187  }