github.com/qri-io/qri@v0.10.1-0.20220104210721-c771715036cb/base/dsfs/commit.go (about)

     1  package dsfs
     2  
     3  import (
     4  	"context"
     5  	"encoding/base64"
     6  	"fmt"
     7  	"io/ioutil"
     8  	"path/filepath"
     9  	"time"
    10  
    11  	crypto "github.com/libp2p/go-libp2p-core/crypto"
    12  	"github.com/qri-io/dataset"
    13  	"github.com/qri-io/dataset/dsio"
    14  	"github.com/qri-io/deepdiff"
    15  	"github.com/qri-io/qfs"
    16  	"github.com/qri-io/qri/base/friendly"
    17  	"github.com/qri-io/qri/base/toqtype"
    18  	"github.com/qri-io/qri/event"
    19  )
    20  
    21  // Timestamp is an function for getting commit timestamps
    22  // timestamps MUST be stored in UTC time zone
    23  var Timestamp = func() time.Time {
    24  	return time.Now().UTC()
    25  }
    26  
    27  // BodyAction represents the action that should be taken to understand how the
    28  // body changed
    29  type BodyAction string
    30  
    31  const (
    32  	// BodyDefault is the default action: compare them to get how much changed
    33  	BodyDefault BodyAction = "default"
    34  	// BodySame means that the bodies are the same, no need to compare
    35  	BodySame BodyAction = "same"
    36  	// BodyTooBig means the body is too big to directly compare, and should use
    37  	// some other method
    38  	BodyTooBig BodyAction = "too_big"
    39  )
    40  
    41  func commitFileAddFunc(ctx context.Context, privKey crypto.PrivKey, publisher event.Publisher) writeComponentFunc {
    42  	return func(src qfs.Filesystem, dst qfs.MerkleDagStore, prev, ds *dataset.Dataset, added qfs.Links, sw *SaveSwitches) error {
    43  		if ds.Commit == nil {
    44  			return errNoComponent
    45  		}
    46  
    47  		if evtErr := publisher.Publish(ctx, event.ETDatasetSaveProgress, event.DsSaveEvent{
    48  			Username:   ds.Peername,
    49  			Name:       ds.Name,
    50  			Message:    "finalizing",
    51  			Completion: 0.9,
    52  		}); evtErr != nil {
    53  			log.Debugw("publish event errored", "error", evtErr)
    54  		}
    55  
    56  		log.Debugw("writing commit file", "bodyAction", sw.bodyAct, "force", sw.ForceIfNoChanges, "fileHint", sw.FileHint)
    57  
    58  		updateScriptPaths(dst, ds, added)
    59  
    60  		if err := confirmByteChangesExist(ds, prev, sw.ForceIfNoChanges, dst, added); err != nil {
    61  			return fmt.Errorf("saving failed: %w", err)
    62  		}
    63  
    64  		if err := EnsureCommitTitleAndMessage(ctx, src, ds, prev, sw.bodyAct, sw.FileHint, sw.ForceIfNoChanges); err != nil {
    65  			log.Debugf("EnsureCommitTitleAndMessage: %s", err)
    66  			return fmt.Errorf("saving failed: %w", err)
    67  		}
    68  
    69  		ds.DropTransientValues()
    70  		setComponentRefs(dst, ds, bodyFilename(ds), added)
    71  
    72  		signedBytes, err := privKey.Sign(ds.SigningBytes())
    73  		if err != nil {
    74  			log.Debug(err.Error())
    75  			return fmt.Errorf("signing commit: %w", err)
    76  		}
    77  		ds.Commit.Signature = base64.StdEncoding.EncodeToString(signedBytes)
    78  		log.Debugw("writing commit", "title", ds.Commit.Title, "message", ds.Commit.Message)
    79  
    80  		f, err := JSONFile(PackageFileCommit.String(), ds.Commit)
    81  		if err != nil {
    82  			return err
    83  		}
    84  		return writePackageFile(dst, f, added)
    85  	}
    86  }
    87  
    88  // confirmByteChangesExist returns an early error if no components paths
    89  // differ from the previous flag & we're not forcing a commit.
    90  // if we are forcing a commit, set commit title and message values, which
    91  // triggers a fast-path in EnsureCommitTitleAndMessage
    92  //
    93  // keep in mind: it is possible for byte-level changes to exist, but not cause
    94  // any alterations to dataset values, (for example: removing non-sensitive
    95  // whitespace)
    96  func confirmByteChangesExist(ds, prev *dataset.Dataset, force bool, dst qfs.MerkleDagStore, added qfs.Links) error {
    97  	if force {
    98  		log.Debugf("forcing changes. skipping uniqueness checks")
    99  		// fast path: forced changes ignore all comparison
   100  		if ds.Commit.Title == "" {
   101  			ds.Commit.Title = "forced update"
   102  		}
   103  		if ds.Commit.Message == "" {
   104  			ds.Commit.Message = "forced update"
   105  		}
   106  		return nil
   107  	}
   108  
   109  	if prev == nil {
   110  		return nil
   111  	}
   112  
   113  	// Viz, Readme and Transform components are inlined in the dataset, so they
   114  	// don't have path values before the commit component is finalized.
   115  	// use field equality checks instead of path comparison
   116  	if !ds.Viz.ShallowCompare(prev.Viz) {
   117  		log.Debugf("byte changes exist. viz components are inequal")
   118  		return nil
   119  	}
   120  	if !ds.Readme.ShallowCompare(prev.Readme) {
   121  		log.Debugf("byte changes exist. readme components are inequal")
   122  		return nil
   123  	}
   124  	if !ds.Transform.ShallowCompare(prev.Transform) {
   125  		log.Debugf("byte changes exist. transform components are inequal")
   126  		return nil
   127  	}
   128  
   129  	// create path map for previous, ignoring dataset & commit components which
   130  	// don't yet have paths on the next version
   131  	prevRefs := prev.PathMap("dataset", "commit")
   132  
   133  	// create an empty dataset & populate it with path references to avoid
   134  	// altering the in-flight dataset
   135  	nextDs := &dataset.Dataset{}
   136  	setComponentRefs(dst, nextDs, bodyFilename(ds), added)
   137  	nextRefs := nextDs.PathMap()
   138  
   139  	for key, nextPath := range nextRefs {
   140  		if prevRefs[key] != nextPath {
   141  			log.Debugf("byte changes exist. %q components are inequal", key)
   142  			return nil
   143  		}
   144  	}
   145  	// need to check previous paths in case next version is dropping components
   146  	for key, prevPath := range prevRefs {
   147  		if nextRefs[key] != prevPath {
   148  			log.Debugf("byte changes exist. %q components are inequal", key)
   149  			return nil
   150  		}
   151  	}
   152  
   153  	log.Debugw("confirmByteChanges", "err", ErrNoChanges)
   154  	return ErrNoChanges
   155  }
   156  
   157  // EnsureCommitTitleAndMessage creates the commit and title, message, skipping
   158  // if both title and message are set. If no values are provided a commit
   159  // description is generated by examining changes between the two versions
   160  func EnsureCommitTitleAndMessage(ctx context.Context, fs qfs.Filesystem, ds, prev *dataset.Dataset, bodyAct BodyAction, fileHint string, forceIfNoChanges bool) error {
   161  	if ds.Commit == nil {
   162  		ds.Commit = &dataset.Commit{}
   163  	}
   164  	if ds.Commit.Title != "" && ds.Commit.Message != "" {
   165  		log.Debugf("commit meta & title are set. skipping commit description calculation")
   166  		return nil
   167  	}
   168  
   169  	// fast path when commit and title are set
   170  	log.Debugw("EnsureCommitTitleAndMessage", "bodyAct", bodyAct)
   171  	shortTitle, longMessage, err := generateCommitDescriptions(ctx, fs, ds, prev, bodyAct, forceIfNoChanges)
   172  	if err != nil {
   173  		log.Debugf("generateCommitDescriptions err: %s", err)
   174  		return err
   175  	}
   176  
   177  	if shortTitle == defaultCreatedDescription && fileHint != "" {
   178  		shortTitle = shortTitle + " from " + filepath.Base(fileHint)
   179  	}
   180  	if longMessage == defaultCreatedDescription && fileHint != "" {
   181  		longMessage = longMessage + " from " + filepath.Base(fileHint)
   182  	}
   183  
   184  	if ds.Commit.Title == "" {
   185  		ds.Commit.Title = shortTitle
   186  	}
   187  	if ds.Commit.Message == "" {
   188  		ds.Commit.Message = longMessage
   189  	}
   190  
   191  	return nil
   192  }
   193  
   194  const defaultCreatedDescription = "created dataset"
   195  
   196  // returns a commit message based on the diff of the two datasets
   197  func generateCommitDescriptions(ctx context.Context, fs qfs.Filesystem, ds, prev *dataset.Dataset, bodyAct BodyAction, forceIfNoChanges bool) (short, long string, err error) {
   198  	if prev == nil || prev.IsEmpty() {
   199  		return defaultCreatedDescription, defaultCreatedDescription, nil
   200  	}
   201  
   202  	// Inline body if it is a reasonable size, to get message about how the body has changed.
   203  	if bodyAct != BodySame {
   204  		// If previous version had bodyfile, read it and assign it
   205  		if prev.Structure != nil && prev.Structure.Length < BodySizeSmallEnoughToDiff {
   206  			if prev.BodyFile() != nil {
   207  				log.Debugf("inlining body file to calculate a diff")
   208  				if prevReader, err := dsio.NewEntryReader(prev.Structure, prev.BodyFile()); err == nil {
   209  					if prevBodyData, err := dsio.ReadAll(prevReader); err == nil {
   210  						prev.Body = prevBodyData
   211  					}
   212  				}
   213  			}
   214  		}
   215  	}
   216  
   217  	// Read the transform files to see if they changed.
   218  	// TODO(dustmop): Would be better to get a line-by-line diff
   219  	if prev.Transform != nil && prev.Transform.ScriptPath != "" {
   220  		log.Debugf("inlining prev transform ScriptPath=%q", prev.Transform.ScriptPath)
   221  		err := prev.Transform.OpenScriptFile(ctx, fs)
   222  		if err != nil {
   223  			log.Error("prev.Transform.ScriptPath %q open err: %s", prev.Transform.ScriptPath, err)
   224  		} else {
   225  			tfFile := prev.Transform.ScriptFile()
   226  			content, err := ioutil.ReadAll(tfFile)
   227  			if err != nil {
   228  				log.Error("prev.Transform.ScriptPath %q read err: %s", prev.Transform.ScriptPath, err)
   229  			}
   230  			prev.Transform.Text = string(content)
   231  		}
   232  	}
   233  	if ds.Transform != nil && ds.Transform.ScriptPath != "" {
   234  		log.Debugf("inlining next transform ScriptPath=%q", ds.Transform.ScriptPath)
   235  		err = ds.Transform.OpenScriptFile(ctx, fs)
   236  		if err != nil {
   237  			log.Errorf("ds.Transform.ScriptPath %q open err: %s", ds.Transform.ScriptPath, err)
   238  		} else {
   239  			tfFile := ds.Transform.ScriptFile()
   240  			content, err := ioutil.ReadAll(tfFile)
   241  			if err != nil {
   242  				log.Errorf("ds.Transform.ScriptPath %q read err: %s", ds.Transform.ScriptPath, err)
   243  			}
   244  			ds.Transform.Text = string(content)
   245  		}
   246  		// Reopen the transform file so that WriteDataset will be able to write it to the store.
   247  		if reopenErr := ds.Transform.OpenScriptFile(ctx, fs); reopenErr != nil {
   248  			log.Debugf("error reopening transform script file: %q", reopenErr)
   249  		}
   250  	}
   251  
   252  	// Read the readme files to see if they changed.
   253  	// TODO(dustmop): Would be better to get a line-by-line diff
   254  	if prev.Readme != nil && prev.Readme.ScriptPath != "" {
   255  		log.Debugf("inlining prev readme ScriptPath=%q", prev.Readme.ScriptPath)
   256  		err := prev.Readme.OpenScriptFile(ctx, fs)
   257  		if err != nil {
   258  			log.Error("prev.Readme.ScriptPath %q open err: %s", prev.Readme.ScriptPath, err)
   259  		} else {
   260  			tfFile := prev.Readme.ScriptFile()
   261  			content, err := ioutil.ReadAll(tfFile)
   262  			if err != nil {
   263  				log.Error("prev.Readme.ScriptPath %q read err: %s", prev.Readme.ScriptPath, err)
   264  			}
   265  			prev.Readme.Text = string(content)
   266  		}
   267  	}
   268  	if ds.Readme != nil && ds.Readme.ScriptPath != "" {
   269  		log.Debugf("inlining next readme ScriptPath=%q", ds.Readme.ScriptPath)
   270  		err = ds.Readme.OpenScriptFile(ctx, fs)
   271  		if err != nil {
   272  			log.Debugf("ds.Readme.ScriptPath %q open err: %s", ds.Readme.ScriptPath, err)
   273  			err = nil
   274  		} else {
   275  			tfFile := ds.Readme.ScriptFile()
   276  			content, err := ioutil.ReadAll(tfFile)
   277  			if err != nil {
   278  				log.Errorf("ds.Readme.ScriptPath %q read err: %s", ds.Readme.ScriptPath, err)
   279  			}
   280  			ds.Readme.Text = string(content)
   281  		}
   282  		if reopenErr := ds.Readme.OpenScriptFile(ctx, fs); reopenErr != nil {
   283  			log.Debugf("error reopening readme script file: %q", reopenErr)
   284  		}
   285  	}
   286  
   287  	var prevData map[string]interface{}
   288  	prevData, err = toqtype.StructToMap(prev)
   289  	if err != nil {
   290  		return "", "", err
   291  	}
   292  
   293  	var nextData map[string]interface{}
   294  	nextData, err = toqtype.StructToMap(ds)
   295  	if err != nil {
   296  		return "", "", err
   297  	}
   298  
   299  	// TODO(dustmop): All of this should be using fill and/or component. Would be awesome to
   300  	// be able to do:
   301  	//   prevBody = fill.GetPathValue(prevData, "body")
   302  	//   fill.DeletePathValue(prevData, "body")
   303  	//   component.DropDerivedValues(prevData, "structure")
   304  	var prevBody interface{}
   305  	var nextBody interface{}
   306  	if bodyAct != BodySame {
   307  		prevBody = prevData["body"]
   308  		nextBody = nextData["body"]
   309  	}
   310  	delete(prevData, "body")
   311  	delete(nextData, "body")
   312  
   313  	if prevTransform, ok := prevData["transform"]; ok {
   314  		if prevObject, ok := prevTransform.(map[string]interface{}); ok {
   315  			delete(prevObject, "scriptPath")
   316  		}
   317  	}
   318  	if nextTransform, ok := nextData["transform"]; ok {
   319  		if nextObject, ok := nextTransform.(map[string]interface{}); ok {
   320  			delete(nextObject, "scriptPath")
   321  		}
   322  	}
   323  	if prevReadme, ok := prevData["readme"]; ok {
   324  		if prevObject, ok := prevReadme.(map[string]interface{}); ok {
   325  			delete(prevObject, "scriptPath")
   326  		}
   327  	}
   328  	if nextReadme, ok := nextData["readme"]; ok {
   329  		if nextObject, ok := nextReadme.(map[string]interface{}); ok {
   330  			delete(nextObject, "scriptPath")
   331  		}
   332  	}
   333  	if prevMeta, ok := prevData["meta"]; ok {
   334  		if prevObject, ok := prevMeta.(map[string]interface{}); ok {
   335  			delete(prevObject, "path")
   336  			delete(prevObject, "qri")
   337  		}
   338  	}
   339  	if nextMeta, ok := nextData["meta"]; ok {
   340  		if nextObject, ok := nextMeta.(map[string]interface{}); ok {
   341  			delete(nextObject, "path")
   342  			delete(nextObject, "qri")
   343  		}
   344  	}
   345  
   346  	var prevChecksum, nextChecksum string
   347  
   348  	if prevStructure, ok := prevData["structure"]; ok {
   349  		if prevObject, ok := prevStructure.(map[string]interface{}); ok {
   350  			if checksum, ok := prevObject["checksum"].(string); ok {
   351  				prevChecksum = checksum
   352  			}
   353  			delete(prevObject, "checksum")
   354  			delete(prevObject, "entries")
   355  			delete(prevObject, "length")
   356  			delete(prevObject, "depth")
   357  			delete(prevObject, "path")
   358  			delete(prevObject, "errCount")
   359  		}
   360  	}
   361  	if nextStructure, ok := nextData["structure"]; ok {
   362  		if nextObject, ok := nextStructure.(map[string]interface{}); ok {
   363  			if checksum, ok := nextObject["checksum"].(string); ok {
   364  				nextChecksum = checksum
   365  			}
   366  			delete(nextObject, "checksum")
   367  			delete(nextObject, "entries")
   368  			delete(nextObject, "length")
   369  			delete(nextObject, "depth")
   370  			delete(nextObject, "path")
   371  			delete(nextObject, "errCount")
   372  		}
   373  	}
   374  
   375  	// If the body is too big to diff, compare the checksums. If they differ, assume the
   376  	// body has changed.
   377  	assumeBodyChanged := false
   378  	if bodyAct == BodyTooBig {
   379  		prevBody = nil
   380  		nextBody = nil
   381  		log.Debugw("checking checksum equality", "prev", prevChecksum, "next", nextChecksum)
   382  		if prevChecksum != nextChecksum {
   383  			assumeBodyChanged = true
   384  		}
   385  	}
   386  
   387  	var headDiff, bodyDiff deepdiff.Deltas
   388  	var bodyStat *deepdiff.Stats
   389  
   390  	// Diff the head and body separately. This allows accurate stats when figuring out how much
   391  	// of the body has changed.
   392  	headDiff, _, err = deepdiff.New().StatDiff(ctx, prevData, nextData)
   393  	if err != nil {
   394  		return "", "", err
   395  	}
   396  	if prevBody != nil && nextBody != nil {
   397  		log.Debugf("calculating body statDiff type(prevBody)=%T type(nextBody)=%T", prevBody, nextBody)
   398  		bodyDiff, bodyStat, err = deepdiff.New().StatDiff(ctx, prevBody, nextBody)
   399  		if err != nil {
   400  			log.Debugf("error calculating body statDiff: %q", err)
   401  			return "", "", err
   402  		}
   403  	}
   404  
   405  	shortTitle, longMessage := friendly.DiffDescriptions(headDiff, bodyDiff, bodyStat, assumeBodyChanged)
   406  	if shortTitle == "" {
   407  		if forceIfNoChanges {
   408  			return "forced update", "forced update", nil
   409  		}
   410  		log.Debugw("generateCommitDescriptions", "err", ErrNoChanges)
   411  		return "", "", ErrNoChanges
   412  	}
   413  
   414  	log.Debugw("generateCommitDescriptions", "shortTitle", shortTitle, "message", longMessage, "bodyChanged", assumeBodyChanged)
   415  	return shortTitle, longMessage, nil
   416  }