github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/env/actions/clone.go (about)

     1  // Copyright 2021 Dolthub, Inc.
     2  //
     3  // Licensed under the Apache License, Version 2.0 (the "License");
     4  // you may not use this file except in compliance with the License.
     5  // You may obtain a copy of the License at
     6  //
     7  //     http://www.apache.org/licenses/LICENSE-2.0
     8  //
     9  // Unless required by applicable law or agreed to in writing, software
    10  // distributed under the License is distributed on an "AS IS" BASIS,
    11  // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
    12  // See the License for the specific language governing permissions and
    13  // limitations under the License.
    14  
    15  package actions
    16  
    17  import (
    18  	"context"
    19  	"errors"
    20  	"fmt"
    21  	"path/filepath"
    22  	"sort"
    23  	"sync"
    24  
    25  	"github.com/dustin/go-humanize"
    26  
    27  	"github.com/dolthub/dolt/go/cmd/dolt/cli"
    28  	"github.com/dolthub/dolt/go/libraries/doltcore/dbfactory"
    29  	"github.com/dolthub/dolt/go/libraries/doltcore/doltdb"
    30  	"github.com/dolthub/dolt/go/libraries/doltcore/env"
    31  	"github.com/dolthub/dolt/go/libraries/doltcore/ref"
    32  	"github.com/dolthub/dolt/go/libraries/utils/config"
    33  	"github.com/dolthub/dolt/go/libraries/utils/filesys"
    34  	"github.com/dolthub/dolt/go/libraries/utils/iohelp"
    35  	"github.com/dolthub/dolt/go/libraries/utils/strhelp"
    36  	"github.com/dolthub/dolt/go/store/chunks"
    37  	"github.com/dolthub/dolt/go/store/datas"
    38  	"github.com/dolthub/dolt/go/store/datas/pull"
    39  	"github.com/dolthub/dolt/go/store/types"
    40  )
    41  
    42  var ErrRepositoryExists = errors.New("data repository already exists")
    43  var ErrFailedToCreateDirectory = errors.New("unable to create directories")
    44  var ErrFailedToAccessDir = errors.New("unable to access directories")
    45  var ErrFailedToCreateRepoStateWithRemote = errors.New("unable to create repo state with remote")
    46  var ErrNoDataAtRemote = errors.New("remote at that url contains no Dolt data")
    47  var ErrFailedToListBranches = errors.New("failed to list branches")
    48  var ErrFailedToGetBranch = errors.New("could not get branch")
    49  var ErrFailedToGetRootValue = errors.New("could not find root value")
    50  var ErrFailedToCreateRemoteRef = errors.New("could not create remote ref")
    51  var ErrFailedToCreateTagRef = errors.New("could not create tag ref")
    52  var ErrFailedToCreateLocalBranch = errors.New("could not create local branch")
    53  var ErrFailedToDeleteBranch = errors.New("could not delete local branch after clone")
    54  var ErrUserNotFound = errors.New("could not determine user name. run dolt config --global --add user.name")
    55  var ErrEmailNotFound = errors.New("could not determine email. run dolt config --global --add user.email")
    56  var ErrCloneFailed = errors.New("clone failed")
    57  
    58  // EnvForClone creates a new DoltEnv and configures it with repo state from the specified remote. The returned DoltEnv is ready for content to be cloned into it. The directory used for the new DoltEnv is determined by resolving the specified dir against the specified Filesys.
    59  func EnvForClone(ctx context.Context, nbf *types.NomsBinFormat, r env.Remote, dir string, fs filesys.Filesys, version string, homeProvider env.HomeDirProvider) (*env.DoltEnv, error) {
    60  	exists, _ := fs.Exists(filepath.Join(dir, dbfactory.DoltDir))
    61  
    62  	if exists {
    63  		return nil, fmt.Errorf("%w: %s", ErrRepositoryExists, dir)
    64  	}
    65  
    66  	err := fs.MkDirs(dir)
    67  	if err != nil {
    68  		return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateDirectory, dir, err.Error())
    69  	}
    70  
    71  	newFs, err := fs.WithWorkingDir(dir)
    72  	if err != nil {
    73  		return nil, fmt.Errorf("%w: %s; %s", ErrFailedToAccessDir, dir, err.Error())
    74  	}
    75  
    76  	dEnv := env.Load(ctx, homeProvider, newFs, doltdb.LocalDirDoltDB, version)
    77  	err = dEnv.InitRepoWithNoData(ctx, nbf)
    78  	if err != nil {
    79  		return nil, fmt.Errorf("failed to init repo: %w", err)
    80  	}
    81  
    82  	dEnv.RSLoadErr = nil
    83  	if !env.IsEmptyRemote(r) {
    84  		dEnv.RepoState, err = env.CloneRepoState(dEnv.FS, r)
    85  		if err != nil {
    86  			return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRepoStateWithRemote, r.Name, err.Error())
    87  		}
    88  	}
    89  
    90  	return dEnv, nil
    91  }
    92  
    93  func clonePrint(eventCh <-chan pull.TableFileEvent) {
    94  	var (
    95  		chunksC           int64
    96  		chunksDownloading int64
    97  		chunksDownloaded  int64
    98  		currStats         = make(map[string]iohelp.ReadStats)
    99  		tableFiles        = make(map[string]*chunks.TableFile)
   100  	)
   101  
   102  	p := cli.NewEphemeralPrinter()
   103  
   104  	p.Printf("Retrieving remote information.\n")
   105  	p.Display()
   106  
   107  	for tblFEvt := range eventCh {
   108  		switch tblFEvt.EventType {
   109  		case pull.Listed:
   110  			for _, tf := range tblFEvt.TableFiles {
   111  				c := tf
   112  				tableFiles[c.FileID()] = &c
   113  				chunksC += int64(tf.NumChunks())
   114  			}
   115  		case pull.DownloadStart:
   116  			for _, tf := range tblFEvt.TableFiles {
   117  				chunksDownloading += int64(tf.NumChunks())
   118  			}
   119  		case pull.DownloadStats:
   120  			for i, s := range tblFEvt.Stats {
   121  				tf := tblFEvt.TableFiles[i]
   122  				currStats[tf.FileID()] = s
   123  			}
   124  		case pull.DownloadSuccess:
   125  			for _, tf := range tblFEvt.TableFiles {
   126  				chunksDownloading -= int64(tf.NumChunks())
   127  				chunksDownloaded += int64(tf.NumChunks())
   128  				delete(currStats, tf.FileID())
   129  			}
   130  		case pull.DownloadFailed:
   131  			// Ignore for now and output errors on the main thread
   132  			for _, tf := range tblFEvt.TableFiles {
   133  				delete(currStats, tf.FileID())
   134  			}
   135  		}
   136  
   137  		p.Printf("%s of %s chunks complete. %s chunks being downloaded currently.\n",
   138  			strhelp.CommaIfy(chunksDownloaded), strhelp.CommaIfy(chunksC), strhelp.CommaIfy(chunksDownloading))
   139  		for _, fileId := range sortedKeys(currStats) {
   140  			s := currStats[fileId]
   141  			bps := float64(s.Read) / s.Elapsed.Seconds()
   142  			rate := humanize.Bytes(uint64(bps)) + "/s"
   143  			p.Printf("Downloading file: %s (%s chunks) - %.2f%% downloaded, %s\n",
   144  				fileId, strhelp.CommaIfy(int64((*tableFiles[fileId]).NumChunks())), s.Percent*100, rate)
   145  		}
   146  		p.Display()
   147  	}
   148  	p.Display()
   149  }
   150  
   151  func sortedKeys(m map[string]iohelp.ReadStats) []string {
   152  	keys := make([]string, 0, len(m))
   153  	for k := range m {
   154  		keys = append(keys, k)
   155  	}
   156  	sort.Strings(keys)
   157  	return keys
   158  }
   159  
   160  // CloneRemote - common entry point for both dolt_clone() and `dolt clone`
   161  // The database must be initialized with a remote before calling this function.
   162  //
   163  // The `branch` parameter is the branch to clone. If it is empty, the default branch is used.
   164  func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, depth int, dEnv *env.DoltEnv) error {
   165  	// We support two forms of cloning: full and shallow. These two approaches have little in common, with the exception
   166  	// of the first and last steps. Determining the branch to check out and setting the working set to the checked out commit.
   167  
   168  	srcRefHashes, branch, err := getSrcRefs(ctx, branch, srcDB, dEnv)
   169  	if err != nil {
   170  		return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error())
   171  	}
   172  	if remoteName == "" {
   173  		remoteName = "origin"
   174  	}
   175  
   176  	var checkedOutCommit *doltdb.Commit
   177  
   178  	// Step 1) Pull the remote information we care about to a local disk.
   179  	if depth <= 0 {
   180  		checkedOutCommit, err = fullClone(ctx, srcDB, dEnv, srcRefHashes, branch, remoteName, singleBranch)
   181  	} else {
   182  		checkedOutCommit, err = shallowCloneDataPull(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth)
   183  	}
   184  
   185  	if err != nil {
   186  		if err == pull.ErrNoData {
   187  			err = ErrNoDataAtRemote
   188  		}
   189  		return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error())
   190  	}
   191  
   192  	// TODO: make this interface take a DoltRef and marshal it automatically
   193  	err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)})
   194  	if err != nil {
   195  		return err
   196  	}
   197  
   198  	rootVal, err := checkedOutCommit.GetRootValue(ctx)
   199  	if err != nil {
   200  		return fmt.Errorf("%w: %s; %s", ErrFailedToGetRootValue, branch, err.Error())
   201  	}
   202  
   203  	wsRef, err := ref.WorkingSetRefForHead(ref.NewBranchRef(branch))
   204  	if err != nil {
   205  		return err
   206  	}
   207  
   208  	// Retrieve existing working set, delete if it exists
   209  	ws, err := dEnv.DoltDB.ResolveWorkingSet(ctx, wsRef)
   210  	if ws != nil {
   211  		dEnv.DoltDB.DeleteWorkingSet(ctx, wsRef)
   212  	}
   213  	ws = doltdb.EmptyWorkingSet(wsRef)
   214  
   215  	// Update to use current Working and Staged root
   216  	err = dEnv.UpdateWorkingSet(ctx, ws.WithWorkingRoot(rootVal).WithStagedRoot(rootVal))
   217  	if err != nil {
   218  		return err
   219  	}
   220  
   221  	return nil
   222  }
   223  
   224  // getSrcRefs returns the refs from the source database and the branch to check out. The input branch is used if it is
   225  // not empty, otherwise the default branch is determined and returned.
   226  func getSrcRefs(ctx context.Context, branch string, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv) ([]doltdb.RefWithHash, string, error) {
   227  	srcRefHashes, err := srcDB.GetRefsWithHashes(ctx)
   228  	if err != nil {
   229  		return nil, "", err
   230  	}
   231  
   232  	if len(srcRefHashes) == 0 {
   233  		return nil, "", ErrNoDataAtRemote
   234  	}
   235  
   236  	branches := make([]ref.DoltRef, 0, len(srcRefHashes))
   237  	for _, refHash := range srcRefHashes {
   238  		if refHash.Ref.GetType() == ref.BranchRefType {
   239  			br := refHash.Ref.(ref.BranchRef)
   240  			branches = append(branches, br)
   241  		}
   242  	}
   243  	if branch == "" {
   244  		branch = env.GetDefaultBranch(dEnv, branches)
   245  	}
   246  
   247  	return srcRefHashes, branch, nil
   248  }
   249  
   250  func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv, srcRefHashes []doltdb.RefWithHash, branch, remoteName string, singleBranch bool) (*doltdb.Commit, error) {
   251  	eventCh := make(chan pull.TableFileEvent, 128)
   252  	wg := &sync.WaitGroup{}
   253  	wg.Add(1)
   254  	go func() {
   255  		defer wg.Done()
   256  		clonePrint(eventCh)
   257  	}()
   258  
   259  	err := srcDB.Clone(ctx, dEnv.DoltDB, eventCh)
   260  
   261  	close(eventCh)
   262  	wg.Wait()
   263  
   264  	cs, _ := doltdb.NewCommitSpec(branch)
   265  	optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil)
   266  	if err != nil {
   267  		return nil, err
   268  	}
   269  	cm, ok := optCmt.ToCommit()
   270  	if !ok {
   271  		return nil, doltdb.ErrGhostCommitEncountered
   272  	}
   273  
   274  	err = dEnv.DoltDB.DeleteAllRefs(ctx)
   275  	if err != nil {
   276  		return nil, err
   277  	}
   278  
   279  	// Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved.
   280  	for _, refHash := range srcRefHashes {
   281  		if refHash.Ref.GetType() == ref.BranchRefType {
   282  			br := refHash.Ref.(ref.BranchRef)
   283  			if !singleBranch || br.GetPath() == branch {
   284  				remoteRef := ref.NewRemoteRef(remoteName, br.GetPath())
   285  				err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash)
   286  				if err != nil {
   287  					return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error())
   288  
   289  				}
   290  			}
   291  			if br.GetPath() == branch {
   292  				// This is the only local branch after the clone is complete.
   293  				err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash)
   294  				if err != nil {
   295  					return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error())
   296  				}
   297  			}
   298  		} else if refHash.Ref.GetType() == ref.TagRefType {
   299  			tr := refHash.Ref.(ref.TagRef)
   300  			err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash)
   301  			if err != nil {
   302  				return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error())
   303  			}
   304  		}
   305  	}
   306  
   307  	return cm, nil
   308  }
   309  
   310  // shallowCloneDataPull is a shallow clone specific helper function to pull only the data required to show the given branch
   311  // at the depth given.
   312  func shallowCloneDataPull(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) (*doltdb.Commit, error) {
   313  	remotes, err := destData.Rsr.GetRemotes()
   314  	if err != nil {
   315  		return nil, err
   316  	}
   317  	remote, ok := remotes.Get(remoteName)
   318  	if !ok {
   319  		// By the time we get to this point, the remote should be created, so this should never happen.
   320  		return nil, fmt.Errorf("remote %s not found", remoteName)
   321  	}
   322  
   323  	specs, err := env.ParseRefSpecs([]string{branch}, destData.Rsr, remote)
   324  	if err != nil {
   325  		return nil, err
   326  	}
   327  
   328  	err = ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth)
   329  	if err != nil {
   330  		return nil, err
   331  	}
   332  
   333  	// After the fetch approach, we just need to create the local branch. The single remote branch already exists.
   334  	br := ref.NewBranchRef(branch)
   335  
   336  	cmt, err := srcDB.ResolveCommitRef(ctx, br)
   337  	if err != nil {
   338  		return nil, err
   339  	}
   340  
   341  	hsh, err := cmt.HashOf()
   342  	if err != nil {
   343  		return nil, err
   344  	}
   345  
   346  	// This is the only local branch after the clone is complete.
   347  	err = destData.Ddb.SetHead(ctx, br, hsh)
   348  	if err != nil {
   349  		return nil, err
   350  	}
   351  
   352  	return cmt, nil
   353  }
   354  
   355  // InitEmptyClonedRepo inits an empty, newly cloned repo. This would be unnecessary if we properly initialized the
   356  // storage for a repository when we created it on dolthub. If we do that, this code can be removed.
   357  func InitEmptyClonedRepo(ctx context.Context, dEnv *env.DoltEnv) error {
   358  	name := dEnv.Config.GetStringOrDefault(config.UserNameKey, "")
   359  	email := dEnv.Config.GetStringOrDefault(config.UserEmailKey, "")
   360  	initBranch := env.GetDefaultInitBranch(dEnv.Config)
   361  
   362  	if name == "" {
   363  		return ErrUserNotFound
   364  	} else if email == "" {
   365  		return ErrEmailNotFound
   366  	}
   367  
   368  	err := dEnv.InitDBWithTime(ctx, types.Format_Default, name, email, initBranch, datas.CommitterDate())
   369  	if err != nil {
   370  		return fmt.Errorf("failed to init repo: %w", err)
   371  	}
   372  
   373  	return nil
   374  }