github.com/dolthub/dolt/go@v0.40.5-0.20240520175717-68db7794bea6/libraries/doltcore/env/actions/clone.go (about) 1 // Copyright 2021 Dolthub, Inc. 2 // 3 // Licensed under the Apache License, Version 2.0 (the "License"); 4 // you may not use this file except in compliance with the License. 5 // You may obtain a copy of the License at 6 // 7 // http://www.apache.org/licenses/LICENSE-2.0 8 // 9 // Unless required by applicable law or agreed to in writing, software 10 // distributed under the License is distributed on an "AS IS" BASIS, 11 // WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 12 // See the License for the specific language governing permissions and 13 // limitations under the License. 14 15 package actions 16 17 import ( 18 "context" 19 "errors" 20 "fmt" 21 "path/filepath" 22 "sort" 23 "sync" 24 25 "github.com/dustin/go-humanize" 26 27 "github.com/dolthub/dolt/go/cmd/dolt/cli" 28 "github.com/dolthub/dolt/go/libraries/doltcore/dbfactory" 29 "github.com/dolthub/dolt/go/libraries/doltcore/doltdb" 30 "github.com/dolthub/dolt/go/libraries/doltcore/env" 31 "github.com/dolthub/dolt/go/libraries/doltcore/ref" 32 "github.com/dolthub/dolt/go/libraries/utils/config" 33 "github.com/dolthub/dolt/go/libraries/utils/filesys" 34 "github.com/dolthub/dolt/go/libraries/utils/iohelp" 35 "github.com/dolthub/dolt/go/libraries/utils/strhelp" 36 "github.com/dolthub/dolt/go/store/chunks" 37 "github.com/dolthub/dolt/go/store/datas" 38 "github.com/dolthub/dolt/go/store/datas/pull" 39 "github.com/dolthub/dolt/go/store/types" 40 ) 41 42 var ErrRepositoryExists = errors.New("data repository already exists") 43 var ErrFailedToCreateDirectory = errors.New("unable to create directories") 44 var ErrFailedToAccessDir = errors.New("unable to access directories") 45 var ErrFailedToCreateRepoStateWithRemote = errors.New("unable to create repo state with remote") 46 var ErrNoDataAtRemote = errors.New("remote at that url contains no Dolt data") 47 var ErrFailedToListBranches = errors.New("failed to list branches") 48 var ErrFailedToGetBranch = errors.New("could not get branch") 49 var ErrFailedToGetRootValue = errors.New("could not find root value") 50 var ErrFailedToCreateRemoteRef = errors.New("could not create remote ref") 51 var ErrFailedToCreateTagRef = errors.New("could not create tag ref") 52 var ErrFailedToCreateLocalBranch = errors.New("could not create local branch") 53 var ErrFailedToDeleteBranch = errors.New("could not delete local branch after clone") 54 var ErrUserNotFound = errors.New("could not determine user name. run dolt config --global --add user.name") 55 var ErrEmailNotFound = errors.New("could not determine email. run dolt config --global --add user.email") 56 var ErrCloneFailed = errors.New("clone failed") 57 58 // EnvForClone creates a new DoltEnv and configures it with repo state from the specified remote. The returned DoltEnv is ready for content to be cloned into it. The directory used for the new DoltEnv is determined by resolving the specified dir against the specified Filesys. 59 func EnvForClone(ctx context.Context, nbf *types.NomsBinFormat, r env.Remote, dir string, fs filesys.Filesys, version string, homeProvider env.HomeDirProvider) (*env.DoltEnv, error) { 60 exists, _ := fs.Exists(filepath.Join(dir, dbfactory.DoltDir)) 61 62 if exists { 63 return nil, fmt.Errorf("%w: %s", ErrRepositoryExists, dir) 64 } 65 66 err := fs.MkDirs(dir) 67 if err != nil { 68 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateDirectory, dir, err.Error()) 69 } 70 71 newFs, err := fs.WithWorkingDir(dir) 72 if err != nil { 73 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToAccessDir, dir, err.Error()) 74 } 75 76 dEnv := env.Load(ctx, homeProvider, newFs, doltdb.LocalDirDoltDB, version) 77 err = dEnv.InitRepoWithNoData(ctx, nbf) 78 if err != nil { 79 return nil, fmt.Errorf("failed to init repo: %w", err) 80 } 81 82 dEnv.RSLoadErr = nil 83 if !env.IsEmptyRemote(r) { 84 dEnv.RepoState, err = env.CloneRepoState(dEnv.FS, r) 85 if err != nil { 86 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRepoStateWithRemote, r.Name, err.Error()) 87 } 88 } 89 90 return dEnv, nil 91 } 92 93 func clonePrint(eventCh <-chan pull.TableFileEvent) { 94 var ( 95 chunksC int64 96 chunksDownloading int64 97 chunksDownloaded int64 98 currStats = make(map[string]iohelp.ReadStats) 99 tableFiles = make(map[string]*chunks.TableFile) 100 ) 101 102 p := cli.NewEphemeralPrinter() 103 104 p.Printf("Retrieving remote information.\n") 105 p.Display() 106 107 for tblFEvt := range eventCh { 108 switch tblFEvt.EventType { 109 case pull.Listed: 110 for _, tf := range tblFEvt.TableFiles { 111 c := tf 112 tableFiles[c.FileID()] = &c 113 chunksC += int64(tf.NumChunks()) 114 } 115 case pull.DownloadStart: 116 for _, tf := range tblFEvt.TableFiles { 117 chunksDownloading += int64(tf.NumChunks()) 118 } 119 case pull.DownloadStats: 120 for i, s := range tblFEvt.Stats { 121 tf := tblFEvt.TableFiles[i] 122 currStats[tf.FileID()] = s 123 } 124 case pull.DownloadSuccess: 125 for _, tf := range tblFEvt.TableFiles { 126 chunksDownloading -= int64(tf.NumChunks()) 127 chunksDownloaded += int64(tf.NumChunks()) 128 delete(currStats, tf.FileID()) 129 } 130 case pull.DownloadFailed: 131 // Ignore for now and output errors on the main thread 132 for _, tf := range tblFEvt.TableFiles { 133 delete(currStats, tf.FileID()) 134 } 135 } 136 137 p.Printf("%s of %s chunks complete. %s chunks being downloaded currently.\n", 138 strhelp.CommaIfy(chunksDownloaded), strhelp.CommaIfy(chunksC), strhelp.CommaIfy(chunksDownloading)) 139 for _, fileId := range sortedKeys(currStats) { 140 s := currStats[fileId] 141 bps := float64(s.Read) / s.Elapsed.Seconds() 142 rate := humanize.Bytes(uint64(bps)) + "/s" 143 p.Printf("Downloading file: %s (%s chunks) - %.2f%% downloaded, %s\n", 144 fileId, strhelp.CommaIfy(int64((*tableFiles[fileId]).NumChunks())), s.Percent*100, rate) 145 } 146 p.Display() 147 } 148 p.Display() 149 } 150 151 func sortedKeys(m map[string]iohelp.ReadStats) []string { 152 keys := make([]string, 0, len(m)) 153 for k := range m { 154 keys = append(keys, k) 155 } 156 sort.Strings(keys) 157 return keys 158 } 159 160 // CloneRemote - common entry point for both dolt_clone() and `dolt clone` 161 // The database must be initialized with a remote before calling this function. 162 // 163 // The `branch` parameter is the branch to clone. If it is empty, the default branch is used. 164 func CloneRemote(ctx context.Context, srcDB *doltdb.DoltDB, remoteName, branch string, singleBranch bool, depth int, dEnv *env.DoltEnv) error { 165 // We support two forms of cloning: full and shallow. These two approaches have little in common, with the exception 166 // of the first and last steps. Determining the branch to check out and setting the working set to the checked out commit. 167 168 srcRefHashes, branch, err := getSrcRefs(ctx, branch, srcDB, dEnv) 169 if err != nil { 170 return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) 171 } 172 if remoteName == "" { 173 remoteName = "origin" 174 } 175 176 var checkedOutCommit *doltdb.Commit 177 178 // Step 1) Pull the remote information we care about to a local disk. 179 if depth <= 0 { 180 checkedOutCommit, err = fullClone(ctx, srcDB, dEnv, srcRefHashes, branch, remoteName, singleBranch) 181 } else { 182 checkedOutCommit, err = shallowCloneDataPull(ctx, dEnv.DbData(), srcDB, remoteName, branch, depth) 183 } 184 185 if err != nil { 186 if err == pull.ErrNoData { 187 err = ErrNoDataAtRemote 188 } 189 return fmt.Errorf("%w; %s", ErrCloneFailed, err.Error()) 190 } 191 192 // TODO: make this interface take a DoltRef and marshal it automatically 193 err = dEnv.RepoStateWriter().SetCWBHeadRef(ctx, ref.MarshalableRef{Ref: ref.NewBranchRef(branch)}) 194 if err != nil { 195 return err 196 } 197 198 rootVal, err := checkedOutCommit.GetRootValue(ctx) 199 if err != nil { 200 return fmt.Errorf("%w: %s; %s", ErrFailedToGetRootValue, branch, err.Error()) 201 } 202 203 wsRef, err := ref.WorkingSetRefForHead(ref.NewBranchRef(branch)) 204 if err != nil { 205 return err 206 } 207 208 // Retrieve existing working set, delete if it exists 209 ws, err := dEnv.DoltDB.ResolveWorkingSet(ctx, wsRef) 210 if ws != nil { 211 dEnv.DoltDB.DeleteWorkingSet(ctx, wsRef) 212 } 213 ws = doltdb.EmptyWorkingSet(wsRef) 214 215 // Update to use current Working and Staged root 216 err = dEnv.UpdateWorkingSet(ctx, ws.WithWorkingRoot(rootVal).WithStagedRoot(rootVal)) 217 if err != nil { 218 return err 219 } 220 221 return nil 222 } 223 224 // getSrcRefs returns the refs from the source database and the branch to check out. The input branch is used if it is 225 // not empty, otherwise the default branch is determined and returned. 226 func getSrcRefs(ctx context.Context, branch string, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv) ([]doltdb.RefWithHash, string, error) { 227 srcRefHashes, err := srcDB.GetRefsWithHashes(ctx) 228 if err != nil { 229 return nil, "", err 230 } 231 232 if len(srcRefHashes) == 0 { 233 return nil, "", ErrNoDataAtRemote 234 } 235 236 branches := make([]ref.DoltRef, 0, len(srcRefHashes)) 237 for _, refHash := range srcRefHashes { 238 if refHash.Ref.GetType() == ref.BranchRefType { 239 br := refHash.Ref.(ref.BranchRef) 240 branches = append(branches, br) 241 } 242 } 243 if branch == "" { 244 branch = env.GetDefaultBranch(dEnv, branches) 245 } 246 247 return srcRefHashes, branch, nil 248 } 249 250 func fullClone(ctx context.Context, srcDB *doltdb.DoltDB, dEnv *env.DoltEnv, srcRefHashes []doltdb.RefWithHash, branch, remoteName string, singleBranch bool) (*doltdb.Commit, error) { 251 eventCh := make(chan pull.TableFileEvent, 128) 252 wg := &sync.WaitGroup{} 253 wg.Add(1) 254 go func() { 255 defer wg.Done() 256 clonePrint(eventCh) 257 }() 258 259 err := srcDB.Clone(ctx, dEnv.DoltDB, eventCh) 260 261 close(eventCh) 262 wg.Wait() 263 264 cs, _ := doltdb.NewCommitSpec(branch) 265 optCmt, err := dEnv.DoltDB.Resolve(ctx, cs, nil) 266 if err != nil { 267 return nil, err 268 } 269 cm, ok := optCmt.ToCommit() 270 if !ok { 271 return nil, doltdb.ErrGhostCommitEncountered 272 } 273 274 err = dEnv.DoltDB.DeleteAllRefs(ctx) 275 if err != nil { 276 return nil, err 277 } 278 279 // Preserve only branch and tag references from the remote. Branches are translated into remote branches, tags are preserved. 280 for _, refHash := range srcRefHashes { 281 if refHash.Ref.GetType() == ref.BranchRefType { 282 br := refHash.Ref.(ref.BranchRef) 283 if !singleBranch || br.GetPath() == branch { 284 remoteRef := ref.NewRemoteRef(remoteName, br.GetPath()) 285 err = dEnv.DoltDB.SetHead(ctx, remoteRef, refHash.Hash) 286 if err != nil { 287 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateRemoteRef, remoteRef.String(), err.Error()) 288 289 } 290 } 291 if br.GetPath() == branch { 292 // This is the only local branch after the clone is complete. 293 err = dEnv.DoltDB.SetHead(ctx, br, refHash.Hash) 294 if err != nil { 295 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateLocalBranch, br.String(), err.Error()) 296 } 297 } 298 } else if refHash.Ref.GetType() == ref.TagRefType { 299 tr := refHash.Ref.(ref.TagRef) 300 err = dEnv.DoltDB.SetHead(ctx, tr, refHash.Hash) 301 if err != nil { 302 return nil, fmt.Errorf("%w: %s; %s", ErrFailedToCreateTagRef, tr.String(), err.Error()) 303 } 304 } 305 } 306 307 return cm, nil 308 } 309 310 // shallowCloneDataPull is a shallow clone specific helper function to pull only the data required to show the given branch 311 // at the depth given. 312 func shallowCloneDataPull(ctx context.Context, destData env.DbData, srcDB *doltdb.DoltDB, remoteName, branch string, depth int) (*doltdb.Commit, error) { 313 remotes, err := destData.Rsr.GetRemotes() 314 if err != nil { 315 return nil, err 316 } 317 remote, ok := remotes.Get(remoteName) 318 if !ok { 319 // By the time we get to this point, the remote should be created, so this should never happen. 320 return nil, fmt.Errorf("remote %s not found", remoteName) 321 } 322 323 specs, err := env.ParseRefSpecs([]string{branch}, destData.Rsr, remote) 324 if err != nil { 325 return nil, err 326 } 327 328 err = ShallowFetchRefSpec(ctx, destData, srcDB, specs[0], &remote, depth) 329 if err != nil { 330 return nil, err 331 } 332 333 // After the fetch approach, we just need to create the local branch. The single remote branch already exists. 334 br := ref.NewBranchRef(branch) 335 336 cmt, err := srcDB.ResolveCommitRef(ctx, br) 337 if err != nil { 338 return nil, err 339 } 340 341 hsh, err := cmt.HashOf() 342 if err != nil { 343 return nil, err 344 } 345 346 // This is the only local branch after the clone is complete. 347 err = destData.Ddb.SetHead(ctx, br, hsh) 348 if err != nil { 349 return nil, err 350 } 351 352 return cmt, nil 353 } 354 355 // InitEmptyClonedRepo inits an empty, newly cloned repo. This would be unnecessary if we properly initialized the 356 // storage for a repository when we created it on dolthub. If we do that, this code can be removed. 357 func InitEmptyClonedRepo(ctx context.Context, dEnv *env.DoltEnv) error { 358 name := dEnv.Config.GetStringOrDefault(config.UserNameKey, "") 359 email := dEnv.Config.GetStringOrDefault(config.UserEmailKey, "") 360 initBranch := env.GetDefaultInitBranch(dEnv.Config) 361 362 if name == "" { 363 return ErrUserNotFound 364 } else if email == "" { 365 return ErrEmailNotFound 366 } 367 368 err := dEnv.InitDBWithTime(ctx, types.Format_Default, name, email, initBranch, datas.CommitterDate()) 369 if err != nil { 370 return fmt.Errorf("failed to init repo: %w", err) 371 } 372 373 return nil 374 }