github.com/pachyderm/pachyderm@v1.13.4/src/server/pfs/cmds/cmds.go (about) 1 package cmds 2 3 import ( 4 "bufio" 5 "fmt" 6 "io" 7 "io/ioutil" 8 "net/http" 9 "net/url" 10 "os" 11 "os/exec" 12 "path/filepath" 13 "strconv" 14 "strings" 15 gosync "sync" 16 17 prompt "github.com/c-bata/go-prompt" 18 "github.com/gogo/protobuf/jsonpb" 19 "github.com/mattn/go-isatty" 20 "github.com/pachyderm/pachyderm/src/client" 21 "github.com/pachyderm/pachyderm/src/client/limit" 22 pfsclient "github.com/pachyderm/pachyderm/src/client/pfs" 23 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 24 "github.com/pachyderm/pachyderm/src/client/pkg/grpcutil" 25 "github.com/pachyderm/pachyderm/src/server/cmd/pachctl/shell" 26 "github.com/pachyderm/pachyderm/src/server/pfs/pretty" 27 "github.com/pachyderm/pachyderm/src/server/pkg/cmdutil" 28 "github.com/pachyderm/pachyderm/src/server/pkg/errutil" 29 "github.com/pachyderm/pachyderm/src/server/pkg/pager" 30 "github.com/pachyderm/pachyderm/src/server/pkg/ppsconsts" 31 "github.com/pachyderm/pachyderm/src/server/pkg/progress" 32 "github.com/pachyderm/pachyderm/src/server/pkg/sync" 33 "github.com/pachyderm/pachyderm/src/server/pkg/tabwriter" 34 txncmds "github.com/pachyderm/pachyderm/src/server/transaction/cmds" 35 36 "github.com/spf13/cobra" 37 "github.com/spf13/pflag" 38 "golang.org/x/sync/errgroup" 39 ) 40 41 const ( 42 // DefaultParallelism is the default parallelism used by 'get file' and 'put file'. 43 DefaultParallelism = 10 44 ) 45 46 // Cmds returns a slice containing pfs commands. 47 func Cmds() []*cobra.Command { 48 var commands []*cobra.Command 49 50 raw := false 51 rawFlags := pflag.NewFlagSet("", pflag.ContinueOnError) 52 rawFlags.BoolVar(&raw, "raw", false, "disable pretty printing, print raw json") 53 54 fullTimestamps := false 55 fullTimestampsFlags := pflag.NewFlagSet("", pflag.ContinueOnError) 56 fullTimestampsFlags.BoolVar(&fullTimestamps, "full-timestamps", false, "Return absolute timestamps (as opposed to the default, relative timestamps).") 57 58 noPager := false 59 noPagerFlags := pflag.NewFlagSet("", pflag.ContinueOnError) 60 noPagerFlags.BoolVar(&noPager, "no-pager", false, "Don't pipe output into a pager (i.e. less).") 61 62 marshaller := &jsonpb.Marshaler{Indent: " "} 63 64 repoDocs := &cobra.Command{ 65 Short: "Docs for repos.", 66 Long: `Repos, short for repository, are the top level data objects in Pachyderm. 67 68 Repos contain version-controlled directories and files. Files can be of any size 69 or type (e.g. csv, binary, images, etc).`, 70 } 71 commands = append(commands, cmdutil.CreateDocsAlias(repoDocs, "repo", " repo$")) 72 73 var description string 74 createRepo := &cobra.Command{ 75 Use: "{{alias}} <repo>", 76 Short: "Create a new repo.", 77 Long: "Create a new repo.", 78 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 79 c, err := client.NewOnUserMachine("user") 80 if err != nil { 81 return err 82 } 83 defer c.Close() 84 85 err = txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 86 _, err = c.PfsAPIClient.CreateRepo( 87 c.Ctx(), 88 &pfsclient.CreateRepoRequest{ 89 Repo: client.NewRepo(args[0]), 90 Description: description, 91 }, 92 ) 93 return err 94 }) 95 return grpcutil.ScrubGRPC(err) 96 }), 97 } 98 createRepo.Flags().StringVarP(&description, "description", "d", "", "A description of the repo.") 99 commands = append(commands, cmdutil.CreateAlias(createRepo, "create repo")) 100 101 updateRepo := &cobra.Command{ 102 Use: "{{alias}} <repo>", 103 Short: "Update a repo.", 104 Long: "Update a repo.", 105 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 106 c, err := client.NewOnUserMachine("user") 107 if err != nil { 108 return err 109 } 110 defer c.Close() 111 112 err = txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 113 _, err = c.PfsAPIClient.CreateRepo( 114 c.Ctx(), 115 &pfsclient.CreateRepoRequest{ 116 Repo: client.NewRepo(args[0]), 117 Description: description, 118 Update: true, 119 }, 120 ) 121 return err 122 }) 123 return grpcutil.ScrubGRPC(err) 124 }), 125 } 126 updateRepo.Flags().StringVarP(&description, "description", "d", "", "A description of the repo.") 127 shell.RegisterCompletionFunc(updateRepo, shell.RepoCompletion) 128 commands = append(commands, cmdutil.CreateAlias(updateRepo, "update repo")) 129 130 inspectRepo := &cobra.Command{ 131 Use: "{{alias}} <repo>", 132 Short: "Return info about a repo.", 133 Long: "Return info about a repo.", 134 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 135 c, err := client.NewOnUserMachine("user") 136 if err != nil { 137 return err 138 } 139 defer c.Close() 140 repoInfo, err := c.InspectRepo(args[0]) 141 if err != nil { 142 return err 143 } 144 if repoInfo == nil { 145 return errors.Errorf("repo %s not found", args[0]) 146 } 147 if raw { 148 return marshaller.Marshal(os.Stdout, repoInfo) 149 } 150 ri := &pretty.PrintableRepoInfo{ 151 RepoInfo: repoInfo, 152 FullTimestamps: fullTimestamps, 153 } 154 return pretty.PrintDetailedRepoInfo(ri) 155 }), 156 } 157 inspectRepo.Flags().AddFlagSet(rawFlags) 158 inspectRepo.Flags().AddFlagSet(fullTimestampsFlags) 159 shell.RegisterCompletionFunc(inspectRepo, shell.RepoCompletion) 160 commands = append(commands, cmdutil.CreateAlias(inspectRepo, "inspect repo")) 161 162 listRepo := &cobra.Command{ 163 Short: "Return all repos.", 164 Long: "Return all repos.", 165 Run: cmdutil.RunFixedArgs(0, func(args []string) error { 166 c, err := client.NewOnUserMachine("user") 167 if err != nil { 168 return err 169 } 170 defer c.Close() 171 repoInfos, err := c.ListRepo() 172 if err != nil { 173 return err 174 } 175 if raw { 176 for _, repoInfo := range repoInfos { 177 if err := marshaller.Marshal(os.Stdout, repoInfo); err != nil { 178 return err 179 } 180 } 181 return nil 182 } 183 184 header := pretty.RepoHeader 185 if (len(repoInfos) > 0) && (repoInfos[0].AuthInfo != nil) { 186 header = pretty.RepoAuthHeader 187 } 188 writer := tabwriter.NewWriter(os.Stdout, header) 189 for _, repoInfo := range repoInfos { 190 pretty.PrintRepoInfo(writer, repoInfo, fullTimestamps) 191 } 192 return writer.Flush() 193 }), 194 } 195 listRepo.Flags().AddFlagSet(rawFlags) 196 listRepo.Flags().AddFlagSet(fullTimestampsFlags) 197 commands = append(commands, cmdutil.CreateAlias(listRepo, "list repo")) 198 199 var force bool 200 var all bool 201 var splitTransaction bool 202 deleteRepo := &cobra.Command{ 203 Use: "{{alias}} <repo>", 204 Short: "Delete a repo.", 205 Long: "Delete a repo.", 206 Run: cmdutil.RunBoundedArgs(0, 1, func(args []string) error { 207 c, err := client.NewOnUserMachine("user") 208 if err != nil { 209 return err 210 } 211 defer c.Close() 212 213 request := &pfsclient.DeleteRepoRequest{ 214 Force: force, 215 All: all, 216 SplitTransaction: splitTransaction, 217 } 218 if len(args) > 0 { 219 if all { 220 return errors.Errorf("cannot use the --all flag with an argument") 221 } 222 request.Repo = client.NewRepo(args[0]) 223 } else if !all { 224 return errors.Errorf("either a repo name or the --all flag needs to be provided") 225 } 226 if splitTransaction { 227 fmt.Println("WARNING: If using the --split-txn flag, this command must run until complete. If a failure or incomplete run occurs, then Pachyderm will be left in an inconsistent state. To resolve an inconsistent state, rerun this command.") 228 if ok, err := cmdutil.InteractiveConfirm(); err != nil { 229 return err 230 } else if !ok { 231 return nil 232 } 233 } 234 235 err = txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 236 _, err = c.PfsAPIClient.DeleteRepo(c.Ctx(), request) 237 return err 238 }) 239 return grpcutil.ScrubGRPC(err) 240 }), 241 } 242 deleteRepo.Flags().BoolVarP(&force, "force", "f", false, "remove the repo regardless of errors; use with care") 243 deleteRepo.Flags().BoolVar(&all, "all", false, "remove all repos") 244 deleteRepo.Flags().BoolVar(&splitTransaction, "split-txn", false, "split large transactions into multiple smaller transactions") 245 shell.RegisterCompletionFunc(deleteRepo, shell.RepoCompletion) 246 commands = append(commands, cmdutil.CreateAlias(deleteRepo, "delete repo")) 247 248 commitDocs := &cobra.Command{ 249 Short: "Docs for commits.", 250 Long: `Commits are atomic transactions on the content of a repo. 251 252 Creating a commit is a multistep process: 253 - start a new commit with 'start commit' 254 - write files to the commit via 'put file' 255 - finish the new commit with 'finish commit' 256 257 Commits that have been started but not finished are NOT durable storage. 258 Commits become reliable (and immutable) when they are finished. 259 260 Commits can be created with another commit as a parent.`, 261 } 262 commands = append(commands, cmdutil.CreateDocsAlias(commitDocs, "commit", " commit$")) 263 264 var parent string 265 startCommit := &cobra.Command{ 266 Use: "{{alias}} <repo>@<branch-or-commit>", 267 Short: "Start a new commit.", 268 Long: "Start a new commit with parent-commit as the parent, or start a commit on the given branch; if the branch does not exist, it will be created.", 269 Example: `# Start a new commit in repo "test" that's not on any branch 270 $ {{alias}} test 271 272 # Start a commit in repo "test" on branch "master" 273 $ {{alias}} test@master 274 275 # Start a commit with "master" as the parent in repo "test", on a new branch "patch"; essentially a fork. 276 $ {{alias}} test@patch -p master 277 278 # Start a commit with XXX as the parent in repo "test", not on any branch 279 $ {{alias}} test -p XXX`, 280 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 281 branch, err := cmdutil.ParseBranch(args[0]) 282 if err != nil { 283 return err 284 } 285 c, err := newClient("user") 286 if err != nil { 287 return err 288 } 289 defer c.Close() 290 291 var commit *pfsclient.Commit 292 err = txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 293 var err error 294 commit, err = c.PfsAPIClient.StartCommit( 295 c.Ctx(), 296 &pfsclient.StartCommitRequest{ 297 Branch: branch.Name, 298 Parent: client.NewCommit(branch.Repo.Name, parent), 299 Description: description, 300 }, 301 ) 302 return err 303 }) 304 if err == nil { 305 fmt.Println(commit.ID) 306 } 307 return grpcutil.ScrubGRPC(err) 308 }), 309 } 310 startCommit.Flags().StringVarP(&parent, "parent", "p", "", "The parent of the new commit, unneeded if branch is specified and you want to use the previous head of the branch as the parent.") 311 startCommit.MarkFlagCustom("parent", "__pachctl_get_commit $(__parse_repo ${nouns[0]})") 312 startCommit.Flags().StringVarP(&description, "message", "m", "", "A description of this commit's contents") 313 startCommit.Flags().StringVar(&description, "description", "", "A description of this commit's contents (synonym for --message)") 314 shell.RegisterCompletionFunc(startCommit, shell.BranchCompletion) 315 commands = append(commands, cmdutil.CreateAlias(startCommit, "start commit")) 316 317 finishCommit := &cobra.Command{ 318 Use: "{{alias}} <repo>@<branch-or-commit>", 319 Short: "Finish a started commit.", 320 Long: "Finish a started commit. Commit-id must be a writeable commit.", 321 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 322 commit, err := cmdutil.ParseCommit(args[0]) 323 if err != nil { 324 return err 325 } 326 c, err := newClient("user") 327 if err != nil { 328 return err 329 } 330 defer c.Close() 331 332 err = txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 333 _, err = c.PfsAPIClient.FinishCommit( 334 c.Ctx(), 335 &pfsclient.FinishCommitRequest{ 336 Commit: commit, 337 Description: description, 338 }, 339 ) 340 return err 341 }) 342 return grpcutil.ScrubGRPC(err) 343 }), 344 } 345 finishCommit.Flags().StringVarP(&description, "message", "m", "", "A description of this commit's contents (overwrites any existing commit description)") 346 finishCommit.Flags().StringVar(&description, "description", "", "A description of this commit's contents (synonym for --message)") 347 shell.RegisterCompletionFunc(finishCommit, shell.BranchCompletion) 348 commands = append(commands, cmdutil.CreateAlias(finishCommit, "finish commit")) 349 350 inspectCommit := &cobra.Command{ 351 Use: "{{alias}} <repo>@<branch-or-commit>", 352 Short: "Return info about a commit.", 353 Long: "Return info about a commit.", 354 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 355 commit, err := cmdutil.ParseCommit(args[0]) 356 if err != nil { 357 return err 358 } 359 c, err := client.NewOnUserMachine("user") 360 if err != nil { 361 return err 362 } 363 defer c.Close() 364 365 commitInfo, err := c.InspectCommit(commit.Repo.Name, commit.ID) 366 if err != nil { 367 return err 368 } 369 if commitInfo == nil { 370 return errors.Errorf("commit %s not found", commit.ID) 371 } 372 if raw { 373 return marshaller.Marshal(os.Stdout, commitInfo) 374 } 375 ci := &pretty.PrintableCommitInfo{ 376 CommitInfo: commitInfo, 377 FullTimestamps: fullTimestamps, 378 } 379 return pretty.PrintDetailedCommitInfo(os.Stdout, ci) 380 }), 381 } 382 inspectCommit.Flags().AddFlagSet(rawFlags) 383 inspectCommit.Flags().AddFlagSet(fullTimestampsFlags) 384 shell.RegisterCompletionFunc(inspectCommit, shell.BranchCompletion) 385 commands = append(commands, cmdutil.CreateAlias(inspectCommit, "inspect commit")) 386 387 var from string 388 var number int 389 listCommit := &cobra.Command{ 390 Use: "{{alias}} <repo>[@<branch>]", 391 Short: "Return all commits on a repo.", 392 Long: "Return all commits on a repo.", 393 Example: ` 394 # return commits in repo "foo" 395 $ {{alias}} foo 396 397 # return commits in repo "foo" on branch "master" 398 $ {{alias}} foo@master 399 400 # return the last 20 commits in repo "foo" on branch "master" 401 $ {{alias}} foo@master -n 20 402 403 # return commits in repo "foo" since commit XXX 404 $ {{alias}} foo@master --from XXX`, 405 Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) { 406 c, err := client.NewOnUserMachine("user") 407 if err != nil { 408 return err 409 } 410 defer c.Close() 411 412 branch, err := cmdutil.ParseBranch(args[0]) 413 if err != nil { 414 return err 415 } 416 417 if raw { 418 return c.ListCommitF(branch.Repo.Name, branch.Name, from, uint64(number), false, func(ci *pfsclient.CommitInfo) error { 419 return marshaller.Marshal(os.Stdout, ci) 420 }) 421 } 422 writer := tabwriter.NewWriter(os.Stdout, pretty.CommitHeader) 423 if err := c.ListCommitF(branch.Repo.Name, branch.Name, from, uint64(number), false, func(ci *pfsclient.CommitInfo) error { 424 pretty.PrintCommitInfo(writer, ci, fullTimestamps) 425 return nil 426 }); err != nil { 427 return err 428 } 429 return writer.Flush() 430 }), 431 } 432 listCommit.Flags().StringVarP(&from, "from", "f", "", "list all commits since this commit") 433 listCommit.Flags().IntVarP(&number, "number", "n", 0, "list only this many commits; if set to zero, list all commits") 434 listCommit.MarkFlagCustom("from", "__pachctl_get_commit $(__parse_repo ${nouns[0]})") 435 listCommit.Flags().AddFlagSet(rawFlags) 436 listCommit.Flags().AddFlagSet(fullTimestampsFlags) 437 shell.RegisterCompletionFunc(listCommit, shell.RepoCompletion) 438 commands = append(commands, cmdutil.CreateAlias(listCommit, "list commit")) 439 440 printCommitIter := func(commitIter client.CommitInfoIterator) error { 441 if raw { 442 for { 443 commitInfo, err := commitIter.Next() 444 if errors.Is(err, io.EOF) { 445 return nil 446 } 447 if err != nil { 448 return err 449 } 450 if err := marshaller.Marshal(os.Stdout, commitInfo); err != nil { 451 return err 452 } 453 } 454 } 455 writer := tabwriter.NewWriter(os.Stdout, pretty.CommitHeader) 456 for { 457 commitInfo, err := commitIter.Next() 458 if errors.Is(err, io.EOF) { 459 break 460 } 461 if err != nil { 462 return err 463 } 464 pretty.PrintCommitInfo(writer, commitInfo, fullTimestamps) 465 } 466 return writer.Flush() 467 } 468 469 var repos cmdutil.RepeatedStringArg 470 flushCommit := &cobra.Command{ 471 Use: "{{alias}} <repo>@<branch-or-commit> ...", 472 Short: "Wait for all commits caused by the specified commits to finish and return them.", 473 Long: "Wait for all commits caused by the specified commits to finish and return them.", 474 Example: ` 475 # return commits caused by foo@XXX and bar@YYY 476 $ {{alias}} foo@XXX bar@YYY 477 478 # return commits caused by foo@XXX leading to repos bar and baz 479 $ {{alias}} foo@XXX -r bar -r baz`, 480 Run: cmdutil.Run(func(args []string) error { 481 commits, err := cmdutil.ParseCommits(args) 482 if err != nil { 483 return err 484 } 485 486 c, err := client.NewOnUserMachine("user") 487 if err != nil { 488 return err 489 } 490 defer c.Close() 491 492 var toRepos []*pfsclient.Repo 493 for _, repoName := range repos { 494 toRepos = append(toRepos, client.NewRepo(repoName)) 495 } 496 497 commitIter, err := c.FlushCommit(commits, toRepos) 498 if err != nil { 499 return err 500 } 501 502 return printCommitIter(commitIter) 503 }), 504 } 505 flushCommit.Flags().VarP(&repos, "repos", "r", "Wait only for commits leading to a specific set of repos") 506 flushCommit.MarkFlagCustom("repos", "__pachctl_get_repo") 507 flushCommit.Flags().AddFlagSet(rawFlags) 508 flushCommit.Flags().AddFlagSet(fullTimestampsFlags) 509 shell.RegisterCompletionFunc(flushCommit, shell.BranchCompletion) 510 commands = append(commands, cmdutil.CreateAlias(flushCommit, "flush commit")) 511 512 var newCommits bool 513 var pipeline string 514 subscribeCommit := &cobra.Command{ 515 Use: "{{alias}} <repo>@<branch>", 516 Short: "Print commits as they are created (finished).", 517 Long: "Print commits as they are created in the specified repo and branch. By default, all existing commits on the specified branch are returned first. A commit is only considered 'created' when it's been finished.", 518 Example: ` 519 # subscribe to commits in repo "test" on branch "master" 520 $ {{alias}} test@master 521 522 # subscribe to commits in repo "test" on branch "master", but only since commit XXX. 523 $ {{alias}} test@master --from XXX 524 525 # subscribe to commits in repo "test" on branch "master", but only for new commits created from now on. 526 $ {{alias}} test@master --new`, 527 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 528 branch, err := cmdutil.ParseBranch(args[0]) 529 if err != nil { 530 return err 531 } 532 c, err := client.NewOnUserMachine("user") 533 if err != nil { 534 return err 535 } 536 defer c.Close() 537 538 if newCommits && from != "" { 539 return errors.Errorf("--new and --from cannot be used together") 540 } 541 542 if newCommits { 543 from = branch.Name 544 } 545 546 var prov *pfsclient.CommitProvenance 547 if pipeline != "" { 548 pipelineInfo, err := c.InspectPipeline(pipeline) 549 if err != nil { 550 return err 551 } 552 prov = client.NewCommitProvenance(ppsconsts.SpecRepo, pipeline, pipelineInfo.SpecCommit.ID) 553 } 554 555 commitIter, err := c.SubscribeCommit(branch.Repo.Name, branch.Name, prov, from, pfsclient.CommitState_STARTED) 556 if err != nil { 557 return err 558 } 559 560 return printCommitIter(commitIter) 561 }), 562 } 563 subscribeCommit.Flags().StringVar(&from, "from", "", "subscribe to all commits since this commit") 564 subscribeCommit.Flags().StringVar(&pipeline, "pipeline", "", "subscribe to all commits created by this pipeline") 565 subscribeCommit.MarkFlagCustom("from", "__pachctl_get_commit $(__parse_repo ${nouns[0]})") 566 subscribeCommit.Flags().BoolVar(&newCommits, "new", false, "subscribe to only new commits created from now on") 567 subscribeCommit.Flags().AddFlagSet(rawFlags) 568 subscribeCommit.Flags().AddFlagSet(fullTimestampsFlags) 569 shell.RegisterCompletionFunc(subscribeCommit, shell.BranchCompletion) 570 commands = append(commands, cmdutil.CreateAlias(subscribeCommit, "subscribe commit")) 571 572 deleteCommit := &cobra.Command{ 573 Use: "{{alias}} <repo>@<branch-or-commit>", 574 Short: "Delete an input commit.", 575 Long: "Delete an input commit. An input is a commit which is not the output of a pipeline.", 576 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 577 commit, err := cmdutil.ParseCommit(args[0]) 578 if err != nil { 579 return err 580 } 581 c, err := client.NewOnUserMachine("user") 582 if err != nil { 583 return err 584 } 585 defer c.Close() 586 587 return txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 588 return c.DeleteCommit(commit.Repo.Name, commit.ID) 589 }) 590 }), 591 } 592 shell.RegisterCompletionFunc(deleteCommit, shell.BranchCompletion) 593 commands = append(commands, cmdutil.CreateAlias(deleteCommit, "delete commit")) 594 595 branchDocs := &cobra.Command{ 596 Short: "Docs for branches.", 597 Long: `A branch in Pachyderm is an alias for a Commit ID. 598 599 The branch reference will "float" to always refer to the latest commit on the 600 branch, known as the HEAD commit. Not all commits must be on a branch and 601 multiple branches can refer to the same commit. 602 603 Any pachctl command that can take a Commit ID, can take a branch name instead.`, 604 } 605 commands = append(commands, cmdutil.CreateDocsAlias(branchDocs, "branch", " branch$")) 606 607 var branchProvenance cmdutil.RepeatedStringArg 608 var head string 609 trigger := &pfsclient.Trigger{} 610 createBranch := &cobra.Command{ 611 Use: "{{alias}} <repo>@<branch-or-commit>", 612 Short: "Create a new branch, or update an existing branch, on a repo.", 613 Long: "Create a new branch, or update an existing branch, on a repo, starting a commit on the branch will also create it, so there's often no need to call this.", 614 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 615 branch, err := cmdutil.ParseBranch(args[0]) 616 if err != nil { 617 return err 618 } 619 provenance, err := cmdutil.ParseBranches(branchProvenance) 620 if err != nil { 621 return err 622 } 623 if len(provenance) != 0 && trigger.Branch != "" { 624 return errors.Errorf("cannot use provenance and triggers on the same branch") 625 } 626 if (trigger.CronSpec != "" || trigger.Size_ != "" || trigger.Commits != 0) && 627 trigger.Branch == "" { 628 return errors.Errorf("trigger condition specified without a branch to trigger on, specify a branch with --trigger") 629 } 630 c, err := client.NewOnUserMachine("user") 631 if err != nil { 632 return err 633 } 634 defer c.Close() 635 636 return txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 637 if trigger.Branch != "" { 638 return c.CreateBranchTrigger(branch.Repo.Name, branch.Name, head, trigger) 639 } 640 return c.CreateBranch(branch.Repo.Name, branch.Name, head, provenance) 641 }) 642 }), 643 } 644 createBranch.Flags().VarP(&branchProvenance, "provenance", "p", "The provenance for the branch. format: <repo>@<branch-or-commit>") 645 createBranch.MarkFlagCustom("provenance", "__pachctl_get_repo_commit") 646 createBranch.Flags().StringVarP(&head, "head", "", "", "The head of the newly created branch.") 647 createBranch.MarkFlagCustom("head", "__pachctl_get_commit $(__parse_repo ${nouns[0]})") 648 createBranch.Flags().StringVarP(&trigger.Branch, "trigger", "t", "", "The branch to trigger this branch on.") 649 createBranch.Flags().StringVar(&trigger.CronSpec, "trigger-cron", "", "The cron spec to use in triggering.") 650 createBranch.Flags().StringVar(&trigger.Size_, "trigger-size", "", "The data size to use in triggering.") 651 createBranch.Flags().Int64Var(&trigger.Commits, "trigger-commits", 0, "The number of commits to use in triggering.") 652 createBranch.Flags().BoolVar(&trigger.All, "trigger-all", false, "Only trigger when all conditions are met, rather than when any are met.") 653 commands = append(commands, cmdutil.CreateAlias(createBranch, "create branch")) 654 655 inspectBranch := &cobra.Command{ 656 Use: "{{alias}} <repo>@<branch>", 657 Short: "Return info about a branch.", 658 Long: "Return info about a branch.", 659 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 660 c, err := client.NewOnUserMachine("user") 661 if err != nil { 662 return err 663 } 664 defer c.Close() 665 branch, err := cmdutil.ParseBranch(args[0]) 666 if err != nil { 667 return err 668 } 669 670 branchInfo, err := c.InspectBranch(branch.Repo.Name, branch.Name) 671 if err != nil { 672 return err 673 } 674 if branchInfo == nil { 675 return errors.Errorf("branch %s not found", args[0]) 676 } 677 if raw { 678 return marshaller.Marshal(os.Stdout, branchInfo) 679 } 680 681 return pretty.PrintDetailedBranchInfo(branchInfo) 682 }), 683 } 684 inspectBranch.Flags().AddFlagSet(rawFlags) 685 inspectBranch.Flags().AddFlagSet(fullTimestampsFlags) 686 shell.RegisterCompletionFunc(inspectBranch, shell.BranchCompletion) 687 commands = append(commands, cmdutil.CreateAlias(inspectBranch, "inspect branch")) 688 689 listBranch := &cobra.Command{ 690 Use: "{{alias}} <repo>", 691 Short: "Return all branches on a repo.", 692 Long: "Return all branches on a repo.", 693 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 694 c, err := client.NewOnUserMachine("user") 695 if err != nil { 696 return err 697 } 698 defer c.Close() 699 branches, err := c.ListBranch(args[0]) 700 if err != nil { 701 return err 702 } 703 if raw { 704 for _, branch := range branches { 705 if err := marshaller.Marshal(os.Stdout, branch); err != nil { 706 return err 707 } 708 } 709 return nil 710 } 711 writer := tabwriter.NewWriter(os.Stdout, pretty.BranchHeader) 712 for _, branch := range branches { 713 pretty.PrintBranch(writer, branch) 714 } 715 return writer.Flush() 716 }), 717 } 718 listBranch.Flags().AddFlagSet(rawFlags) 719 shell.RegisterCompletionFunc(listBranch, shell.RepoCompletion) 720 commands = append(commands, cmdutil.CreateAlias(listBranch, "list branch")) 721 722 deleteBranch := &cobra.Command{ 723 Use: "{{alias}} <repo>@<branch-or-commit>", 724 Short: "Delete a branch", 725 Long: "Delete a branch, while leaving the commits intact", 726 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 727 branch, err := cmdutil.ParseBranch(args[0]) 728 if err != nil { 729 return err 730 } 731 c, err := client.NewOnUserMachine("user") 732 if err != nil { 733 return err 734 } 735 defer c.Close() 736 737 return txncmds.WithActiveTransaction(c, func(c *client.APIClient) error { 738 return c.DeleteBranch(branch.Repo.Name, branch.Name, force) 739 }) 740 }), 741 } 742 deleteBranch.Flags().BoolVarP(&force, "force", "f", false, "remove the branch regardless of errors; use with care") 743 shell.RegisterCompletionFunc(deleteBranch, shell.BranchCompletion) 744 commands = append(commands, cmdutil.CreateAlias(deleteBranch, "delete branch")) 745 746 fileDocs := &cobra.Command{ 747 Short: "Docs for files.", 748 Long: `Files are the lowest level data objects in Pachyderm. 749 750 Files can be of any type (e.g. csv, binary, images, etc) or size and can be 751 written to started (but not finished) commits with 'put file'. Files can be read 752 from commits with 'get file'.`, 753 } 754 commands = append(commands, cmdutil.CreateDocsAlias(fileDocs, "file", " file$")) 755 756 var filePaths []string 757 var recursive bool 758 var inputFile string 759 var parallelism int 760 var split string 761 var targetFileDatums uint 762 var targetFileBytes uint 763 var headerRecords uint 764 var putFileCommit bool 765 var overwrite bool 766 var compress bool 767 var enableProgress bool 768 putFile := &cobra.Command{ 769 Use: "{{alias}} <repo>@<branch-or-commit>[:<path/to/file>]", 770 Short: "Put a file into the filesystem.", 771 Long: "Put a file into the filesystem. This command supports a number of ways to insert data into PFS.", 772 Example: ` 773 # Put data from stdin as repo/branch/path: 774 $ echo "data" | {{alias}} repo@branch:/path 775 776 # Put data from stdin as repo/branch/path and start / finish a new commit on the branch. 777 $ echo "data" | {{alias}} -c repo@branch:/path 778 779 # Put a file from the local filesystem as repo/branch/path: 780 $ {{alias}} repo@branch:/path -f file 781 782 # Put a file from the local filesystem as repo/branch/file: 783 $ {{alias}} repo@branch -f file 784 785 # Put the contents of a directory as repo/branch/path/dir/file: 786 $ {{alias}} -r repo@branch:/path -f dir 787 788 # Put the contents of a directory as repo/branch/dir/file: 789 $ {{alias}} -r repo@branch -f dir 790 791 # Put the contents of a directory as repo/branch/file, i.e. put files at the top level: 792 $ {{alias}} -r repo@branch:/ -f dir 793 794 # Put the data from a URL as repo/branch/path: 795 $ {{alias}} repo@branch:/path -f http://host/path 796 797 # Put the data from a URL as repo/branch/path: 798 $ {{alias}} repo@branch -f http://host/path 799 800 # Put the data from an S3 bucket as repo/branch/s3_object: 801 $ {{alias}} repo@branch -r -f s3://my_bucket 802 803 # Put several files or URLs that are listed in file. 804 # Files and URLs should be newline delimited. 805 $ {{alias}} repo@branch -i file 806 807 # Put several files or URLs that are listed at URL. 808 # NOTE this URL can reference local files, so it could cause you to put sensitive 809 # files into your Pachyderm cluster. 810 $ {{alias}} repo@branch -i http://host/path`, 811 Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) { 812 if !enableProgress { 813 progress.Disable() 814 } 815 file, err := cmdutil.ParseFile(args[0]) 816 if err != nil { 817 return err 818 } 819 opts := []client.Option{client.WithMaxConcurrentStreams(parallelism)} 820 if compress { 821 opts = append(opts, client.WithGZIPCompression()) 822 } 823 c, err := newClient("user", opts...) 824 if err != nil { 825 return err 826 } 827 defer c.Close() 828 defer progress.Wait() 829 830 // load data into pachyderm 831 pfc, err := c.NewPutFileClient() 832 if err != nil { 833 return err 834 } 835 defer func() { 836 if err := pfc.Close(); err != nil && retErr == nil { 837 retErr = err 838 } 839 }() 840 if putFileCommit { 841 fmt.Fprintf(os.Stderr, "flag --commit / -c is deprecated; as of 1.7.2, you will get the same behavior without it\n") 842 } 843 844 limiter := limit.New(int(parallelism)) 845 var sources []string 846 if inputFile != "" { 847 // User has provided a file listing sources, one per line. Read sources 848 var r io.Reader 849 if inputFile == "-" { 850 r = os.Stdin 851 } else if url, err := url.Parse(inputFile); err == nil && url.Scheme != "" { 852 resp, err := http.Get(url.String()) 853 if err != nil { 854 return err 855 } 856 defer func() { 857 if err := resp.Body.Close(); err != nil && retErr == nil { 858 retErr = err 859 } 860 }() 861 r = resp.Body 862 } else { 863 inputFile, err := os.Open(inputFile) 864 if err != nil { 865 return err 866 } 867 defer func() { 868 if err := inputFile.Close(); err != nil && retErr == nil { 869 retErr = err 870 } 871 }() 872 r = inputFile 873 } 874 // scan line by line 875 scanner := bufio.NewScanner(r) 876 for scanner.Scan() { 877 if filePath := scanner.Text(); filePath != "" { 878 sources = append(sources, filePath) 879 } 880 } 881 } else { 882 // User has provided a single source 883 sources = filePaths 884 } 885 886 // Arguments parsed; create putFileHelper and begin copying data 887 var eg errgroup.Group 888 filesPut := &gosync.Map{} 889 for _, source := range sources { 890 source := source 891 if file.Path == "" { 892 // The user has not specified a path so we use source as path. 893 if source == "-" { 894 return errors.Errorf("must specify filename when reading data from stdin") 895 } 896 eg.Go(func() error { 897 return putFileHelper(c, pfc, file.Commit.Repo.Name, file.Commit.ID, joinPaths("", source), source, recursive, overwrite, limiter, split, targetFileDatums, targetFileBytes, headerRecords, filesPut) 898 }) 899 } else if len(sources) == 1 { 900 // We have a single source and the user has specified a path, 901 // we use the path and ignore source (in terms of naming the file). 902 eg.Go(func() error { 903 return putFileHelper(c, pfc, file.Commit.Repo.Name, file.Commit.ID, file.Path, source, recursive, overwrite, limiter, split, targetFileDatums, targetFileBytes, headerRecords, filesPut) 904 }) 905 } else { 906 // We have multiple sources and the user has specified a path, 907 // we use that path as a prefix for the filepaths. 908 eg.Go(func() error { 909 return putFileHelper(c, pfc, file.Commit.Repo.Name, file.Commit.ID, joinPaths(file.Path, source), source, recursive, overwrite, limiter, split, targetFileDatums, targetFileBytes, headerRecords, filesPut) 910 }) 911 } 912 } 913 return eg.Wait() 914 }), 915 } 916 putFile.Flags().StringSliceVarP(&filePaths, "file", "f", []string{"-"}, "The file to be put, it can be a local file or a URL.") 917 putFile.Flags().StringVarP(&inputFile, "input-file", "i", "", "Read filepaths or URLs from a file. If - is used, paths are read from the standard input.") 918 putFile.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively put the files in a directory.") 919 putFile.Flags().BoolVarP(&compress, "compress", "", false, "Compress data during upload. This parameter might help you upload your uncompressed data, such as CSV files, to Pachyderm faster. Use 'compress' with caution, because if your data is already compressed, this parameter might slow down the upload speed instead of increasing.") 920 putFile.Flags().IntVarP(¶llelism, "parallelism", "p", DefaultParallelism, "The maximum number of files that can be uploaded in parallel.") 921 putFile.Flags().StringVar(&split, "split", "", "Split the input file into smaller files, subject to the constraints of --target-file-datums and --target-file-bytes. Permissible values are `line`, `json`, `sql` and `csv`.") 922 putFile.Flags().UintVar(&targetFileDatums, "target-file-datums", 0, "The upper bound of the number of datums that each file contains, the last file will contain fewer if the datums don't divide evenly; needs to be used with --split.") 923 putFile.Flags().UintVar(&targetFileBytes, "target-file-bytes", 0, "The target upper bound of the number of bytes that each file contains; needs to be used with --split.") 924 putFile.Flags().UintVar(&headerRecords, "header-records", 0, "the number of records that will be converted to a PFS 'header', and prepended to future retrievals of any subset of data from PFS; needs to be used with --split=(json|line|csv)") 925 putFile.Flags().BoolVarP(&putFileCommit, "commit", "c", false, "DEPRECATED: Put file(s) in a new commit.") 926 putFile.Flags().BoolVarP(&overwrite, "overwrite", "o", false, "Overwrite the existing content of the file, either from previous commits or previous calls to 'put file' within this commit.") 927 putFile.Flags().BoolVar(&enableProgress, "progress", isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()), "Print progress bars.") 928 shell.RegisterCompletionFunc(putFile, 929 func(flag, text string, maxCompletions int64) ([]prompt.Suggest, shell.CacheFunc) { 930 if flag == "-f" || flag == "--file" || flag == "-i" || flag == "input-file" { 931 cs, cf := shell.FilesystemCompletion(flag, text, maxCompletions) 932 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 933 } else if flag == "" || flag == "-c" || flag == "--commit" || flag == "-o" || flag == "--overwrite" { 934 cs, cf := shell.FileCompletion(flag, text, maxCompletions) 935 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 936 } 937 return nil, shell.SameFlag(flag) 938 }) 939 commands = append(commands, cmdutil.CreateAlias(putFile, "put file")) 940 941 copyFile := &cobra.Command{ 942 Use: "{{alias}} <src-repo>@<src-branch-or-commit>:<src-path> <dst-repo>@<dst-branch-or-commit>:<dst-path>", 943 Short: "Copy files between pfs paths.", 944 Long: "Copy files between pfs paths.", 945 Run: cmdutil.RunFixedArgs(2, func(args []string) (retErr error) { 946 srcFile, err := cmdutil.ParseFile(args[0]) 947 if err != nil { 948 return err 949 } 950 destFile, err := cmdutil.ParseFile(args[1]) 951 if err != nil { 952 return err 953 } 954 c, err := client.NewOnUserMachine("user", client.WithMaxConcurrentStreams(parallelism)) 955 if err != nil { 956 return err 957 } 958 defer c.Close() 959 960 return c.CopyFile( 961 srcFile.Commit.Repo.Name, srcFile.Commit.ID, srcFile.Path, 962 destFile.Commit.Repo.Name, destFile.Commit.ID, destFile.Path, 963 overwrite, 964 ) 965 }), 966 } 967 copyFile.Flags().BoolVarP(&overwrite, "overwrite", "o", false, "Overwrite the existing content of the file, either from previous commits or previous calls to 'put file' within this commit.") 968 shell.RegisterCompletionFunc(copyFile, shell.FileCompletion) 969 commands = append(commands, cmdutil.CreateAlias(copyFile, "copy file")) 970 971 var outputPath string 972 var offsetBytes int64 973 var retry bool 974 getFile := &cobra.Command{ 975 Use: "{{alias}} <repo>@<branch-or-commit>:<path/in/pfs>", 976 Short: "Return the contents of a file.", 977 Long: "Return the contents of a file.", 978 Example: ` 979 # get file "XXX" on branch "master" in repo "foo" 980 $ {{alias}} foo@master:XXX 981 982 # get file "XXX" in the parent of the current head of branch "master" 983 # in repo "foo" 984 $ {{alias}} foo@master^:XXX 985 986 # get file "XXX" in the grandparent of the current head of branch "master" 987 # in repo "foo" 988 $ {{alias}} foo@master^2:XXX 989 990 # get file "test[].txt" on branch "master" in repo "foo" 991 # the path is interpreted as a glob pattern: quote and protect regex characters 992 $ {{alias}} 'foo@master:/test\[\].txt'`, 993 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 994 if !enableProgress { 995 progress.Disable() 996 } 997 file, err := cmdutil.ParseFile(args[0]) 998 if err != nil { 999 return err 1000 } 1001 c, err := newClient("user") 1002 if err != nil { 1003 return err 1004 } 1005 defer c.Close() 1006 defer progress.Wait() 1007 if recursive { 1008 if outputPath == "" { 1009 return errors.Errorf("an output path needs to be specified when using the --recursive flag") 1010 } 1011 puller := sync.NewPuller() 1012 puller.Progress = true 1013 return puller.Pull(c, outputPath, file.Commit.Repo.Name, file.Commit.ID, file.Path, false, false, parallelism, nil, "") 1014 } 1015 var w io.Writer 1016 // If an output path is given, print the output to stdout 1017 if outputPath == "" { 1018 w = os.Stdout 1019 } else { 1020 fi, err := c.InspectFile(file.Commit.Repo.Name, file.Commit.ID, file.Path) 1021 if err != nil { 1022 return err 1023 } 1024 var f *progress.File 1025 if ofi, err := os.Stat(outputPath); retry && err == nil { 1026 // when retrying, just write the unwritten bytes 1027 if offsetBytes == 0 { 1028 offsetBytes = ofi.Size() 1029 } 1030 f, err = progress.OpenAppend(outputPath, int64(fi.SizeBytes)-offsetBytes) 1031 if err != nil { 1032 return err 1033 } 1034 } else { 1035 f, err = progress.Create(outputPath, int64(fi.SizeBytes)-offsetBytes) 1036 if err != nil { 1037 return err 1038 } 1039 } 1040 defer f.Close() 1041 w = f 1042 } 1043 return c.GetFile(file.Commit.Repo.Name, file.Commit.ID, file.Path, offsetBytes, 0, w) 1044 }), 1045 } 1046 getFile.Flags().BoolVarP(&recursive, "recursive", "r", false, "Recursively download a directory.") 1047 getFile.Flags().StringVarP(&outputPath, "output", "o", "", "The path where data will be downloaded.") 1048 getFile.Flags().IntVarP(¶llelism, "parallelism", "p", DefaultParallelism, "The maximum number of files that can be downloaded in parallel") 1049 getFile.Flags().BoolVar(&enableProgress, "progress", isatty.IsTerminal(os.Stdout.Fd()) || isatty.IsCygwinTerminal(os.Stdout.Fd()), "{true|false} Whether or not to print the progress bars.") 1050 getFile.Flags().Int64Var(&offsetBytes, "offset", 0, "The number of bytes in the file to skip ahead when reading.") 1051 getFile.Flags().BoolVar(&retry, "retry", false, "{true|false} Whether to append the missing bytes to an existing file. No-op if the file doesn't exist.") 1052 shell.RegisterCompletionFunc(getFile, shell.FileCompletion) 1053 commands = append(commands, cmdutil.CreateAlias(getFile, "get file")) 1054 1055 inspectFile := &cobra.Command{ 1056 Use: "{{alias}} <repo>@<branch-or-commit>:<path/in/pfs>", 1057 Short: "Return info about a file.", 1058 Long: "Return info about a file.", 1059 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1060 file, err := cmdutil.ParseFile(args[0]) 1061 if err != nil { 1062 return err 1063 } 1064 c, err := client.NewOnUserMachine("user") 1065 if err != nil { 1066 return err 1067 } 1068 defer c.Close() 1069 fileInfo, err := c.InspectFile(file.Commit.Repo.Name, file.Commit.ID, file.Path) 1070 if err != nil { 1071 return err 1072 } 1073 if fileInfo == nil { 1074 return errors.Errorf("file %s not found", file.Path) 1075 } 1076 if raw { 1077 return marshaller.Marshal(os.Stdout, fileInfo) 1078 } 1079 return pretty.PrintDetailedFileInfo(fileInfo) 1080 }), 1081 } 1082 inspectFile.Flags().AddFlagSet(rawFlags) 1083 shell.RegisterCompletionFunc(inspectFile, shell.FileCompletion) 1084 commands = append(commands, cmdutil.CreateAlias(inspectFile, "inspect file")) 1085 1086 var history string 1087 listFile := &cobra.Command{ 1088 Use: "{{alias}} <repo>@<branch-or-commit>[:<path/in/pfs>]", 1089 Short: "Return the files in a directory.", 1090 Long: "Return the files in a directory.", 1091 Example: ` 1092 # list top-level files on branch "master" in repo "foo" 1093 $ {{alias}} foo@master 1094 1095 # list files under directory "dir" on branch "master" in repo "foo" 1096 $ {{alias}} foo@master:dir 1097 1098 # list top-level files in the parent commit of the current head of "master" 1099 # in repo "foo" 1100 $ {{alias}} foo@master^ 1101 1102 # list top-level files in the grandparent of the current head of "master" 1103 # in repo "foo" 1104 $ {{alias}} foo@master^2 1105 1106 # list the last n versions of top-level files on branch "master" in repo "foo" 1107 $ {{alias}} foo@master --history n 1108 1109 # list all versions of top-level files on branch "master" in repo "foo" 1110 $ {{alias}} foo@master --history all 1111 1112 # list file under directory "dir[1]" on branch "master" in repo "foo" 1113 # the path is interpreted as a glob pattern: quote and protect regex characters 1114 $ {{alias}} 'foo@master:dir\[1\]'`, 1115 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1116 file, err := cmdutil.ParseFile(args[0]) 1117 if err != nil { 1118 return err 1119 } 1120 history, err := cmdutil.ParseHistory(history) 1121 if err != nil { 1122 return errors.Wrapf(err, "error parsing history flag") 1123 } 1124 c, err := client.NewOnUserMachine("user") 1125 if err != nil { 1126 return err 1127 } 1128 defer c.Close() 1129 if raw { 1130 return c.ListFileF(file.Commit.Repo.Name, file.Commit.ID, file.Path, history, func(fi *pfsclient.FileInfo) error { 1131 return marshaller.Marshal(os.Stdout, fi) 1132 }) 1133 } 1134 header := pretty.FileHeader 1135 if history != 0 { 1136 header = pretty.FileHeaderWithCommit 1137 } 1138 writer := tabwriter.NewWriter(os.Stdout, header) 1139 if err := c.ListFileF(file.Commit.Repo.Name, file.Commit.ID, file.Path, history, func(fi *pfsclient.FileInfo) error { 1140 pretty.PrintFileInfo(writer, fi, fullTimestamps, history != 0) 1141 return nil 1142 }); err != nil { 1143 return err 1144 } 1145 return writer.Flush() 1146 }), 1147 } 1148 listFile.Flags().AddFlagSet(rawFlags) 1149 listFile.Flags().AddFlagSet(fullTimestampsFlags) 1150 listFile.Flags().StringVar(&history, "history", "none", "Return revision history for files.") 1151 shell.RegisterCompletionFunc(listFile, shell.FileCompletion) 1152 commands = append(commands, cmdutil.CreateAlias(listFile, "list file")) 1153 1154 globFile := &cobra.Command{ 1155 Use: "{{alias}} <repo>@<branch-or-commit>:<pattern>", 1156 Short: "Return files that match a glob pattern in a commit.", 1157 Long: "Return files that match a glob pattern in a commit (that is, match a glob pattern in a repo at the state represented by a commit). Glob patterns are documented [here](https://golang.org/pkg/path/filepath/#Match).", 1158 Example: ` 1159 # Return files in repo "foo" on branch "master" that start 1160 # with the character "A". Note how the double quotation marks around the 1161 # parameter are necessary because otherwise your shell might interpret the "*". 1162 $ {{alias}} "foo@master:A*" 1163 1164 # Return files in repo "foo" on branch "master" under directory "data". 1165 $ {{alias}} "foo@master:data/*"`, 1166 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1167 file, err := cmdutil.ParseFile(args[0]) 1168 if err != nil { 1169 return err 1170 } 1171 c, err := client.NewOnUserMachine("user") 1172 if err != nil { 1173 return err 1174 } 1175 defer c.Close() 1176 fileInfos, err := c.GlobFile(file.Commit.Repo.Name, file.Commit.ID, file.Path) 1177 if err != nil { 1178 return err 1179 } 1180 if raw { 1181 for _, fileInfo := range fileInfos { 1182 if err := marshaller.Marshal(os.Stdout, fileInfo); err != nil { 1183 return err 1184 } 1185 } 1186 return nil 1187 } 1188 writer := tabwriter.NewWriter(os.Stdout, pretty.FileHeader) 1189 for _, fileInfo := range fileInfos { 1190 pretty.PrintFileInfo(writer, fileInfo, fullTimestamps, false) 1191 } 1192 return writer.Flush() 1193 }), 1194 } 1195 globFile.Flags().AddFlagSet(rawFlags) 1196 globFile.Flags().AddFlagSet(fullTimestampsFlags) 1197 shell.RegisterCompletionFunc(globFile, shell.FileCompletion) 1198 commands = append(commands, cmdutil.CreateAlias(globFile, "glob file")) 1199 1200 var shallow bool 1201 var nameOnly bool 1202 var diffCmdArg string 1203 diffFile := &cobra.Command{ 1204 Use: "{{alias}} <new-repo>@<new-branch-or-commit>:<new-path> [<old-repo>@<old-branch-or-commit>:<old-path>]", 1205 Short: "Return a diff of two file trees in input repo. Diff of file trees in output repo coming soon.", 1206 Long: "Return a diff of two file trees in input repo. Diff of file trees in output repo coming soon.", 1207 Example: ` 1208 # Return the diff of the file "path" of the input repo "foo" between the head of the 1209 # "master" branch and its parent. 1210 $ {{alias}} foo@master:path 1211 1212 # Return the diff between the master branches of input repos foo and bar at paths 1213 # path1 and path2, respectively. 1214 $ {{alias}} foo@master:path1 bar@master:path2`, 1215 Run: cmdutil.RunBoundedArgs(1, 2, func(args []string) error { 1216 newFile, err := cmdutil.ParseFile(args[0]) 1217 if err != nil { 1218 return err 1219 } 1220 oldFile := client.NewFile("", "", "") 1221 if len(args) == 2 { 1222 oldFile, err = cmdutil.ParseFile(args[1]) 1223 if err != nil { 1224 return err 1225 } 1226 } 1227 c, err := client.NewOnUserMachine("user") 1228 if err != nil { 1229 return err 1230 } 1231 defer c.Close() 1232 1233 return pager.Page(noPager, os.Stdout, func(w io.Writer) (retErr error) { 1234 var writer *tabwriter.Writer 1235 if nameOnly { 1236 writer = tabwriter.NewWriter(w, pretty.DiffFileHeader) 1237 defer func() { 1238 if err := writer.Flush(); err != nil && retErr == nil { 1239 retErr = err 1240 } 1241 }() 1242 } 1243 1244 newFiles, oldFiles, err := c.DiffFile( 1245 newFile.Commit.Repo.Name, newFile.Commit.ID, newFile.Path, 1246 oldFile.Commit.Repo.Name, oldFile.Commit.ID, oldFile.Path, 1247 shallow, 1248 ) 1249 if err != nil { 1250 return err 1251 } 1252 diffCmd := diffCommand(diffCmdArg) 1253 return forEachDiffFile(newFiles, oldFiles, func(nFI, oFI *pfsclient.FileInfo) error { 1254 if nameOnly { 1255 if nFI != nil { 1256 pretty.PrintDiffFileInfo(writer, true, nFI, fullTimestamps) 1257 } 1258 if oFI != nil { 1259 pretty.PrintDiffFileInfo(writer, false, oFI, fullTimestamps) 1260 } 1261 return nil 1262 } 1263 nPath, oPath := "/dev/null", "/dev/null" 1264 if nFI != nil { 1265 nPath, err = dlFile(c, nFI.File) 1266 if err != nil { 1267 return err 1268 } 1269 defer func() { 1270 if err := os.RemoveAll(nPath); err != nil && retErr == nil { 1271 retErr = err 1272 } 1273 }() 1274 } 1275 if oFI != nil { 1276 oPath, err = dlFile(c, oFI.File) 1277 defer func() { 1278 if err := os.RemoveAll(oPath); err != nil && retErr == nil { 1279 retErr = err 1280 } 1281 }() 1282 } 1283 cmd := exec.Command(diffCmd[0], append(diffCmd[1:], oPath, nPath)...) 1284 cmd.Stdout = w 1285 cmd.Stderr = os.Stderr 1286 // Diff returns exit code 1 when it finds differences 1287 // between the files, so we catch it. 1288 if err := cmd.Run(); err != nil && cmd.ProcessState.ExitCode() != 1 { 1289 return err 1290 } 1291 return nil 1292 }) 1293 }) 1294 }), 1295 } 1296 diffFile.Flags().BoolVarP(&shallow, "shallow", "s", false, "Don't descend into sub directories.") 1297 diffFile.Flags().BoolVar(&nameOnly, "name-only", false, "Show only the names of changed files.") 1298 diffFile.Flags().StringVar(&diffCmdArg, "diff-command", "", "Use a program other than git to diff files.") 1299 diffFile.Flags().AddFlagSet(fullTimestampsFlags) 1300 diffFile.Flags().AddFlagSet(noPagerFlags) 1301 shell.RegisterCompletionFunc(diffFile, shell.FileCompletion) 1302 commands = append(commands, cmdutil.CreateAlias(diffFile, "diff file")) 1303 1304 deleteFile := &cobra.Command{ 1305 Use: "{{alias}} <repo>@<branch-or-commit>:<path/in/pfs>", 1306 Short: "Delete a file.", 1307 Long: "Delete a file.", 1308 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1309 file, err := cmdutil.ParseFile(args[0]) 1310 if err != nil { 1311 return err 1312 } 1313 c, err := client.NewOnUserMachine("user") 1314 if err != nil { 1315 return err 1316 } 1317 defer c.Close() 1318 1319 return c.DeleteFile(file.Commit.Repo.Name, file.Commit.ID, file.Path) 1320 }), 1321 } 1322 shell.RegisterCompletionFunc(deleteFile, shell.FileCompletion) 1323 commands = append(commands, cmdutil.CreateAlias(deleteFile, "delete file")) 1324 1325 objectDocs := &cobra.Command{ 1326 Short: "Docs for objects.", 1327 Long: `Objects are content-addressed blobs of data that are directly stored in the backend object store. 1328 1329 Objects are a low-level resource and should not be accessed directly by most users.`, 1330 } 1331 commands = append(commands, cmdutil.CreateDocsAlias(objectDocs, "object", " object$")) 1332 1333 getObject := &cobra.Command{ 1334 Use: "{{alias}} <hash>", 1335 Short: "Print the contents of an object.", 1336 Long: "Print the contents of an object.", 1337 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1338 c, err := client.NewOnUserMachine("user") 1339 if err != nil { 1340 return err 1341 } 1342 defer c.Close() 1343 return c.GetObject(args[0], os.Stdout) 1344 }), 1345 } 1346 commands = append(commands, cmdutil.CreateAlias(getObject, "get object")) 1347 1348 tagDocs := &cobra.Command{ 1349 Short: "Docs for tags.", 1350 Long: `Tags are aliases for objects. Many tags can refer to the same object. 1351 1352 Tags are a low-level resource and should not be accessed directly by most users.`, 1353 } 1354 commands = append(commands, cmdutil.CreateDocsAlias(tagDocs, "tag", " tag$")) 1355 1356 getTag := &cobra.Command{ 1357 Use: "{{alias}} <tag>", 1358 Short: "Print the contents of a tag.", 1359 Long: "Print the contents of a tag.", 1360 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 1361 c, err := client.NewOnUserMachine("user") 1362 if err != nil { 1363 return err 1364 } 1365 defer c.Close() 1366 return c.GetTag(args[0], os.Stdout) 1367 }), 1368 } 1369 commands = append(commands, cmdutil.CreateAlias(getTag, "get tag")) 1370 1371 var fix bool 1372 fsck := &cobra.Command{ 1373 Use: "{{alias}}", 1374 Short: "Run a file system consistency check on pfs.", 1375 Long: "Run a file system consistency check on the pachyderm file system, ensuring the correct provenance relationships are satisfied.", 1376 Run: cmdutil.RunFixedArgs(0, func(args []string) error { 1377 c, err := client.NewOnUserMachine("user") 1378 if err != nil { 1379 return err 1380 } 1381 defer c.Close() 1382 errors := false 1383 if err = c.Fsck(fix, func(resp *pfsclient.FsckResponse) error { 1384 if resp.Error != "" { 1385 errors = true 1386 fmt.Printf("Error: %s\n", resp.Error) 1387 } else { 1388 fmt.Printf("Fix applied: %v", resp.Fix) 1389 } 1390 return nil 1391 }); err != nil { 1392 return err 1393 } 1394 if !errors { 1395 fmt.Println("No errors found.") 1396 } 1397 return nil 1398 }), 1399 } 1400 fsck.Flags().BoolVarP(&fix, "fix", "f", false, "Attempt to fix as many issues as possible.") 1401 commands = append(commands, cmdutil.CreateAlias(fsck, "fsck")) 1402 1403 // Add the mount commands (which aren't available on Windows, so they're in 1404 // their own file) 1405 commands = append(commands, mountCmds()...) 1406 1407 return commands 1408 } 1409 1410 func putFileHelper(c *client.APIClient, pfc client.PutFileClient, 1411 repo, commit, path, source string, recursive, overwrite bool, // destination 1412 limiter limit.ConcurrencyLimiter, 1413 split string, targetFileDatums, targetFileBytes, headerRecords uint, // split 1414 filesPut *gosync.Map) (retErr error) { 1415 // Resolve the path, then trim any prefixed '../' to avoid sending bad paths 1416 // to the server, and convert to unix path in case we're on windows. 1417 path = filepath.ToSlash(filepath.Clean(path)) 1418 for strings.HasPrefix(path, "../") { 1419 path = strings.TrimPrefix(path, "../") 1420 } 1421 1422 if _, ok := filesPut.LoadOrStore(path, nil); ok { 1423 return errors.Errorf("multiple files put with the path %s, aborting, "+ 1424 "some files may already have been put and should be cleaned up with "+ 1425 "'delete file' or 'delete commit'", path) 1426 } 1427 putFile := func(reader io.ReadSeeker) error { 1428 if split == "" { 1429 pipe, err := isPipe(reader) 1430 if err != nil { 1431 return err 1432 } 1433 if overwrite && !pipe { 1434 return sync.PushFile(c, pfc, client.NewFile(repo, commit, path), reader) 1435 } 1436 if overwrite { 1437 _, err = pfc.PutFileOverwrite(repo, commit, path, reader, 0) 1438 return err 1439 } 1440 _, err = pfc.PutFile(repo, commit, path, reader) 1441 return err 1442 } 1443 1444 var delimiter pfsclient.Delimiter 1445 switch split { 1446 case "line": 1447 delimiter = pfsclient.Delimiter_LINE 1448 case "json": 1449 delimiter = pfsclient.Delimiter_JSON 1450 case "sql": 1451 delimiter = pfsclient.Delimiter_SQL 1452 case "csv": 1453 delimiter = pfsclient.Delimiter_CSV 1454 default: 1455 return errors.Errorf("unrecognized delimiter '%s'; only accepts one of "+ 1456 "{json,line,sql,csv}", split) 1457 } 1458 _, err := pfc.PutFileSplit(repo, commit, path, delimiter, int64(targetFileDatums), int64(targetFileBytes), int64(headerRecords), overwrite, reader) 1459 return err 1460 } 1461 1462 if source == "-" { 1463 if recursive { 1464 return errors.New("cannot set -r and read from stdin (must also set -f or -i)") 1465 } 1466 limiter.Acquire() 1467 defer limiter.Release() 1468 stdin := progress.Stdin() 1469 defer stdin.Finish() 1470 return putFile(stdin) 1471 } 1472 // try parsing the filename as a url, if it is one do a PutFileURL 1473 if url, err := url.Parse(source); err == nil && url.Scheme != "" { 1474 limiter.Acquire() 1475 defer limiter.Release() 1476 return pfc.PutFileURL(repo, commit, path, url.String(), recursive, overwrite) 1477 } 1478 if recursive { 1479 var eg errgroup.Group 1480 if err := filepath.Walk(source, func(filePath string, info os.FileInfo, err error) error { 1481 // file doesn't exist 1482 if info == nil { 1483 return errors.Errorf("%s doesn't exist", filePath) 1484 } 1485 if info.IsDir() { 1486 return nil 1487 } 1488 childDest := filepath.Join(path, strings.TrimPrefix(filePath, source)) 1489 limiter.Acquire() 1490 eg.Go(func() error { 1491 defer limiter.Release() 1492 // don't do a second recursive 'put file', just put the one file at 1493 // filePath into childDest, and then this walk loop will go on to the 1494 // next one 1495 return putFileHelper(c, pfc, repo, commit, childDest, filePath, false, 1496 overwrite, limit.New(0), split, targetFileDatums, targetFileBytes, 1497 headerRecords, filesPut) 1498 }) 1499 return nil 1500 }); err != nil { 1501 return err 1502 } 1503 return eg.Wait() 1504 } 1505 limiter.Acquire() 1506 defer limiter.Release() 1507 f, err := progress.Open(source) 1508 if err != nil { 1509 return err 1510 } 1511 defer func() { 1512 if err := f.Close(); err != nil && retErr == nil { 1513 retErr = err 1514 } 1515 }() 1516 return putFile(f) 1517 } 1518 1519 func joinPaths(prefix, filePath string) string { 1520 if url, err := url.Parse(filePath); err == nil && url.Scheme != "" { 1521 if url.Scheme == "pfs" { 1522 // pfs paths are of the form pfs://host/repo/branch/path we don't 1523 // want to prefix every file with host/repo so we remove those 1524 splitPath := strings.Split(strings.TrimPrefix(url.Path, "/"), "/") 1525 if len(splitPath) < 3 { 1526 return prefix 1527 } 1528 return filepath.Join(append([]string{prefix}, splitPath[2:]...)...) 1529 } 1530 return filepath.Join(prefix, strings.TrimPrefix(url.Path, "/")) 1531 } 1532 return filepath.Join(prefix, filePath) 1533 } 1534 1535 func isPipe(r io.ReadSeeker) (bool, error) { 1536 file, ok := r.(*os.File) 1537 if !ok { 1538 return false, nil 1539 } 1540 fi, err := file.Stat() 1541 if err != nil { 1542 return false, err 1543 } 1544 return fi.Mode()&os.ModeNamedPipe != 0, nil 1545 } 1546 1547 func dlFile(pachClient *client.APIClient, f *pfsclient.File) (_ string, retErr error) { 1548 if err := os.MkdirAll(filepath.Join(os.TempDir(), filepath.Dir(f.Path)), 0777); err != nil { 1549 return "", err 1550 } 1551 file, err := ioutil.TempFile("", f.Path+"_") 1552 if err != nil { 1553 return "", err 1554 } 1555 defer func() { 1556 if err := file.Close(); err != nil && retErr == nil { 1557 retErr = err 1558 } 1559 }() 1560 if err := pachClient.GetFile(f.Commit.Repo.Name, f.Commit.ID, f.Path, 0, 0, file); err != nil { 1561 return "", err 1562 } 1563 return file.Name(), nil 1564 } 1565 1566 func diffCommand(cmdArg string) []string { 1567 if cmdArg != "" { 1568 return strings.Fields(cmdArg) 1569 } 1570 _, err := exec.LookPath("git") 1571 if err == nil { 1572 return []string{"git", "-c", "color.ui=always", "--no-pager", "diff", "--no-index"} 1573 } 1574 return []string{"diff"} 1575 } 1576 1577 func forEachDiffFile(newFiles, oldFiles []*pfsclient.FileInfo, f func(newFile, oldFile *pfsclient.FileInfo) error) error { 1578 nI, oI := 0, 0 1579 for { 1580 if nI == len(newFiles) && oI == len(oldFiles) { 1581 return nil 1582 } 1583 var oFI *pfsclient.FileInfo 1584 var nFI *pfsclient.FileInfo 1585 switch { 1586 case oI == len(oldFiles) || (nI < len(newFiles) && newFiles[nI].File.Path < oldFiles[oI].File.Path): 1587 nFI = newFiles[nI] 1588 nI++ 1589 case nI == len(newFiles) || (oI < len(oldFiles) && oldFiles[oI].File.Path < newFiles[nI].File.Path): 1590 oFI = oldFiles[oI] 1591 oI++ 1592 case newFiles[nI].File.Path == oldFiles[oI].File.Path: 1593 nFI = newFiles[nI] 1594 nI++ 1595 oFI = oldFiles[oI] 1596 oI++ 1597 } 1598 if err := f(nFI, oFI); err != nil { 1599 if errors.Is(err, errutil.ErrBreak) { 1600 return nil 1601 } 1602 return err 1603 } 1604 } 1605 } 1606 1607 func newClient(name string, options ...client.Option) (*client.APIClient, error) { 1608 if inWorkerStr, ok := os.LookupEnv("PACH_IN_WORKER"); ok { 1609 inWorker, err := strconv.ParseBool(inWorkerStr) 1610 if err != nil { 1611 return nil, errors.Wrap(err, "couldn't parse PACH_IN_WORKER") 1612 } 1613 if inWorker { 1614 return client.NewInWorker(options...) 1615 } 1616 } 1617 return client.NewOnUserMachine(name, options...) 1618 }