github.com/pachyderm/pachyderm@v1.13.4/src/server/pps/cmds/cmds.go (about) 1 package cmds 2 3 import ( 4 "bufio" 5 "bytes" 6 "encoding/json" 7 "fmt" 8 "io" 9 "io/ioutil" 10 "net/url" 11 "os" 12 "path/filepath" 13 "strings" 14 "time" 15 16 pachdclient "github.com/pachyderm/pachyderm/src/client" 17 "github.com/pachyderm/pachyderm/src/client/pfs" 18 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 19 "github.com/pachyderm/pachyderm/src/client/pkg/grpcutil" 20 "github.com/pachyderm/pachyderm/src/client/pkg/tracing/extended" 21 ppsclient "github.com/pachyderm/pachyderm/src/client/pps" 22 "github.com/pachyderm/pachyderm/src/client/version" 23 "github.com/pachyderm/pachyderm/src/server/cmd/pachctl/shell" 24 "github.com/pachyderm/pachyderm/src/server/pkg/cmdutil" 25 "github.com/pachyderm/pachyderm/src/server/pkg/pager" 26 "github.com/pachyderm/pachyderm/src/server/pkg/ppsutil" 27 "github.com/pachyderm/pachyderm/src/server/pkg/progress" 28 "github.com/pachyderm/pachyderm/src/server/pkg/serde" 29 "github.com/pachyderm/pachyderm/src/server/pkg/tabwriter" 30 "github.com/pachyderm/pachyderm/src/server/pkg/uuid" 31 "github.com/pachyderm/pachyderm/src/server/pps/pretty" 32 txncmds "github.com/pachyderm/pachyderm/src/server/transaction/cmds" 33 34 prompt "github.com/c-bata/go-prompt" 35 units "github.com/docker/go-units" 36 "github.com/fatih/color" 37 docker "github.com/fsouza/go-dockerclient" 38 "github.com/gogo/protobuf/proto" 39 "github.com/gogo/protobuf/types" 40 "github.com/itchyny/gojq" 41 glob "github.com/pachyderm/ohmyglob" 42 "github.com/sirupsen/logrus" 43 "github.com/spf13/cobra" 44 "github.com/spf13/pflag" 45 "golang.org/x/net/context" 46 ) 47 48 // encoder creates an encoder that writes data structures to w[0] (or os.Stdout 49 // if no 'w' is passed) in the serialization format 'format'. 
If more than one 50 // writer is passed, all writers after the first are silently ignored (rather 51 // than returning an error), and if the 'format' passed is unrecognized 52 // (currently, 'format' must be 'json' or 'yaml') then pachctl exits 53 // immediately. Ignoring errors or crashing simplifies the type signature of 54 // 'encoder' and allows it to be used inline. 55 func encoder(format string, w ...io.Writer) serde.Encoder { 56 format = strings.ToLower(format) 57 if format == "" { 58 format = "json" 59 } 60 var output io.Writer = os.Stdout 61 if len(w) > 0 { 62 output = w[0] 63 } 64 e, err := serde.GetEncoder(format, output, 65 serde.WithIndent(2), 66 serde.WithOrigName(true), 67 ) 68 if err != nil { 69 cmdutil.ErrorAndExit(err.Error()) 70 } 71 return e 72 } 73 74 // Cmds returns a slice containing pps commands. 75 func Cmds() []*cobra.Command { 76 var commands []*cobra.Command 77 78 raw := false 79 var output string 80 outputFlags := pflag.NewFlagSet("", pflag.ExitOnError) 81 outputFlags.BoolVar(&raw, "raw", false, "Disable pretty printing; serialize data structures to an encoding such as json or yaml") 82 // --output is empty by default, so that we can print an error if a user 83 // explicitly sets --output without --raw, but the effective default is set in 84 // encode(), which assumes "json" if 'format' is empty. 
85 // Note: because of how spf13/flags works, no other StringVarP that sets 86 // 'output' can have a default value either 87 outputFlags.StringVarP(&output, "output", "o", "", "Output format when --raw is set: \"json\" or \"yaml\" (default \"json\")") 88 89 fullTimestamps := false 90 fullTimestampsFlags := pflag.NewFlagSet("", pflag.ContinueOnError) 91 fullTimestampsFlags.BoolVar(&fullTimestamps, "full-timestamps", false, "Return absolute timestamps (as opposed to the default, relative timestamps).") 92 93 noPager := false 94 noPagerFlags := pflag.NewFlagSet("", pflag.ContinueOnError) 95 noPagerFlags.BoolVar(&noPager, "no-pager", false, "Don't pipe output into a pager (i.e. less).") 96 97 jobDocs := &cobra.Command{ 98 Short: "Docs for jobs.", 99 Long: `Jobs are the basic units of computation in Pachyderm. 100 101 Jobs run a containerized workload over a set of finished input commits. Jobs are 102 created by pipelines and will write output to a commit in the pipeline's output 103 repo. A job can have multiple datums, each processed independently and the 104 results will be merged together at the end. 
105 106 If the job fails, the output commit will not be populated with data.`, 107 } 108 commands = append(commands, cmdutil.CreateDocsAlias(jobDocs, "job", " job$")) 109 110 var block bool 111 inspectJob := &cobra.Command{ 112 Use: "{{alias}} <job>", 113 Short: "Return info about a job.", 114 Long: "Return info about a job.", 115 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 116 client, err := pachdclient.NewOnUserMachine("user") 117 if err != nil { 118 return err 119 } 120 defer client.Close() 121 jobInfo, err := client.InspectJob(args[0], block, true) 122 if err != nil { 123 cmdutil.ErrorAndExit("error from InspectJob: %s", err.Error()) 124 } 125 if jobInfo == nil { 126 cmdutil.ErrorAndExit("job %s not found.", args[0]) 127 } 128 if raw { 129 return encoder(output).EncodeProto(jobInfo) 130 } else if output != "" { 131 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 132 } 133 ji := &pretty.PrintableJobInfo{ 134 JobInfo: jobInfo, 135 FullTimestamps: fullTimestamps, 136 } 137 return pretty.PrintDetailedJobInfo(os.Stdout, ji) 138 }), 139 } 140 inspectJob.Flags().BoolVarP(&block, "block", "b", false, "block until the job has either succeeded or failed") 141 inspectJob.Flags().AddFlagSet(outputFlags) 142 inspectJob.Flags().AddFlagSet(fullTimestampsFlags) 143 shell.RegisterCompletionFunc(inspectJob, shell.JobCompletion) 144 commands = append(commands, cmdutil.CreateAlias(inspectJob, "inspect job")) 145 146 var pipelineName string 147 var outputCommitStr string 148 var inputCommitStrs []string 149 var history string 150 var stateStrs []string 151 listJob := &cobra.Command{ 152 Short: "Return info about jobs.", 153 Long: "Return info about jobs.", 154 Example: ` 155 # Return all jobs 156 $ {{alias}} 157 158 # Return all jobs from the most recent version of pipeline "foo" 159 $ {{alias}} -p foo 160 161 # Return all jobs from all versions of pipeline "foo" 162 $ {{alias}} -p foo --history all 163 164 # Return all jobs whose input commits include 
foo@XXX and bar@YYY 165 $ {{alias}} -i foo@XXX -i bar@YYY 166 167 # Return all jobs in pipeline foo and whose input commits include bar@YYY 168 $ {{alias}} -p foo -i bar@YYY`, 169 Run: cmdutil.RunFixedArgs(0, func(args []string) error { 170 commits, err := cmdutil.ParseCommits(inputCommitStrs) 171 if err != nil { 172 return err 173 } 174 history, err := cmdutil.ParseHistory(history) 175 if err != nil { 176 return errors.Wrapf(err, "error parsing history flag") 177 } 178 var outputCommit *pfs.Commit 179 if outputCommitStr != "" { 180 outputCommit, err = cmdutil.ParseCommit(outputCommitStr) 181 if err != nil { 182 return err 183 } 184 } 185 var filter string 186 if len(stateStrs) > 0 { 187 filter, err = ParseJobStates(stateStrs) 188 if err != nil { 189 return errors.Wrap(err, "error parsing state") 190 } 191 } 192 193 client, err := pachdclient.NewOnUserMachine("user") 194 if err != nil { 195 return err 196 } 197 defer client.Close() 198 199 return pager.Page(noPager, os.Stdout, func(w io.Writer) error { 200 if raw { 201 e := encoder(output) 202 return client.ListJobFilterF(pipelineName, commits, outputCommit, history, true, filter, func(ji *ppsclient.JobInfo) error { 203 return e.EncodeProto(ji) 204 }) 205 } else if output != "" { 206 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 207 } 208 writer := tabwriter.NewWriter(w, pretty.JobHeader) 209 if err := client.ListJobFilterF(pipelineName, commits, outputCommit, history, false, filter, func(ji *ppsclient.JobInfo) error { 210 pretty.PrintJobInfo(writer, ji, fullTimestamps) 211 return nil 212 }); err != nil { 213 return err 214 } 215 return writer.Flush() 216 }) 217 }), 218 } 219 listJob.Flags().StringVarP(&pipelineName, "pipeline", "p", "", "Limit to jobs made by pipeline.") 220 listJob.MarkFlagCustom("pipeline", "__pachctl_get_pipeline") 221 listJob.Flags().StringVarP(&outputCommitStr, "output", "o", "", "List jobs with a specific output commit. 
format: <repo>@<branch-or-commit>") 222 listJob.MarkFlagCustom("output", "__pachctl_get_repo_commit") 223 listJob.Flags().StringSliceVarP(&inputCommitStrs, "input", "i", []string{}, "List jobs with a specific set of input commits. format: <repo>@<branch-or-commit>") 224 listJob.MarkFlagCustom("input", "__pachctl_get_repo_commit") 225 listJob.Flags().AddFlagSet(outputFlags) 226 listJob.Flags().AddFlagSet(fullTimestampsFlags) 227 listJob.Flags().AddFlagSet(noPagerFlags) 228 listJob.Flags().StringVar(&history, "history", "none", "Return jobs from historical versions of pipelines.") 229 listJob.Flags().StringArrayVar(&stateStrs, "state", []string{}, "Return only jobs with the specified state. Can be repeated to include multiple states") 230 shell.RegisterCompletionFunc(listJob, 231 func(flag, text string, maxCompletions int64) ([]prompt.Suggest, shell.CacheFunc) { 232 if flag == "-p" || flag == "--pipeline" { 233 cs, cf := shell.PipelineCompletion(flag, text, maxCompletions) 234 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 235 } 236 return nil, shell.SameFlag(flag) 237 }) 238 commands = append(commands, cmdutil.CreateAlias(listJob, "list job")) 239 240 var pipelines cmdutil.RepeatedStringArg 241 flushJob := &cobra.Command{ 242 Use: "{{alias}} <repo>@<branch-or-commit> ...", 243 Short: "Wait for all jobs caused by the specified commits to finish and return them.", 244 Long: "Wait for all jobs caused by the specified commits to finish and return them.", 245 Example: ` 246 # Return jobs caused by foo@XXX and bar@YYY. 247 $ {{alias}} foo@XXX bar@YYY 248 249 # Return jobs caused by foo@XXX leading to pipelines bar and baz. 
250 $ {{alias}} foo@XXX -p bar -p baz`, 251 Run: cmdutil.Run(func(args []string) error { 252 if output != "" && !raw { 253 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 254 } 255 commits, err := cmdutil.ParseCommits(args) 256 if err != nil { 257 return err 258 } 259 260 c, err := pachdclient.NewOnUserMachine("user") 261 if err != nil { 262 return err 263 } 264 defer c.Close() 265 var writer *tabwriter.Writer 266 if !raw { 267 writer = tabwriter.NewWriter(os.Stdout, pretty.JobHeader) 268 } 269 e := encoder(output) 270 if err := c.FlushJob(commits, pipelines, func(ji *ppsclient.JobInfo) error { 271 if raw { 272 if err := e.EncodeProto(ji); err != nil { 273 return err 274 } 275 return nil 276 } 277 pretty.PrintJobInfo(writer, ji, fullTimestamps) 278 return nil 279 }); err != nil { 280 return err 281 } 282 if !raw { 283 return writer.Flush() 284 } 285 return nil 286 }), 287 } 288 flushJob.Flags().VarP(&pipelines, "pipeline", "p", "Wait only for jobs leading to a specific set of pipelines") 289 flushJob.MarkFlagCustom("pipeline", "__pachctl_get_pipeline") 290 flushJob.Flags().AddFlagSet(outputFlags) 291 flushJob.Flags().AddFlagSet(fullTimestampsFlags) 292 shell.RegisterCompletionFunc(flushJob, 293 func(flag, text string, maxCompletions int64) ([]prompt.Suggest, shell.CacheFunc) { 294 if flag == "--pipeline" || flag == "-p" { 295 cs, cf := shell.PipelineCompletion(flag, text, maxCompletions) 296 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 297 } 298 cs, cf := shell.BranchCompletion(flag, text, maxCompletions) 299 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 300 }) 301 commands = append(commands, cmdutil.CreateAlias(flushJob, "flush job")) 302 303 deleteJob := &cobra.Command{ 304 Use: "{{alias}} <job>", 305 Short: "Delete a job.", 306 Long: "Delete a job.", 307 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 308 client, err := pachdclient.NewOnUserMachine("user") 309 if err != nil { 310 return err 311 } 312 defer 
client.Close() 313 if err := client.DeleteJob(args[0]); err != nil { 314 cmdutil.ErrorAndExit("error from DeleteJob: %s", err.Error()) 315 } 316 return nil 317 }), 318 } 319 shell.RegisterCompletionFunc(deleteJob, shell.JobCompletion) 320 commands = append(commands, cmdutil.CreateAlias(deleteJob, "delete job")) 321 322 stopJob := &cobra.Command{ 323 Use: "{{alias}} <job>", 324 Short: "Stop a job.", 325 Long: "Stop a job. The job will be stopped immediately.", 326 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 327 client, err := pachdclient.NewOnUserMachine("user") 328 if err != nil { 329 return err 330 } 331 defer client.Close() 332 if err := client.StopJob(args[0]); err != nil { 333 cmdutil.ErrorAndExit("error from StopJob: %s", err.Error()) 334 } 335 return nil 336 }), 337 } 338 shell.RegisterCompletionFunc(stopJob, shell.JobCompletion) 339 commands = append(commands, cmdutil.CreateAlias(stopJob, "stop job")) 340 341 datumDocs := &cobra.Command{ 342 Short: "Docs for datums.", 343 Long: `Datums are the small independent units of processing for Pachyderm jobs. 344 345 A datum is defined by applying a glob pattern (in the pipeline spec) to the file 346 paths in the input repo. A datum can include one or more files or directories. 347 348 Datums within a job will be processed independently, sometimes distributed 349 across separate workers. 
A separate execution of user code will be run for 350 each datum.`, 351 } 352 commands = append(commands, cmdutil.CreateDocsAlias(datumDocs, "datum", " datum$")) 353 354 restartDatum := &cobra.Command{ 355 Use: "{{alias}} <job> <datum-path1>,<datum-path2>,...", 356 Short: "Restart a datum.", 357 Long: "Restart a datum.", 358 Run: cmdutil.RunFixedArgs(2, func(args []string) error { 359 client, err := pachdclient.NewOnUserMachine("user") 360 if err != nil { 361 return err 362 } 363 defer client.Close() 364 datumFilter := strings.Split(args[1], ",") 365 for i := 0; i < len(datumFilter); { 366 if len(datumFilter[i]) == 0 { 367 if i+1 < len(datumFilter) { 368 copy(datumFilter[i:], datumFilter[i+1:]) 369 } 370 datumFilter = datumFilter[:len(datumFilter)-1] 371 } else { 372 i++ 373 } 374 } 375 return client.RestartDatum(args[0], datumFilter) 376 }), 377 } 378 commands = append(commands, cmdutil.CreateAlias(restartDatum, "restart datum")) 379 380 var pageSize int64 381 var page int64 382 var pipelineInputPath string 383 var statusOnly bool 384 listDatum := &cobra.Command{ 385 Use: "{{alias}} <job>", 386 Short: "Return the datums in a job.", 387 Long: "Return the datums in a job.", 388 Run: cmdutil.RunBoundedArgs(0, 1, func(args []string) (retErr error) { 389 client, err := pachdclient.NewOnUserMachine("user") 390 if err != nil { 391 return err 392 } 393 defer client.Close() 394 if pageSize < 0 { 395 return errors.Errorf("pageSize must be zero or positive") 396 } 397 if page < 0 { 398 return errors.Errorf("page must be zero or positive") 399 } 400 var printF func(*ppsclient.DatumInfo) error 401 if !raw { 402 if output != "" { 403 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 404 } 405 writer := tabwriter.NewWriter(os.Stdout, pretty.DatumHeader) 406 printF = func(di *ppsclient.DatumInfo) error { 407 pretty.PrintDatumInfo(writer, di) 408 return nil 409 } 410 defer func() { 411 if err := writer.Flush(); retErr == nil { 412 retErr = err 413 } 414 }() 415 } else 
{ 416 e := encoder(output) 417 printF = func(di *ppsclient.DatumInfo) error { 418 return e.EncodeProto(di) 419 } 420 } 421 if pipelineInputPath != "" && len(args) == 1 { 422 return errors.Errorf("can't specify both a job and a pipeline spec") 423 } else if pipelineInputPath != "" { 424 pipelineReader, err := ppsutil.NewPipelineManifestReader(pipelineInputPath) 425 if err != nil { 426 return err 427 } 428 request, err := pipelineReader.NextCreatePipelineRequest() 429 if err != nil { 430 return err 431 } 432 return client.ListDatumInputF(request.Input, pageSize, page, printF) 433 } else if len(args) == 1 { 434 var options []pachdclient.ListDatumOption 435 if statusOnly { 436 options = append(options, pachdclient.WithStatusOnly()) 437 } 438 return client.ListDatumF(args[0], pageSize, page, printF, options...) 439 } else { 440 return errors.Errorf("must specify either a job or a pipeline spec") 441 } 442 }), 443 } 444 listDatum.Flags().Int64Var(&pageSize, "pageSize", 0, "Specify the number of results sent back in a single page") 445 listDatum.Flags().Int64Var(&page, "page", 0, "Specify the page of results to send") 446 listDatum.Flags().StringVarP(&pipelineInputPath, "file", "f", "", "The JSON file containing the pipeline to list datums from, the pipeline need not exist") 447 listDatum.Flags().BoolVar(&statusOnly, "status-only", false, "Only retrieve status info for datums, improving performance") 448 listDatum.Flags().AddFlagSet(outputFlags) 449 shell.RegisterCompletionFunc(listDatum, shell.JobCompletion) 450 commands = append(commands, cmdutil.CreateAlias(listDatum, "list datum")) 451 452 inspectDatum := &cobra.Command{ 453 Use: "{{alias}} <job> <datum>", 454 Short: "Display detailed info about a single datum.", 455 Long: "Display detailed info about a single datum. 
Requires the pipeline to have stats enabled.", 456 Run: cmdutil.RunFixedArgs(2, func(args []string) error { 457 client, err := pachdclient.NewOnUserMachine("user") 458 if err != nil { 459 return err 460 } 461 defer client.Close() 462 datumInfo, err := client.InspectDatum(args[0], args[1]) 463 if err != nil { 464 return err 465 } 466 if raw { 467 return encoder(output).EncodeProto(datumInfo) 468 } else if output != "" { 469 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 470 } 471 pretty.PrintDetailedDatumInfo(os.Stdout, datumInfo) 472 return nil 473 }), 474 } 475 inspectDatum.Flags().AddFlagSet(outputFlags) 476 commands = append(commands, cmdutil.CreateAlias(inspectDatum, "inspect datum")) 477 478 var ( 479 jobID string 480 datumID string 481 commaInputs string // comma-separated list of input files of interest 482 master bool 483 worker bool 484 follow bool 485 tail int64 486 since string 487 ) 488 489 // prettyLogsPrinter helps to print the logs recieved in different colours 490 prettyLogsPrinter := func(message string) { 491 informationArray := strings.Split(message, " ") 492 if len(informationArray) > 1 { 493 debugString := informationArray[1] 494 debugLevel := strings.ToLower(debugString) 495 var debugLevelColoredString string 496 if debugLevel == "info" { 497 debugLevelColoredString = color.New(color.FgGreen).Sprint(debugString) 498 } else if debugLevel == "warning" { 499 debugLevelColoredString = color.New(color.FgYellow).Sprint(debugString) 500 } else if debugLevel == "error" { 501 debugLevelColoredString = color.New(color.FgRed).Sprint(debugString) 502 } else { 503 debugLevelColoredString = debugString 504 } 505 informationArray[1] = debugLevelColoredString 506 coloredMessage := strings.Join(informationArray, " ") 507 fmt.Println(coloredMessage) 508 } else { 509 fmt.Println(message) 510 } 511 512 } 513 514 getLogs := &cobra.Command{ 515 Use: "{{alias}} [--pipeline=<pipeline>|--job=<job>] [--datum=<datum>]", 516 Short: "Return logs from a 
job.", 517 Long: "Return logs from a job.", 518 Example: ` 519 # Return logs emitted by recent jobs in the "filter" pipeline 520 $ {{alias}} --pipeline=filter 521 522 # Return logs emitted by the job aedfa12aedf 523 $ {{alias}} --job=aedfa12aedf 524 525 # Return logs emitted by the pipeline \"filter\" while processing /apple.txt and a file with the hash 123aef 526 $ {{alias}} --pipeline=filter --inputs=/apple.txt,123aef`, 527 Run: cmdutil.RunFixedArgs(0, func(args []string) error { 528 client, err := pachdclient.NewOnUserMachine("user") 529 if err != nil { 530 return errors.Wrapf(err, "error connecting to pachd") 531 } 532 defer client.Close() 533 534 // Break up comma-separated input paths, and filter out empty entries 535 data := strings.Split(commaInputs, ",") 536 for i := 0; i < len(data); { 537 if len(data[i]) == 0 { 538 if i+1 < len(data) { 539 copy(data[i:], data[i+1:]) 540 } 541 data = data[:len(data)-1] 542 } else { 543 i++ 544 } 545 } 546 since, err := time.ParseDuration(since) 547 if err != nil { 548 return errors.Wrapf(err, "error parsing since(%q)", since) 549 } 550 if tail != 0 { 551 return errors.Errorf("tail has been deprecated and removed from Pachyderm, use --since instead") 552 } 553 554 // Issue RPC 555 iter := client.GetLogs(pipelineName, jobID, data, datumID, master, follow, since) 556 var buf bytes.Buffer 557 encoder := json.NewEncoder(&buf) 558 for iter.Next() { 559 if raw { 560 buf.Reset() 561 if err := encoder.Encode(iter.Message()); err != nil { 562 fmt.Fprintf(os.Stderr, "error marshalling \"%v\": %s\n", iter.Message(), err) 563 } 564 fmt.Println(buf.String()) 565 } else if iter.Message().User && !master && !worker { 566 prettyLogsPrinter(iter.Message().Message) 567 } else if iter.Message().Master && master { 568 prettyLogsPrinter(iter.Message().Message) 569 } else if !iter.Message().User && !iter.Message().Master && worker { 570 prettyLogsPrinter(iter.Message().Message) 571 } else if pipelineName == "" && jobID == "" { 572 
prettyLogsPrinter(iter.Message().Message) 573 } 574 } 575 return iter.Err() 576 }), 577 } 578 getLogs.Flags().StringVarP(&pipelineName, "pipeline", "p", "", "Filter the log "+ 579 "for lines from this pipeline (accepts pipeline name)") 580 getLogs.MarkFlagCustom("pipeline", "__pachctl_get_pipeline") 581 getLogs.Flags().StringVarP(&jobID, "job", "j", "", "Filter for log lines from "+ 582 "this job (accepts job ID)") 583 getLogs.MarkFlagCustom("job", "__pachctl_get_job") 584 getLogs.Flags().StringVar(&datumID, "datum", "", "Filter for log lines for this datum (accepts datum ID)") 585 getLogs.Flags().StringVar(&commaInputs, "inputs", "", "Filter for log lines "+ 586 "generated while processing these files (accepts PFS paths or file hashes)") 587 getLogs.Flags().BoolVar(&master, "master", false, "Return log messages from the master process (pipeline must be set).") 588 getLogs.Flags().BoolVar(&worker, "worker", false, "Return log messages from the worker process.") 589 getLogs.Flags().BoolVar(&raw, "raw", false, "Return log messages verbatim from server.") 590 getLogs.Flags().BoolVarP(&follow, "follow", "f", false, "Follow logs as more are created.") 591 getLogs.Flags().Int64VarP(&tail, "tail", "t", 0, "Lines of recent logs to display.") 592 getLogs.Flags().StringVar(&since, "since", "24h", "Return log messages more recent than \"since\".") 593 shell.RegisterCompletionFunc(getLogs, 594 func(flag, text string, maxCompletions int64) ([]prompt.Suggest, shell.CacheFunc) { 595 if flag == "--pipeline" || flag == "-p" { 596 cs, cf := shell.PipelineCompletion(flag, text, maxCompletions) 597 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 598 } 599 if flag == "--job" || flag == "-j" { 600 cs, cf := shell.JobCompletion(flag, text, maxCompletions) 601 return cs, shell.AndCacheFunc(cf, shell.SameFlag(flag)) 602 } 603 return nil, shell.SameFlag(flag) 604 }) 605 commands = append(commands, cmdutil.CreateAlias(getLogs, "logs")) 606 607 pipelineDocs := &cobra.Command{ 608 
Short: "Docs for pipelines.", 609 Long: `Pipelines are a powerful abstraction for automating jobs. 610 611 Pipelines take a set of repos and branches as inputs and will write to a single 612 output repo of the same name. Pipelines then subscribe to commits on those repos 613 and launch a job to process each incoming commit. 614 615 All jobs created by a pipeline will create commits in the pipeline's output repo.`, 616 } 617 commands = append(commands, cmdutil.CreateDocsAlias(pipelineDocs, "pipeline", " pipeline$")) 618 619 var build bool 620 var pushImages bool 621 var registry string 622 var username string 623 var pipelinePath string 624 createPipeline := &cobra.Command{ 625 Short: "Create a new pipeline.", 626 Long: "Create a new pipeline from a pipeline specification. For details on the format, see http://docs.pachyderm.io/en/latest/reference/pipeline_spec.html.", 627 Run: cmdutil.RunFixedArgs(0, func(args []string) (retErr error) { 628 return pipelineHelper(false, build, pushImages, registry, username, pipelinePath, false) 629 }), 630 } 631 createPipeline.Flags().StringVarP(&pipelinePath, "file", "f", "-", "The JSON file containing the pipeline, it can be a url or local file. 
- reads from stdin.") 632 createPipeline.Flags().BoolVarP(&build, "build", "b", false, "If true, build and push local docker images into the docker registry.") 633 createPipeline.Flags().BoolVarP(&pushImages, "push-images", "p", false, "If true, push local docker images into the docker registry.") 634 createPipeline.Flags().StringVarP(®istry, "registry", "r", "index.docker.io", "The registry to push images to.") 635 createPipeline.Flags().StringVarP(&username, "username", "u", "", "The username to push images as.") 636 commands = append(commands, cmdutil.CreateAlias(createPipeline, "create pipeline")) 637 638 var reprocess bool 639 updatePipeline := &cobra.Command{ 640 Short: "Update an existing Pachyderm pipeline.", 641 Long: "Update a Pachyderm pipeline with a new pipeline specification. For details on the format, see http://docs.pachyderm.io/en/latest/reference/pipeline_spec.html.", 642 Run: cmdutil.RunFixedArgs(0, func(args []string) (retErr error) { 643 return pipelineHelper(reprocess, build, pushImages, registry, username, pipelinePath, true) 644 }), 645 } 646 updatePipeline.Flags().StringVarP(&pipelinePath, "file", "f", "-", "The JSON file containing the pipeline, it can be a url or local file. 
- reads from stdin.") 647 updatePipeline.Flags().BoolVarP(&build, "build", "b", false, "If true, build and push local docker images into the docker registry.") 648 updatePipeline.Flags().BoolVarP(&pushImages, "push-images", "p", false, "If true, push local docker images into the docker registry.") 649 updatePipeline.Flags().StringVarP(®istry, "registry", "r", "index.docker.io", "The registry to push images to.") 650 updatePipeline.Flags().StringVarP(&username, "username", "u", "", "The username to push images as.") 651 updatePipeline.Flags().BoolVar(&reprocess, "reprocess", false, "If true, reprocess datums that were already processed by previous version of the pipeline.") 652 commands = append(commands, cmdutil.CreateAlias(updatePipeline, "update pipeline")) 653 654 runPipeline := &cobra.Command{ 655 Use: "{{alias}} <pipeline> [<repo>@[<branch>|<commit>|<branch>=<commit>]...]", 656 Short: "Run an existing Pachyderm pipeline on the specified commits-branch pairs.", 657 Long: "Run a Pachyderm pipeline on the datums from specific commit-branch pairs. If you only specify a branch, Pachyderm uses the HEAD commit to complete the pair. Similarly, if you only specify a commit, Pachyderm will try to use the branch the commit originated on. Note: Pipelines run automatically when data is committed to them. 
This command is for the case where you want to run the pipeline on a specific set of data.", 658 Example: ` 659 # Rerun the latest job for the "filter" pipeline 660 $ {{alias}} filter 661 662 # Process the pipeline "filter" on the data from commit-branch pairs "repo1@A=a23e4" and "repo2@B=bf363" 663 $ {{alias}} filter repo1@A=a23e4 repo2@B=bf363 664 665 # Run the pipeline "filter" on the data from commit "167af5" on the "staging" branch on repo "repo1" 666 $ {{alias}} filter repo1@staging=167af5 667 668 # Run the pipeline "filter" on the HEAD commit of the "testing" branch on repo "repo1" 669 $ {{alias}} filter repo1@testing 670 671 # Run the pipeline "filter" on the commit "af159e which originated on the "master" branch on repo "repo1" 672 $ {{alias}} filter repo1@af159`, 673 674 Run: cmdutil.RunMinimumArgs(1, func(args []string) (retErr error) { 675 client, err := pachdclient.NewOnUserMachine("user") 676 if err != nil { 677 return err 678 } 679 defer client.Close() 680 prov, err := cmdutil.ParseCommitProvenances(args[1:]) 681 if err != nil { 682 return err 683 } 684 err = client.RunPipeline(args[0], prov, jobID) 685 if err != nil { 686 return err 687 } 688 return nil 689 }), 690 } 691 runPipeline.Flags().StringVar(&jobID, "job", "", "rerun the given job") 692 commands = append(commands, cmdutil.CreateAlias(runPipeline, "run pipeline")) 693 694 runCron := &cobra.Command{ 695 Use: "{{alias}} <pipeline>", 696 Short: "Run an existing Pachyderm cron pipeline now", 697 Long: "Run an existing Pachyderm cron pipeline now", 698 Example: ` 699 # Run a cron pipeline "clock" now 700 $ {{alias}} clock`, 701 Run: cmdutil.RunMinimumArgs(1, func(args []string) (retErr error) { 702 client, err := pachdclient.NewOnUserMachine("user") 703 if err != nil { 704 return err 705 } 706 defer client.Close() 707 err = client.RunCron(args[0]) 708 if err != nil { 709 return err 710 } 711 return nil 712 }), 713 } 714 commands = append(commands, cmdutil.CreateAlias(runCron, "run cron")) 715 716 
inspectPipeline := &cobra.Command{ 717 Use: "{{alias}} <pipeline>", 718 Short: "Return info about a pipeline.", 719 Long: "Return info about a pipeline.", 720 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 721 client, err := pachdclient.NewOnUserMachine("user") 722 if err != nil { 723 return err 724 } 725 defer client.Close() 726 pipelineInfo, err := client.InspectPipeline(args[0]) 727 if err != nil { 728 return err 729 } 730 if pipelineInfo == nil { 731 return errors.Errorf("pipeline %s not found", args[0]) 732 } 733 if raw { 734 return encoder(output).EncodeProto(pipelineInfo) 735 } else if output != "" { 736 cmdutil.ErrorAndExit("cannot set --output (-o) without --raw") 737 } 738 pi := &pretty.PrintablePipelineInfo{ 739 PipelineInfo: pipelineInfo, 740 FullTimestamps: fullTimestamps, 741 } 742 return pretty.PrintDetailedPipelineInfo(os.Stdout, pi) 743 }), 744 } 745 inspectPipeline.Flags().AddFlagSet(outputFlags) 746 inspectPipeline.Flags().AddFlagSet(fullTimestampsFlags) 747 commands = append(commands, cmdutil.CreateAlias(inspectPipeline, "inspect pipeline")) 748 749 extractPipeline := &cobra.Command{ 750 Use: "{{alias}} <pipeline>", 751 Short: "Return the manifest used to create a pipeline.", 752 Long: "Return the manifest used to create a pipeline.", 753 Run: cmdutil.RunFixedArgs(1, func(args []string) error { 754 client, err := pachdclient.NewOnUserMachine("user") 755 if err != nil { 756 return err 757 } 758 defer client.Close() 759 createPipelineRequest, err := client.ExtractPipeline(args[0]) 760 if err != nil { 761 return err 762 } 763 return encoder(output).EncodeProto(createPipelineRequest) 764 }), 765 } 766 extractPipeline.Flags().StringVarP(&output, "output", "o", "", "Output format: \"json\" or \"yaml\" (default \"json\")") 767 commands = append(commands, cmdutil.CreateAlias(extractPipeline, "extract pipeline")) 768 769 var editor string 770 var editorArgs []string 771 editPipeline := &cobra.Command{ 772 Use: "{{alias}} <pipeline>", 773 Short: 
"Edit the manifest for a pipeline in your text editor.", 774 Long: "Edit the manifest for a pipeline in your text editor.", 775 Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) { 776 client, err := pachdclient.NewOnUserMachine("user") 777 if err != nil { 778 return err 779 } 780 defer client.Close() 781 createPipelineRequest, err := client.ExtractPipeline(args[0]) 782 if err != nil { 783 return err 784 } 785 f, err := ioutil.TempFile("", args[0]) 786 if err != nil { 787 return err 788 } 789 if err := encoder(output, f).EncodeProto(createPipelineRequest); err != nil { 790 return err 791 } 792 defer func() { 793 if err := f.Close(); err != nil && retErr == nil { 794 retErr = err 795 } 796 }() 797 if editor == "" { 798 editor = os.Getenv("EDITOR") 799 } 800 if editor == "" { 801 editor = "vim" 802 } 803 editorArgs = strings.Split(editor, " ") 804 editorArgs = append(editorArgs, f.Name()) 805 if err := cmdutil.RunIO(cmdutil.IO{ 806 Stdin: os.Stdin, 807 Stdout: os.Stdout, 808 Stderr: os.Stderr, 809 }, editorArgs...); err != nil { 810 return err 811 } 812 pipelineReader, err := ppsutil.NewPipelineManifestReader(f.Name()) 813 if err != nil { 814 return err 815 } 816 request, err := pipelineReader.NextCreatePipelineRequest() 817 if err != nil { 818 return err 819 } 820 if proto.Equal(createPipelineRequest, request) { 821 fmt.Println("Pipeline unchanged, no update will be performed.") 822 return nil 823 } 824 request.Update = true 825 request.Reprocess = reprocess 826 return txncmds.WithActiveTransaction(client, func(txClient *pachdclient.APIClient) error { 827 _, err := txClient.PpsAPIClient.CreatePipeline( 828 txClient.Ctx(), 829 request, 830 ) 831 return grpcutil.ScrubGRPC(err) 832 }) 833 }), 834 } 835 editPipeline.Flags().BoolVar(&reprocess, "reprocess", false, "If true, reprocess datums that were already processed by previous version of the pipeline.") 836 editPipeline.Flags().StringVar(&editor, "editor", "", "Editor to use for modifying the manifest.") 
	// Remaining flags for 'edit pipeline' (command declared above): --output
	// selects the serialization format used when writing the spec to the temp
	// file for editing.
	editPipeline.Flags().StringVarP(&output, "output", "o", "", "Output format: \"json\" or \"yaml\" (default \"json\")")
	commands = append(commands, cmdutil.CreateAlias(editPipeline, "edit pipeline"))

	// listPipeline ('pachctl list pipeline') prints info about all pipelines,
	// or about one pipeline if a name argument is given. --raw/--spec switch
	// from the pretty table to serialized output.
	var spec bool
	listPipeline := &cobra.Command{
		Use:   "{{alias}} [<pipeline>]",
		Short: "Return info about all pipelines.",
		Long:  "Return info about all pipelines.",
		Run: cmdutil.RunBoundedArgs(0, 1, func(args []string) error {
			// validate flags
			if raw && spec {
				return errors.Errorf("cannot set both --raw and --spec")
			} else if !raw && !spec && output != "" {
				// --output only affects serialized output, so it requires one
				// of --raw/--spec; ErrorAndExit terminates pachctl immediately.
				cmdutil.ErrorAndExit("cannot set --output (-o) without --raw or --spec")
			}
			// NOTE: shadows the outer 'history' flag string with the parsed value.
			history, err := cmdutil.ParseHistory(history)
			if err != nil {
				return errors.Wrapf(err, "error parsing history flag")
			}
			// Translate any --state values into a jq filter for the request.
			var filter string
			if len(stateStrs) > 0 {
				filter, err = ParsePipelineStates(stateStrs)
				if err != nil {
					return errors.Wrap(err, "error parsing state")
				}
			}
			// init client & get pipeline info
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return errors.Wrapf(err, "error connecting to pachd")
			}
			defer client.Close()
			var pipeline string
			if len(args) > 0 {
				pipeline = args[0]
			}
			request := &ppsclient.ListPipelineRequest{History: history, AllowIncomplete: true, JqFilter: filter}
			if pipeline != "" {
				request.Pipeline = pachdclient.NewPipeline(pipeline)
			}
			response, err := client.PpsAPIClient.ListPipeline(client.Ctx(), request)
			if err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			pipelineInfos := response.PipelineInfo
			if raw {
				// --raw: dump each PipelineInfo proto in the chosen format.
				e := encoder(output)
				for _, pipelineInfo := range pipelineInfos {
					if err := e.EncodeProto(pipelineInfo); err != nil {
						return err
					}
				}
				return nil
			} else if spec {
				// --spec: emit 'create pipeline'-compatible request specs instead.
				e := encoder(output)
				for _, pipelineInfo := range pipelineInfos {
					if err := e.EncodeProto(ppsutil.PipelineReqFromInfo(pipelineInfo)); err != nil {
						return err
					}
				}
				return nil
			}
			// Default: pretty table. Warn once (on stderr) if any pipeline is
			// in an error state.
			for _, pi := range pipelineInfos {
				if ppsutil.ErrorState(pi.State) {
					fmt.Fprintln(os.Stderr, "One or more pipelines have encountered errors, use inspect pipeline to get more info.")
					break
				}
			}
			writer := tabwriter.NewWriter(os.Stdout, pretty.PipelineHeader)
			for _, pipelineInfo := range pipelineInfos {
				pretty.PrintPipelineInfo(writer, pipelineInfo, fullTimestamps)
			}
			return writer.Flush()
		}),
	}
	listPipeline.Flags().BoolVarP(&spec, "spec", "s", false, "Output 'create pipeline' compatibility specs.")
	listPipeline.Flags().AddFlagSet(outputFlags)
	listPipeline.Flags().AddFlagSet(fullTimestampsFlags)
	listPipeline.Flags().StringVar(&history, "history", "none", "Return revision history for pipelines.")
	listPipeline.Flags().StringArrayVar(&stateStrs, "state", []string{}, "Return only pipelines with the specified state. Can be repeated to include multiple states")
	commands = append(commands, cmdutil.CreateAlias(listPipeline, "list pipeline"))

	// deletePipeline ('pachctl delete pipeline') deletes one pipeline by name,
	// or every pipeline with --all.
	var (
		all              bool
		force            bool
		keepRepo         bool
		splitTransaction bool
	)
	deletePipeline := &cobra.Command{
		Use:   "{{alias}} (<pipeline>|--all)",
		Short: "Delete a pipeline.",
		Long:  "Delete a pipeline.",
		Run: cmdutil.RunBoundedArgs(0, 1, func(args []string) error {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()
			// Exactly one of <pipeline> / --all must be supplied.
			if len(args) > 0 && all {
				return errors.Errorf("cannot use the --all flag with an argument")
			}
			if len(args) == 0 && !all {
				return errors.Errorf("either a pipeline name or the --all flag needs to be provided")
			}
			if splitTransaction {
				// --split-txn can leave the cluster inconsistent if interrupted,
				// so require interactive confirmation first.
				fmt.Println("WARNING: If using the --split-txn flag, this command must run until complete. If a failure or incomplete run occurs, then Pachyderm will be left in an inconsistent state. To resolve an inconsistent state, rerun this command.")
				if ok, err := cmdutil.InteractiveConfirm(); err != nil {
					return err
				} else if !ok {
					return nil
				}
			}
			req := &ppsclient.DeletePipelineRequest{
				All:              all,
				Force:            force,
				KeepRepo:         keepRepo,
				SplitTransaction: splitTransaction,
			}
			if len(args) > 0 {
				req.Pipeline = pachdclient.NewPipeline(args[0])
			}
			if _, err = client.PpsAPIClient.DeletePipeline(client.Ctx(), req); err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			return nil
		}),
	}
	deletePipeline.Flags().BoolVar(&all, "all", false, "delete all pipelines")
	deletePipeline.Flags().BoolVarP(&force, "force", "f", false, "delete the pipeline regardless of errors; use with care")
	deletePipeline.Flags().BoolVar(&keepRepo, "keep-repo", false, "delete the pipeline, but keep the output repo around (the pipeline can be recreated later and use the same repo)")
	deletePipeline.Flags().BoolVar(&splitTransaction, "split-txn", false, "split large transactions into multiple smaller transactions")
	commands = append(commands, cmdutil.CreateAlias(deletePipeline, "delete pipeline"))

	// startPipeline ('pachctl start pipeline') restarts a stopped pipeline.
	startPipeline := &cobra.Command{
		Use:   "{{alias}} <pipeline>",
		Short: "Restart a stopped pipeline.",
		Long:  "Restart a stopped pipeline.",
		Run: cmdutil.RunFixedArgs(1, func(args []string) error {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()
			if err := client.StartPipeline(args[0]); err != nil {
				cmdutil.ErrorAndExit("error from StartPipeline: %s", err.Error())
			}
			return nil
		}),
	}
	commands = append(commands, cmdutil.CreateAlias(startPipeline, "start pipeline"))

	// stopPipeline ('pachctl stop pipeline') pauses a running pipeline.
	stopPipeline := &cobra.Command{
		Use:   "{{alias}} <pipeline>",
		Short: "Stop a running pipeline.",
		Long:  "Stop a running pipeline.",
		Run: cmdutil.RunFixedArgs(1, func(args []string) error {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()
			if err := client.StopPipeline(args[0]); err != nil {
				cmdutil.ErrorAndExit("error from StopPipeline: %s", err.Error())
			}
			return nil
		}),
	}
	commands = append(commands, cmdutil.CreateAlias(stopPipeline, "stop pipeline"))

	// createSecret ('pachctl create secret') uploads a Kubernetes secret
	// manifest (read from --file) to the cluster.
	// NOTE(review): no Use: line is set on the secret commands — presumably the
	// alias template supplies usage; confirm against cmdutil.CreateAlias.
	var file string
	createSecret := &cobra.Command{
		Short: "Create a secret on the cluster.",
		Long:  "Create a secret on the cluster.",
		Run: cmdutil.RunFixedArgs(0, func(args []string) (retErr error) {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()
			fileBytes, err := ioutil.ReadFile(file)
			if err != nil {
				return err
			}

			_, err = client.PpsAPIClient.CreateSecret(
				client.Ctx(),
				&ppsclient.CreateSecretRequest{
					File: fileBytes,
				})

			if err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			return nil
		}),
	}
	createSecret.Flags().StringVarP(&file, "file", "f", "", "File containing Kubernetes secret.")
	commands = append(commands, cmdutil.CreateAlias(createSecret, "create secret"))

	// deleteSecret ('pachctl delete secret') removes a named secret.
	deleteSecret := &cobra.Command{
		Short: "Delete a secret from the cluster.",
		Long:  "Delete a secret from the cluster.",
		Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()

			_, err = client.PpsAPIClient.DeleteSecret(
				client.Ctx(),
				&ppsclient.DeleteSecretRequest{
					Secret: &ppsclient.Secret{
						Name: args[0],
					},
				})

			if err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			return nil
		}),
	}
	commands = append(commands, cmdutil.CreateAlias(deleteSecret, "delete secret"))

	// inspectSecret ('pachctl inspect secret') prints details for one secret.
	inspectSecret := &cobra.Command{
		Short: "Inspect a secret from the cluster.",
		Long:  "Inspect a secret from the cluster.",
		Run: cmdutil.RunFixedArgs(1, func(args []string) (retErr error) {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()

			secretInfo, err := client.PpsAPIClient.InspectSecret(
				client.Ctx(),
				&ppsclient.InspectSecretRequest{
					Secret: &ppsclient.Secret{
						Name: args[0],
					},
				})

			if err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			writer := tabwriter.NewWriter(os.Stdout, pretty.SecretHeader)
			pretty.PrintSecretInfo(writer, secretInfo)
			return writer.Flush()
		}),
	}
	commands = append(commands, cmdutil.CreateAlias(inspectSecret, "inspect secret"))

	// listSecret ('pachctl list secret') tabulates all secrets.
	listSecret := &cobra.Command{
		Short: "List all secrets from a namespace in the cluster.",
		Long:  "List all secrets from a namespace in the cluster.",
		Run: cmdutil.RunFixedArgs(0, func(args []string) (retErr error) {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()

			secretInfos, err := client.PpsAPIClient.ListSecret(
				client.Ctx(),
				&types.Empty{},
			)

			if err != nil {
				return grpcutil.ScrubGRPC(err)
			}
			writer := tabwriter.NewWriter(os.Stdout, pretty.SecretHeader)
			for _, si := range secretInfos.GetSecretInfo() {
				pretty.PrintSecretInfo(writer, si)
			}
			return writer.Flush()
		}),
	}
	commands = append(commands, cmdutil.CreateAlias(listSecret, "list secret"))

	// garbageCollect ('pachctl garbage-collect') triggers a cluster-wide GC;
	// --memory sizes the bloom filters used to index live objects.
	var memory string
	garbageCollect := &cobra.Command{
		Short: "Garbage collect unused data.",
		Long: `Garbage collect unused data.

When a file/commit/repo is deleted, the data is not immediately removed from
the underlying storage system (e.g. S3) for performance and architectural
reasons. This is similar to how when you delete a file on your computer, the
file is not necessarily wiped from disk immediately.

To actually remove the data, you will need to manually invoke garbage
collection with "pachctl garbage-collect".

Currently "pachctl garbage-collect" can only be started when there are no
pipelines running. You also need to ensure that there's no ongoing "put file".
Garbage collection puts the cluster into a readonly mode where no new jobs can
be created and no data can be added.

Pachyderm's garbage collection uses bloom filters to index live objects. This
means that some dead objects may erronously not be deleted during garbage
collection. The probability of this happening depends on how many objects you
have; at around 10M objects it starts to become likely with the default values.
To lower Pachyderm's error rate and make garbage-collection more comprehensive,
you can increase the amount of memory used for the bloom filters with the
--memory flag. The default value is 10MB.
`,
		Run: cmdutil.RunFixedArgs(0, func(args []string) (retErr error) {
			client, err := pachdclient.NewOnUserMachine("user")
			if err != nil {
				return err
			}
			defer client.Close()
			// Parse human-readable sizes like "10MB" into a byte count.
			memoryBytes, err := units.RAMInBytes(memory)
			if err != nil {
				return err
			}
			return client.GarbageCollect(memoryBytes)
		}),
	}
	garbageCollect.Flags().StringVarP(&memory, "memory", "m", "0", "The amount of memory to use during garbage collection. Default is 10MB.")
Default is 10MB.") 1157 commands = append(commands, cmdutil.CreateAlias(garbageCollect, "garbage-collect")) 1158 1159 return commands 1160 } 1161 1162 func pipelineHelper(reprocess bool, build bool, pushImages bool, registry, username, pipelinePath string, update bool) error { 1163 if build && pushImages { 1164 logrus.Warning("`--push-images` is redundant, as it's already enabled with `--build`") 1165 } 1166 1167 pipelineReader, err := ppsutil.NewPipelineManifestReader(pipelinePath) 1168 if err != nil { 1169 return err 1170 } 1171 1172 pc, err := pachdclient.NewOnUserMachine("user") 1173 if err != nil { 1174 return errors.Wrapf(err, "error connecting to pachd") 1175 } 1176 defer pc.Close() 1177 1178 for { 1179 request, err := pipelineReader.NextCreatePipelineRequest() 1180 if errors.Is(err, io.EOF) { 1181 break 1182 } else if err != nil { 1183 return err 1184 } 1185 1186 if request.Pipeline == nil { 1187 return errors.New("no `pipeline` specified") 1188 } 1189 if request.Pipeline.Name == "" { 1190 return errors.New("no pipeline `name` specified") 1191 } 1192 1193 // Add trace if env var is set 1194 ctx, err := extended.EmbedAnyDuration(pc.Ctx()) 1195 pc = pc.WithCtx(ctx) 1196 if err != nil { 1197 logrus.Warning(err) 1198 } 1199 1200 if update { 1201 request.Update = true 1202 request.Reprocess = reprocess 1203 } 1204 1205 isLocal := true 1206 url, err := url.Parse(pipelinePath) 1207 if pipelinePath != "-" && err == nil && url.Scheme != "" { 1208 isLocal = false 1209 } 1210 1211 if request.Transform != nil && request.Transform.Build != nil { 1212 if !isLocal { 1213 return errors.Errorf("cannot use build step-enabled pipelines that aren't local") 1214 } 1215 if request.Spout != nil { 1216 return errors.New("build step-enabled pipelines do not work with spouts") 1217 } 1218 if request.Input == nil { 1219 return errors.New("no `input` specified") 1220 } 1221 if request.Transform.Build.Language == "" && request.Transform.Build.Image == "" { 1222 return errors.New("must 
specify either a build `language` or `image`") 1223 } 1224 if request.Transform.Build.Language != "" && request.Transform.Build.Image != "" { 1225 return errors.New("cannot specify both a build `language` and `image`") 1226 } 1227 var err error 1228 ppsclient.VisitInput(request.Input, func(input *ppsclient.Input) { 1229 inputName := ppsclient.InputName(input) 1230 if inputName == "build" || inputName == "source" { 1231 err = errors.New("build step-enabled pipelines cannot have inputs with the name 'build' or 'source', as they are reserved for build assets") 1232 } 1233 }) 1234 if err != nil { 1235 return err 1236 } 1237 pipelineParentPath, _ := filepath.Split(pipelinePath) 1238 if err := buildHelper(pc, request, pipelineParentPath, update); err != nil { 1239 return err 1240 } 1241 } else if build || pushImages { 1242 if build && !isLocal { 1243 return errors.Errorf("cannot build pipeline because it is not local") 1244 } 1245 if request.Transform == nil { 1246 return errors.New("must specify a pipeline `transform`") 1247 } 1248 pipelineParentPath, _ := filepath.Split(pipelinePath) 1249 if err := dockerBuildHelper(request, build, registry, username, pipelineParentPath); err != nil { 1250 return err 1251 } 1252 } 1253 1254 // Don't warn if transform.build is set because latest is almost always 1255 // legit for build-enabled pipelines. 1256 if request.Transform != nil && request.Transform.Build == nil && request.Transform.Image != "" { 1257 if !strings.Contains(request.Transform.Image, ":") { 1258 fmt.Fprintf(os.Stderr, 1259 "WARNING: please specify a tag for the docker image in your transform.image spec.\n"+ 1260 "For example, change 'python' to 'python:3' or 'bash' to 'bash:5'. This improves\n"+ 1261 "reproducibility of your pipelines.\n\n") 1262 } else if strings.HasSuffix(request.Transform.Image, ":latest") { 1263 fmt.Fprintf(os.Stderr, 1264 "WARNING: please do not specify the ':latest' tag for the docker image in your\n"+ 1265 "transform.image spec. 
For example, change 'python:latest' to 'python:3' or\n"+ 1266 "'bash:latest' to 'bash:5'. This improves reproducibility of your pipelines.\n\n") 1267 } 1268 } 1269 if err = txncmds.WithActiveTransaction(pc, func(txClient *pachdclient.APIClient) error { 1270 _, err := txClient.PpsAPIClient.CreatePipeline( 1271 txClient.Ctx(), 1272 request, 1273 ) 1274 return grpcutil.ScrubGRPC(err) 1275 }); err != nil { 1276 return err 1277 } 1278 } 1279 1280 return nil 1281 } 1282 1283 func dockerBuildHelper(request *ppsclient.CreatePipelineRequest, build bool, registry, username, pipelineParentPath string) error { 1284 // create docker client 1285 dockerClient, err := docker.NewClientFromEnv() 1286 if err != nil { 1287 return errors.Wrapf(err, "could not create a docker client from the environment") 1288 } 1289 1290 var authConfig docker.AuthConfiguration 1291 detectedAuthConfig := false 1292 1293 // try to automatically determine the credentials 1294 authConfigs, err := docker.NewAuthConfigurationsFromDockerCfg() 1295 if err == nil { 1296 for _, ac := range authConfigs.Configs { 1297 u, err := url.Parse(ac.ServerAddress) 1298 if err == nil && u.Hostname() == registry && (username == "" || username == ac.Username) { 1299 authConfig = ac 1300 detectedAuthConfig = true 1301 break 1302 } 1303 } 1304 } 1305 // if that failed, manually build credentials 1306 if !detectedAuthConfig { 1307 if username == "" { 1308 // request the username if it hasn't been specified yet 1309 fmt.Printf("Username for %s: ", registry) 1310 reader := bufio.NewReader(os.Stdin) 1311 username, err = reader.ReadString('\n') 1312 if err != nil { 1313 return errors.Wrapf(err, "could not read username") 1314 } 1315 username = strings.TrimRight(username, "\r\n") 1316 } 1317 1318 // request the password 1319 password, err := cmdutil.ReadPassword(fmt.Sprintf("Password for %s@%s: ", username, registry)) 1320 if err != nil { 1321 return errors.Wrapf(err, "could not read password") 1322 } 1323 1324 authConfig = 
docker.AuthConfiguration{ 1325 Username: username, 1326 Password: password, 1327 } 1328 } 1329 1330 repo, sourceTag := docker.ParseRepositoryTag(request.Transform.Image) 1331 if sourceTag == "" { 1332 sourceTag = "latest" 1333 } 1334 destTag := uuid.NewWithoutDashes() 1335 1336 if build { 1337 dockerfile := request.Transform.Dockerfile 1338 if dockerfile == "" { 1339 dockerfile = "./Dockerfile" 1340 } 1341 1342 contextDir, dockerfile := filepath.Split(dockerfile) 1343 if !filepath.IsAbs(contextDir) { 1344 contextDir = filepath.Join(pipelineParentPath, contextDir) 1345 } 1346 1347 destImage := fmt.Sprintf("%s:%s", repo, destTag) 1348 1349 fmt.Printf("Building %q, this may take a while.\n", destImage) 1350 1351 err := dockerClient.BuildImage(docker.BuildImageOptions{ 1352 Name: destImage, 1353 ContextDir: contextDir, 1354 Dockerfile: dockerfile, 1355 OutputStream: os.Stdout, 1356 }) 1357 if err != nil { 1358 return errors.Wrapf(err, "could not build docker image") 1359 } 1360 1361 // Now that we've built into `destTag`, change the 1362 // `sourceTag` to be the same so that the push will work with 1363 // the right image 1364 sourceTag = destTag 1365 } 1366 1367 sourceImage := fmt.Sprintf("%s:%s", repo, sourceTag) 1368 destImage := fmt.Sprintf("%s:%s", repo, destTag) 1369 1370 fmt.Printf("Tagging/pushing %q, this may take a while.\n", destImage) 1371 1372 if err := dockerClient.TagImage(sourceImage, docker.TagImageOptions{ 1373 Repo: repo, 1374 Tag: destTag, 1375 Context: context.Background(), 1376 }); err != nil { 1377 return errors.Wrapf(err, "could not tag docker image") 1378 } 1379 1380 if err := dockerClient.PushImage( 1381 docker.PushImageOptions{ 1382 Name: repo, 1383 Tag: destTag, 1384 }, 1385 authConfig, 1386 ); err != nil { 1387 return errors.Wrapf(err, "could not push docker image") 1388 } 1389 1390 request.Transform.Image = destImage 1391 return nil 1392 } 1393 1394 // TODO: if transactions ever add support for pipeline creation, use them here 1395 // to 
create everything atomically 1396 func buildHelper(pc *pachdclient.APIClient, request *ppsclient.CreatePipelineRequest, pipelineParentPath string, update bool) error { 1397 buildPath := request.Transform.Build.Path 1398 if buildPath == "" { 1399 buildPath = "." 1400 } 1401 if !filepath.IsAbs(buildPath) { 1402 buildPath = filepath.Join(pipelineParentPath, buildPath) 1403 } 1404 if _, err := os.Stat(buildPath); err != nil { 1405 if errors.Is(err, os.ErrNotExist) { 1406 return fmt.Errorf("build path %q does not exist", buildPath) 1407 } 1408 return errors.Wrapf(err, "could not stat build path %q", buildPath) 1409 } 1410 1411 buildPipelineName := fmt.Sprintf("%s_build", request.Pipeline.Name) 1412 1413 image := request.Transform.Build.Image 1414 if image == "" { 1415 pachctlVersion := version.PrettyPrintVersion(version.Version) 1416 image = fmt.Sprintf("pachyderm/%s-build:%s", request.Transform.Build.Language, pachctlVersion) 1417 } 1418 if request.Transform.Image == "" { 1419 request.Transform.Image = image 1420 } 1421 1422 // utility function for creating an input used as part of a build step 1423 createBuildPipelineInput := func(name string) *ppsclient.Input { 1424 return &ppsclient.Input{ 1425 Pfs: &ppsclient.PFSInput{ 1426 Name: name, 1427 Glob: "/", 1428 Repo: buildPipelineName, 1429 Branch: name, 1430 }, 1431 } 1432 } 1433 1434 // create the source repo 1435 if err := pc.UpdateRepo(buildPipelineName); err != nil { 1436 return errors.Wrapf(err, "failed to create repo for build step-enabled pipeline") 1437 } 1438 1439 if err := txncmds.WithActiveTransaction(pc, func(txClient *pachdclient.APIClient) error { 1440 return txClient.CreatePipeline( 1441 buildPipelineName, 1442 image, 1443 []string{"sh", "./build.sh"}, 1444 []string{}, 1445 &ppsclient.ParallelismSpec{Constant: 1}, 1446 createBuildPipelineInput("source"), 1447 "build", 1448 update, 1449 ) 1450 }); err != nil { 1451 return errors.Wrapf(err, "failed to create build pipeline for build step-enabled pipeline") 
1452 } 1453 1454 // retrieve ignores (if any) 1455 ignores := []*glob.Glob{} 1456 ignorePath := filepath.Join(buildPath, ".pachignore") 1457 if _, err := os.Stat(ignorePath); err == nil { 1458 f, err := os.Open(ignorePath) 1459 if err != nil { 1460 return errors.Wrapf(err, "failed to read build step ignore file %q", ignorePath) 1461 } 1462 scanner := bufio.NewScanner(f) 1463 for scanner.Scan() { 1464 line := scanner.Text() 1465 g, err := glob.Compile(line) 1466 if err != nil { 1467 return errors.Wrapf(err, "build step ignore file %q: failed to compile glob %q", ignorePath, line) 1468 } 1469 ignores = append(ignores, g) 1470 } 1471 } 1472 1473 // insert the source code 1474 pfc, err := pc.NewPutFileClient() 1475 if err != nil { 1476 return errors.Wrapf(err, "failed to construct put file client for source code in build step-enabled pipeline") 1477 } 1478 if update { 1479 if err = pfc.DeleteFile(buildPipelineName, "source", "/"); err != nil { 1480 return errors.Wrapf(err, "failed to delete existing source code for build step-enabled pipeline") 1481 } 1482 } 1483 if err := filepath.Walk(buildPath, func(srcFilePath string, info os.FileInfo, _ error) (retErr error) { 1484 if info == nil { 1485 return errors.Errorf("%q doesn't exist", srcFilePath) 1486 } 1487 if info.IsDir() { 1488 return nil 1489 } 1490 1491 destFilePath, err := filepath.Rel(buildPath, srcFilePath) 1492 if err != nil { 1493 return errors.Wrapf(err, "failed to discover relative path for %s", srcFilePath) 1494 } 1495 for _, g := range ignores { 1496 if g.Match(destFilePath) { 1497 return nil 1498 } 1499 } 1500 1501 f, err := progress.Open(srcFilePath) 1502 if err != nil { 1503 return errors.Wrapf(err, "failed to open file %q for source code in build step-enabled pipeline", srcFilePath) 1504 } 1505 defer func() { 1506 if err := f.Close(); err != nil && retErr == nil { 1507 retErr = err 1508 } 1509 }() 1510 1511 if _, err = pfc.PutFileOverwrite(buildPipelineName, "source", destFilePath, f, 0); err != nil { 
1512 return errors.Wrapf(err, "failed to put file %q->%q for source code in build step-enabled pipeline", srcFilePath, destFilePath) 1513 } 1514 1515 return nil 1516 }); err != nil { 1517 return err 1518 } 1519 if err := pfc.Close(); err != nil { 1520 return errors.Wrapf(err, "failed to close put file client for source code in build step-enabled pipeline") 1521 } 1522 1523 // modify the pipeline to use the build assets 1524 request.Input = &ppsclient.Input{ 1525 Cross: []*ppsclient.Input{ 1526 createBuildPipelineInput("source"), 1527 createBuildPipelineInput("build"), 1528 request.Input, 1529 }, 1530 } 1531 if request.Transform.Cmd == nil || len(request.Transform.Cmd) == 0 { 1532 request.Transform.Cmd = []string{"sh", "/pfs/build/run.sh"} 1533 } 1534 1535 return nil 1536 } 1537 1538 // ByCreationTime is an implementation of sort.Interface which 1539 // sorts pps job info by creation time, ascending. 1540 type ByCreationTime []*ppsclient.JobInfo 1541 1542 func (arr ByCreationTime) Len() int { return len(arr) } 1543 1544 func (arr ByCreationTime) Swap(i, j int) { arr[i], arr[j] = arr[j], arr[i] } 1545 1546 func (arr ByCreationTime) Less(i, j int) bool { 1547 if arr[i].Started == nil || arr[j].Started == nil { 1548 return false 1549 } 1550 1551 if arr[i].Started.Seconds < arr[j].Started.Seconds { 1552 return true 1553 } else if arr[i].Started.Seconds == arr[j].Started.Seconds { 1554 return arr[i].Started.Nanos < arr[j].Started.Nanos 1555 } 1556 1557 return false 1558 } 1559 1560 func validateJQConditionString(filter string) (string, error) { 1561 q, err := gojq.Parse(filter) 1562 if err != nil { 1563 return "", err 1564 } 1565 _, err = gojq.Compile(q) 1566 if err != nil { 1567 return "", err 1568 } 1569 return filter, nil 1570 } 1571 1572 // ParseJobStates parses a slice of state names into a jq filter suitable for ListJob 1573 func ParseJobStates(stateStrs []string) (string, error) { 1574 var conditions []string 1575 for _, stateStr := range stateStrs { 1576 if 
state, err := ppsclient.JobStateFromName(stateStr); err == nil { 1577 conditions = append(conditions, fmt.Sprintf(".state == \"%s\"", state)) 1578 } else { 1579 return "", err 1580 } 1581 } 1582 return validateJQConditionString(strings.Join(conditions, " or ")) 1583 } 1584 1585 // ParsePipelineStates parses a slice of state names into a jq filter suitable for ListPipeline 1586 func ParsePipelineStates(stateStrs []string) (string, error) { 1587 var conditions []string 1588 for _, stateStr := range stateStrs { 1589 if state, err := ppsclient.PipelineStateFromName(stateStr); err == nil { 1590 conditions = append(conditions, fmt.Sprintf(".state == \"%s\"", state)) 1591 } else { 1592 return "", err 1593 } 1594 } 1595 return validateJQConditionString(strings.Join(conditions, " or ")) 1596 }