github.com/pachyderm/pachyderm@v1.13.4/src/client/pfs.go (about) 1 package client 2 3 import ( 4 "bytes" 5 "context" 6 "io" 7 "sync" 8 9 "github.com/gogo/protobuf/types" 10 "github.com/pachyderm/pachyderm/src/client/pfs" 11 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 12 "github.com/pachyderm/pachyderm/src/client/pkg/grpcutil" 13 "github.com/pachyderm/pachyderm/src/server/pkg/errutil" 14 ) 15 16 // NewRepo creates a pfs.Repo. 17 func NewRepo(repoName string) *pfs.Repo { 18 return &pfs.Repo{Name: repoName} 19 } 20 21 // NewBranch creates a pfs.Branch 22 func NewBranch(repoName string, branchName string) *pfs.Branch { 23 return &pfs.Branch{ 24 Repo: NewRepo(repoName), 25 Name: branchName, 26 } 27 } 28 29 // NewCommit creates a pfs.Commit. 30 func NewCommit(repoName string, commitID string) *pfs.Commit { 31 return &pfs.Commit{ 32 Repo: NewRepo(repoName), 33 ID: commitID, 34 } 35 } 36 37 // NewCommitProvenance creates a pfs.CommitProvenance. 38 func NewCommitProvenance(repoName string, branchName string, commitID string) *pfs.CommitProvenance { 39 return &pfs.CommitProvenance{ 40 Commit: NewCommit(repoName, commitID), 41 Branch: NewBranch(repoName, branchName), 42 } 43 } 44 45 // NewFile creates a pfs.File. 46 func NewFile(repoName string, commitID string, path string) *pfs.File { 47 return &pfs.File{ 48 Commit: NewCommit(repoName, commitID), 49 Path: path, 50 } 51 } 52 53 // NewObject creates a pfs.Object. 54 func NewObject(hash string) *pfs.Object { 55 return &pfs.Object{ 56 Hash: hash, 57 } 58 } 59 60 // NewBlock creates a pfs.Block. 61 func NewBlock(hash string) *pfs.Block { 62 return &pfs.Block{ 63 Hash: hash, 64 } 65 } 66 67 // NewBlockRef creates a pfs.BlockRef. 68 func NewBlockRef(hash string, lower, upper uint64) *pfs.BlockRef { 69 return &pfs.BlockRef{ 70 Block: NewBlock(hash), 71 Range: &pfs.ByteRange{Lower: lower, Upper: upper}, 72 } 73 } 74 75 // NewTag creates a pfs.Tag. 76 func NewTag(name string) *pfs.Tag { 77 return &pfs.Tag{ 78 Name: name, 79 } 80 } 81 82 // CreateRepo creates a new Repo object in pfs with the given name. Repos are 83 // the top level data object in pfs and should be used to store data of a 84 // similar type. For example rather than having a single Repo for an entire 85 // project you might have separate Repos for logs, metrics, database dumps etc. 86 func (c APIClient) CreateRepo(repoName string) error { 87 _, err := c.PfsAPIClient.CreateRepo( 88 c.Ctx(), 89 &pfs.CreateRepoRequest{ 90 Repo: NewRepo(repoName), 91 }, 92 ) 93 return grpcutil.ScrubGRPC(err) 94 } 95 96 // UpdateRepo upserts a repo with the given name. 97 func (c APIClient) UpdateRepo(repoName string) error { 98 _, err := c.PfsAPIClient.CreateRepo( 99 c.Ctx(), 100 &pfs.CreateRepoRequest{ 101 Repo: NewRepo(repoName), 102 Update: true, 103 }, 104 ) 105 return grpcutil.ScrubGRPC(err) 106 } 107 108 // InspectRepo returns info about a specific Repo. 109 func (c APIClient) InspectRepo(repoName string) (*pfs.RepoInfo, error) { 110 resp, err := c.PfsAPIClient.InspectRepo( 111 c.Ctx(), 112 &pfs.InspectRepoRequest{ 113 Repo: NewRepo(repoName), 114 }, 115 ) 116 if err != nil { 117 return nil, grpcutil.ScrubGRPC(err) 118 } 119 return resp, nil 120 } 121 122 // ListRepo returns info about all Repos. 123 // provenance specifies a set of provenance repos, only repos which have ALL of 124 // the specified repos as provenance will be returned unless provenance is nil 125 // in which case it is ignored. 126 func (c APIClient) ListRepo() ([]*pfs.RepoInfo, error) { 127 request := &pfs.ListRepoRequest{} 128 repoInfos, err := c.PfsAPIClient.ListRepo( 129 c.Ctx(), 130 request, 131 ) 132 if err != nil { 133 return nil, grpcutil.ScrubGRPC(err) 134 } 135 return repoInfos.RepoInfo, nil 136 } 137 138 // DeleteRepo deletes a repo and reclaims the storage space it was using. Note 139 // that as of 1.0 we do not reclaim the blocks that the Repo was referencing, 140 // this is because they may also be referenced by other Repos and deleting them 141 // would make those Repos inaccessible. This will be resolved in later 142 // versions. 143 // If "force" is set to true, the repo will be removed regardless of errors. 144 // This argument should be used with care. 145 func (c APIClient) DeleteRepo(repoName string, force bool, splitTransaction ...bool) error { 146 request := &pfs.DeleteRepoRequest{ 147 Repo: NewRepo(repoName), 148 Force: force, 149 } 150 if len(splitTransaction) > 0 { 151 request.SplitTransaction = splitTransaction[0] 152 } 153 _, err := c.PfsAPIClient.DeleteRepo( 154 c.Ctx(), 155 request, 156 ) 157 return grpcutil.ScrubGRPC(err) 158 } 159 160 // StartCommit begins the process of committing data to a Repo. Once started 161 // you can write to the Commit with PutFile and when all the data has been 162 // written you must finish the Commit with FinishCommit. NOTE, data is not 163 // persisted until FinishCommit is called. 164 // branch is a more convenient way to build linear chains of commits. When a 165 // commit is started with a non empty branch the value of branch becomes an 166 // alias for the created Commit. This enables a more intuitive access pattern. 167 // When the commit is started on a branch the previous head of the branch is 168 // used as the parent of the commit. 169 func (c APIClient) StartCommit(repoName string, branch string) (*pfs.Commit, error) { 170 commit, err := c.PfsAPIClient.StartCommit( 171 c.Ctx(), 172 &pfs.StartCommitRequest{ 173 Parent: &pfs.Commit{ 174 Repo: &pfs.Repo{ 175 Name: repoName, 176 }, 177 }, 178 Branch: branch, 179 }, 180 ) 181 if err != nil { 182 return nil, grpcutil.ScrubGRPC(err) 183 } 184 return commit, nil 185 } 186 187 // BuildCommit builds a commit in a single call from an existing HashTree that 188 // has already been written to the object store. Note this is a more advanced 189 // pattern for creating commits that's mostly used internally. 190 func (c APIClient) BuildCommit(repoName string, branch string, parent string, treeObject string, sizeBytes uint64) (*pfs.Commit, error) { 191 commit, err := c.PfsAPIClient.BuildCommit( 192 c.Ctx(), 193 &pfs.BuildCommitRequest{ 194 Parent: NewCommit(repoName, parent), 195 Branch: branch, 196 Tree: &pfs.Object{Hash: treeObject}, 197 SizeBytes: sizeBytes, 198 }, 199 ) 200 if err != nil { 201 return nil, grpcutil.ScrubGRPC(err) 202 } 203 return commit, nil 204 } 205 206 // StartCommitParent begins the process of committing data to a Repo. Once started 207 // you can write to the Commit with PutFile and when all the data has been 208 // written you must finish the Commit with FinishCommit. NOTE, data is not 209 // persisted until FinishCommit is called. 210 // branch is a more convenient way to build linear chains of commits. When a 211 // commit is started with a non empty branch the value of branch becomes an 212 // alias for the created Commit. This enables a more intuitive access pattern. 213 // When the commit is started on a branch the previous head of the branch is 214 // used as the parent of the commit. 215 // parentCommit specifies the parent Commit, upon creation the new Commit will 216 // appear identical to the parent Commit, data can safely be added to the new 217 // commit without affecting the contents of the parent Commit. You may pass "" 218 // as parentCommit in which case the new Commit will have no parent and will 219 // initially appear empty. 220 func (c APIClient) StartCommitParent(repoName string, branch string, parentCommit string) (*pfs.Commit, error) { 221 commit, err := c.PfsAPIClient.StartCommit( 222 c.Ctx(), 223 &pfs.StartCommitRequest{ 224 Parent: &pfs.Commit{ 225 Repo: &pfs.Repo{ 226 Name: repoName, 227 }, 228 ID: parentCommit, 229 }, 230 Branch: branch, 231 }, 232 ) 233 if err != nil { 234 return nil, grpcutil.ScrubGRPC(err) 235 } 236 return commit, nil 237 } 238 239 // FinishCommit ends the process of committing data to a Repo and persists the 240 // Commit. Once a Commit is finished the data becomes immutable and future 241 // attempts to write to it with PutFile will error. 242 func (c APIClient) FinishCommit(repoName string, commitID string) error { 243 _, err := c.PfsAPIClient.FinishCommit( 244 c.Ctx(), 245 &pfs.FinishCommitRequest{ 246 Commit: NewCommit(repoName, commitID), 247 }, 248 ) 249 return grpcutil.ScrubGRPC(err) 250 } 251 252 // InspectCommit returns info about a specific Commit. 253 func (c APIClient) InspectCommit(repoName string, commitID string) (*pfs.CommitInfo, error) { 254 return c.inspectCommit(repoName, commitID, pfs.CommitState_STARTED) 255 } 256 257 // BlockCommit returns info about a specific Commit, but blocks until that 258 // commit has been finished. 259 func (c APIClient) BlockCommit(repoName string, commitID string) (*pfs.CommitInfo, error) { 260 return c.inspectCommit(repoName, commitID, pfs.CommitState_FINISHED) 261 } 262 263 func (c APIClient) inspectCommit(repoName string, commitID string, blockState pfs.CommitState) (*pfs.CommitInfo, error) { 264 commitInfo, err := c.PfsAPIClient.InspectCommit( 265 c.Ctx(), 266 &pfs.InspectCommitRequest{ 267 Commit: NewCommit(repoName, commitID), 268 BlockState: blockState, 269 }, 270 ) 271 if err != nil { 272 return nil, grpcutil.ScrubGRPC(err) 273 } 274 return commitInfo, nil 275 } 276 277 // ListCommit lists commits. 278 // If only `repo` is given, all commits in the repo are returned. 279 // If `to` is given, only the ancestors of `to`, including `to` itself, 280 // are considered. 281 // If `from` is given, only the descendents of `from`, including `from` 282 // itself, are considered. 283 // If `to` and `from` are the same commit, no commits will be returned. 284 // `number` determines how many commits are returned. If `number` is 0, 285 // all commits that match the aforementioned criteria are returned. 286 func (c APIClient) ListCommit(repoName string, to string, from string, number uint64) ([]*pfs.CommitInfo, error) { 287 var result []*pfs.CommitInfo 288 if err := c.ListCommitF(repoName, to, from, number, false, func(ci *pfs.CommitInfo) error { 289 result = append(result, ci) 290 return nil 291 }); err != nil { 292 return nil, err 293 } 294 return result, nil 295 } 296 297 // ListCommitF lists commits, calling f with each commit. 298 // If only `repo` is given, all commits in the repo are returned. 299 // If `to` is given, only the ancestors of `to`, including `to` itself, 300 // are considered. 301 // If `from` is given, only the descendents of `from`, including `from` 302 // itself, are considered. 303 // If `to` and `from` are the same commit, no commits will be returned. 304 // `number` determines how many commits are returned. If `number` is 0, 305 // `reverse` lists the commits from oldest to newest, rather than newest to oldest 306 // all commits that match the aforementioned criteria are passed to f. 307 func (c APIClient) ListCommitF(repoName string, to string, from string, number uint64, reverse bool, f func(*pfs.CommitInfo) error) error { 308 req := &pfs.ListCommitRequest{ 309 // repoName may be "", but the repo object must exist 310 Repo: NewRepo(repoName), 311 Number: number, 312 Reverse: reverse, 313 } 314 if from != "" { 315 req.From = NewCommit(repoName, from) 316 } 317 if to != "" { 318 req.To = NewCommit(repoName, to) 319 } 320 stream, err := c.PfsAPIClient.ListCommitStream(c.Ctx(), req) 321 if err != nil { 322 return grpcutil.ScrubGRPC(err) 323 } 324 for { 325 ci, err := stream.Recv() 326 if errors.Is(err, io.EOF) { 327 break 328 } else if err != nil { 329 return grpcutil.ScrubGRPC(err) 330 } 331 if err := f(ci); err != nil { 332 if errors.Is(err, errutil.ErrBreak) { 333 return nil 334 } 335 return err 336 } 337 } 338 return nil 339 } 340 341 // ListCommitByRepo lists all commits in a repo. 342 func (c APIClient) ListCommitByRepo(repoName string) ([]*pfs.CommitInfo, error) { 343 return c.ListCommit(repoName, "", "", 0) 344 } 345 346 // CreateBranch creates a new branch 347 func (c APIClient) CreateBranch(repoName string, branch string, commit string, provenance []*pfs.Branch) error { 348 var head *pfs.Commit 349 if commit != "" { 350 head = NewCommit(repoName, commit) 351 } 352 _, err := c.PfsAPIClient.CreateBranch( 353 c.Ctx(), 354 &pfs.CreateBranchRequest{ 355 Branch: NewBranch(repoName, branch), 356 Head: head, 357 Provenance: provenance, 358 }, 359 ) 360 return grpcutil.ScrubGRPC(err) 361 } 362 363 // CreateBranchTrigger Creates a branch with a trigger. Note: triggers and 364 // provenance are mutually exclusive. See the docs on triggers to learn more 365 // about why this is. 366 func (c APIClient) CreateBranchTrigger(repoName string, branch string, commit string, trigger *pfs.Trigger) error { 367 var head *pfs.Commit 368 if commit != "" { 369 head = NewCommit(repoName, commit) 370 } 371 _, err := c.PfsAPIClient.CreateBranch( 372 c.Ctx(), 373 &pfs.CreateBranchRequest{ 374 Branch: NewBranch(repoName, branch), 375 Head: head, 376 Trigger: trigger, 377 }, 378 ) 379 return grpcutil.ScrubGRPC(err) 380 } 381 382 // InspectBranch returns information on a specific PFS branch 383 func (c APIClient) InspectBranch(repoName string, branch string) (*pfs.BranchInfo, error) { 384 branchInfo, err := c.PfsAPIClient.InspectBranch( 385 c.Ctx(), 386 &pfs.InspectBranchRequest{ 387 Branch: NewBranch(repoName, branch), 388 }, 389 ) 390 return branchInfo, grpcutil.ScrubGRPC(err) 391 } 392 393 // ListBranch lists the active branches on a Repo. 394 func (c APIClient) ListBranch(repoName string) ([]*pfs.BranchInfo, error) { 395 branchInfos, err := c.PfsAPIClient.ListBranch( 396 c.Ctx(), 397 &pfs.ListBranchRequest{ 398 Repo: NewRepo(repoName), 399 }, 400 ) 401 if err != nil { 402 return nil, grpcutil.ScrubGRPC(err) 403 } 404 return branchInfos.BranchInfo, nil 405 } 406 407 // SetBranch sets a commit and its ancestors as a branch. 408 // SetBranch is deprecated in favor of CreateBranch. 409 func (c APIClient) SetBranch(repoName string, commit string, branch string) error { 410 return c.CreateBranch(repoName, branch, commit, nil) 411 } 412 413 // DeleteBranch deletes a branch, but leaves the commits themselves intact. 414 // In other words, those commits can still be accessed via commit IDs and 415 // other branches they happen to be on. 416 func (c APIClient) DeleteBranch(repoName string, branch string, force bool) error { 417 _, err := c.PfsAPIClient.DeleteBranch( 418 c.Ctx(), 419 &pfs.DeleteBranchRequest{ 420 Branch: NewBranch(repoName, branch), 421 Force: force, 422 }, 423 ) 424 return grpcutil.ScrubGRPC(err) 425 } 426 427 // DeleteCommit deletes a commit. 428 func (c APIClient) DeleteCommit(repoName string, commitID string) error { 429 _, err := c.PfsAPIClient.DeleteCommit( 430 c.Ctx(), 431 &pfs.DeleteCommitRequest{ 432 Commit: NewCommit(repoName, commitID), 433 }, 434 ) 435 return grpcutil.ScrubGRPC(err) 436 } 437 438 // FlushCommit returns an iterator that returns commits that have the 439 // specified `commits` as provenance. Note that the iterator can block if 440 // jobs have not successfully completed. This in effect waits for all of the 441 // jobs that are triggered by a set of commits to complete. 442 // 443 // If toRepos is not nil then only the commits up to and including those 444 // repos will be considered, otherwise all repos are considered. 445 // 446 // Note that it's never necessary to call FlushCommit to run jobs, they'll 447 // run no matter what, FlushCommit just allows you to wait for them to 448 // complete and see their output once they do. 449 func (c APIClient) FlushCommit(commits []*pfs.Commit, toRepos []*pfs.Repo) (CommitInfoIterator, error) { 450 ctx, cancel := context.WithCancel(c.Ctx()) 451 stream, err := c.PfsAPIClient.FlushCommit( 452 ctx, 453 &pfs.FlushCommitRequest{ 454 Commits: commits, 455 ToRepos: toRepos, 456 }, 457 ) 458 if err != nil { 459 cancel() 460 return nil, grpcutil.ScrubGRPC(err) 461 } 462 return &commitInfoIterator{stream, cancel}, nil 463 } 464 465 // FlushCommitF calls f with commits that have the specified `commits` as 466 // provenance. Note that it can block if jobs have not successfully 467 // completed. This in effect waits for all of the jobs that are triggered by a 468 // set of commits to complete. 469 // 470 // If toRepos is not nil then only the commits up to and including those repos 471 // will be considered, otherwise all repos are considered. 472 // 473 // Note that it's never necessary to call FlushCommit to run jobs, they'll run 474 // no matter what, FlushCommitF just allows you to wait for them to complete and 475 // see their output once they do. 476 func (c APIClient) FlushCommitF(commits []*pfs.Commit, toRepos []*pfs.Repo, f func(*pfs.CommitInfo) error) error { 477 stream, err := c.PfsAPIClient.FlushCommit( 478 c.Ctx(), 479 &pfs.FlushCommitRequest{ 480 Commits: commits, 481 ToRepos: toRepos, 482 }, 483 ) 484 if err != nil { 485 return grpcutil.ScrubGRPC(err) 486 } 487 for { 488 ci, err := stream.Recv() 489 if err != nil { 490 if errors.Is(err, io.EOF) { 491 return nil 492 } 493 return grpcutil.ScrubGRPC(err) 494 } 495 if err := f(ci); err != nil { 496 return err 497 } 498 } 499 } 500 501 // FlushCommitAll returns commits that have the specified `commits` as 502 // provenance. Note that it can block if jobs have not successfully 503 // completed. This in effect waits for all of the jobs that are triggered by a 504 // set of commits to complete. 505 // 506 // If toRepos is not nil then only the commits up to and including those repos 507 // will be considered, otherwise all repos are considered. 508 // 509 // Note that it's never necessary to call FlushCommit to run jobs, they'll run 510 // no matter what, FlushCommitAll just allows you to wait for them to complete and 511 // see their output once they do. 512 func (c APIClient) FlushCommitAll(commits []*pfs.Commit, toRepos []*pfs.Repo) ([]*pfs.CommitInfo, error) { 513 var result []*pfs.CommitInfo 514 if err := c.FlushCommitF(commits, toRepos, func(ci *pfs.CommitInfo) error { 515 result = append(result, ci) 516 return nil 517 }); err != nil { 518 return nil, err 519 } 520 return result, nil 521 } 522 523 // CommitInfoIterator wraps a stream of commits and makes them easy to iterate. 524 type CommitInfoIterator interface { 525 Next() (*pfs.CommitInfo, error) 526 Close() 527 } 528 529 type commitInfoIterator struct { 530 stream pfs.API_SubscribeCommitClient 531 cancel context.CancelFunc 532 } 533 534 func (c *commitInfoIterator) Next() (*pfs.CommitInfo, error) { 535 return c.stream.Recv() 536 } 537 538 func (c *commitInfoIterator) Close() { 539 c.cancel() 540 // this is completely retarded, but according to this thread it's 541 // necessary for closing a server-side stream from the client side. 542 // https://github.com/grpc/grpc-go/issues/188 543 for { 544 if _, err := c.stream.Recv(); err != nil { 545 break 546 } 547 } 548 } 549 550 // SubscribeCommit is like ListCommit but it keeps listening for commits as 551 // they come in. 552 func (c APIClient) SubscribeCommit(repo, branch string, prov *pfs.CommitProvenance, from string, state pfs.CommitState) (CommitInfoIterator, error) { 553 ctx, cancel := context.WithCancel(c.Ctx()) 554 req := &pfs.SubscribeCommitRequest{ 555 Repo: NewRepo(repo), 556 Branch: branch, 557 Prov: prov, 558 State: state, 559 } 560 if from != "" { 561 req.From = NewCommit(repo, from) 562 } 563 stream, err := c.PfsAPIClient.SubscribeCommit(ctx, req) 564 if err != nil { 565 cancel() 566 return nil, grpcutil.ScrubGRPC(err) 567 } 568 return &commitInfoIterator{stream, cancel}, nil 569 } 570 571 // SubscribeCommitF is like ListCommit but it calls a callback function with 572 // the results rather than returning an iterator. 573 func (c APIClient) SubscribeCommitF(repo, branch string, prov *pfs.CommitProvenance, from string, state pfs.CommitState, f func(*pfs.CommitInfo) error) error { 574 req := &pfs.SubscribeCommitRequest{ 575 Repo: NewRepo(repo), 576 Branch: branch, 577 Prov: prov, 578 State: state, 579 } 580 if from != "" { 581 req.From = NewCommit(repo, from) 582 } 583 stream, err := c.PfsAPIClient.SubscribeCommit(c.Ctx(), req) 584 if err != nil { 585 return grpcutil.ScrubGRPC(err) 586 } 587 for { 588 ci, err := stream.Recv() 589 if err != nil { 590 return grpcutil.ScrubGRPC(err) 591 } 592 if err := f(ci); err != nil { 593 return grpcutil.ScrubGRPC(err) 594 } 595 } 596 } 597 598 // PutObjectAsync puts a value into the object store asynchronously. 599 func (c APIClient) PutObjectAsync(tags []*pfs.Tag) (*PutObjectWriteCloserAsync, error) { 600 w, err := c.newPutObjectWriteCloserAsync(tags) 601 if err != nil { 602 return nil, grpcutil.ScrubGRPC(err) 603 } 604 return w, nil 605 } 606 607 // PutObject puts a value into the object store and tags it with 0 or more tags. 608 func (c APIClient) PutObject(_r io.Reader, tags ...string) (object *pfs.Object, _ int64, retErr error) { 609 r := grpcutil.ReaderWrapper{_r} 610 w, err := c.newPutObjectWriteCloser(tags...) 611 if err != nil { 612 return nil, 0, grpcutil.ScrubGRPC(err) 613 } 614 defer func() { 615 if err := w.Close(); err != nil && retErr == nil { 616 retErr = grpcutil.ScrubGRPC(err) 617 } 618 if retErr == nil { 619 object = w.object 620 } 621 }() 622 buf := grpcutil.GetBuffer() 623 defer grpcutil.PutBuffer(buf) 624 written, err := io.CopyBuffer(w, r, buf) 625 if err != nil { 626 return nil, 0, grpcutil.ScrubGRPC(err) 627 } 628 // return value set by deferred function 629 return nil, written, nil 630 } 631 632 // PutObjectSplit is the same as PutObject except that the data is splitted 633 // into several smaller objects. This is primarily useful if you'd like to 634 // be able to resume upload. 635 func (c APIClient) PutObjectSplit(_r io.Reader) (objects []*pfs.Object, _ int64, retErr error) { 636 r := grpcutil.ReaderWrapper{_r} 637 w, err := c.newPutObjectSplitWriteCloser() 638 if err != nil { 639 return nil, 0, grpcutil.ScrubGRPC(err) 640 } 641 defer func() { 642 if err := w.Close(); err != nil && retErr == nil { 643 retErr = grpcutil.ScrubGRPC(err) 644 } 645 if retErr == nil { 646 objects = w.objects 647 } 648 }() 649 buf := grpcutil.GetBuffer() 650 defer grpcutil.PutBuffer(buf) 651 written, err := io.CopyBuffer(w, r, buf) 652 if err != nil { 653 return nil, 0, grpcutil.ScrubGRPC(err) 654 } 655 // return value set by deferred function 656 return nil, written, nil 657 } 658 659 // CreateObject creates an object with hash, referencing the range 660 // [lower,upper] in block. The block should already exist. 661 func (c APIClient) CreateObject(hash, block string, lower, upper uint64) error { 662 _, err := c.ObjectAPIClient.CreateObject(c.Ctx(), &pfs.CreateObjectRequest{ 663 Object: NewObject(hash), 664 BlockRef: NewBlockRef(block, lower, upper), 665 }) 666 return grpcutil.ScrubGRPC(err) 667 } 668 669 // GetObject gets an object out of the object store by hash. 670 func (c APIClient) GetObject(hash string, writer io.Writer) error { 671 getObjectClient, err := c.ObjectAPIClient.GetObject( 672 c.Ctx(), 673 &pfs.Object{Hash: hash}, 674 ) 675 if err != nil { 676 return grpcutil.ScrubGRPC(err) 677 } 678 if err := grpcutil.WriteFromStreamingBytesClient(getObjectClient, writer); err != nil { 679 return grpcutil.ScrubGRPC(err) 680 } 681 return nil 682 } 683 684 // GetObjectReader returns a reader for an object in object store by hash. 685 func (c APIClient) GetObjectReader(hash string) (io.ReadCloser, error) { 686 ctx, cancel := context.WithCancel(c.Ctx()) 687 getObjectClient, err := c.ObjectAPIClient.GetObject( 688 ctx, 689 &pfs.Object{Hash: hash}, 690 ) 691 if err != nil { 692 return nil, grpcutil.ScrubGRPC(err) 693 } 694 return grpcutil.NewStreamingBytesReader(getObjectClient, cancel), nil 695 } 696 697 // ReadObject gets an object by hash and returns it directly as []byte. 698 func (c APIClient) ReadObject(hash string) ([]byte, error) { 699 var buffer bytes.Buffer 700 if err := c.GetObject(hash, &buffer); err != nil { 701 return nil, grpcutil.ScrubGRPC(err) 702 } 703 return buffer.Bytes(), nil 704 } 705 706 // GetObjects gets several objects out of the object store by hash. 707 func (c APIClient) GetObjects(hashes []string, offset uint64, size uint64, totalSize uint64, writer io.Writer) error { 708 var objects []*pfs.Object 709 for _, hash := range hashes { 710 objects = append(objects, &pfs.Object{Hash: hash}) 711 } 712 getObjectsClient, err := c.ObjectAPIClient.GetObjects( 713 c.Ctx(), 714 &pfs.GetObjectsRequest{ 715 Objects: objects, 716 OffsetBytes: offset, 717 SizeBytes: size, 718 TotalSize: totalSize, 719 }, 720 ) 721 if err != nil { 722 return grpcutil.ScrubGRPC(err) 723 } 724 if err := grpcutil.WriteFromStreamingBytesClient(getObjectsClient, writer); err != nil { 725 return grpcutil.ScrubGRPC(err) 726 } 727 return nil 728 } 729 730 // ReadObjects gets several objects by hash and returns them directly as []byte. 731 func (c APIClient) ReadObjects(hashes []string, offset uint64, size uint64) ([]byte, error) { 732 var buffer bytes.Buffer 733 if err := c.GetObjects(hashes, offset, size, 0, &buffer); err != nil { 734 return nil, err 735 } 736 return buffer.Bytes(), nil 737 } 738 739 // TagObject applies a tag to an existing object. 740 func (c APIClient) TagObject(hash string, tags ...string) error { 741 var _tags []*pfs.Tag 742 for _, tag := range tags { 743 _tags = append(_tags, &pfs.Tag{Name: tag}) 744 } 745 if _, err := c.ObjectAPIClient.TagObject( 746 c.Ctx(), 747 &pfs.TagObjectRequest{ 748 Object: &pfs.Object{Hash: hash}, 749 Tags: _tags, 750 }, 751 ); err != nil { 752 return grpcutil.ScrubGRPC(err) 753 } 754 return nil 755 } 756 757 // ListObject lists objects stored in pfs. 758 func (c APIClient) ListObject(f func(*pfs.ObjectInfo) error) error { 759 listObjectClient, err := c.ObjectAPIClient.ListObjects(c.Ctx(), &pfs.ListObjectsRequest{}) 760 if err != nil { 761 return grpcutil.ScrubGRPC(err) 762 } 763 for { 764 oi, err := listObjectClient.Recv() 765 if err != nil { 766 if errors.Is(err, io.EOF) { 767 return nil 768 } 769 return grpcutil.ScrubGRPC(err) 770 } 771 if err := f(oi); err != nil { 772 return err 773 } 774 } 775 } 776 777 // InspectObject returns info about an Object. 778 func (c APIClient) InspectObject(hash string) (*pfs.ObjectInfo, error) { 779 value, err := c.ObjectAPIClient.InspectObject( 780 c.Ctx(), 781 &pfs.Object{Hash: hash}, 782 ) 783 if err != nil { 784 return nil, grpcutil.ScrubGRPC(err) 785 } 786 return value, nil 787 } 788 789 // GetTag gets an object out of the object store by tag. 790 func (c APIClient) GetTag(tag string, writer io.Writer) error { 791 getTagClient, err := c.ObjectAPIClient.GetTag( 792 c.Ctx(), 793 &pfs.Tag{Name: tag}, 794 ) 795 if err != nil { 796 return grpcutil.ScrubGRPC(err) 797 } 798 if err := grpcutil.WriteFromStreamingBytesClient(getTagClient, writer); err != nil { 799 return grpcutil.ScrubGRPC(err) 800 } 801 return nil 802 } 803 804 // GetTagReader returns a reader for an object in object store by tag. 805 func (c APIClient) GetTagReader(tag string) (io.ReadCloser, error) { 806 ctx, cancel := context.WithCancel(c.Ctx()) 807 getTagClient, err := c.ObjectAPIClient.GetTag( 808 ctx, 809 &pfs.Tag{Name: tag}, 810 ) 811 if err != nil { 812 return nil, grpcutil.ScrubGRPC(err) 813 } 814 return grpcutil.NewStreamingBytesReader(getTagClient, cancel), nil 815 } 816 817 // ReadTag gets an object by tag and returns it directly as []byte. 818 func (c APIClient) ReadTag(tag string) ([]byte, error) { 819 var buffer bytes.Buffer 820 if err := c.GetTag(tag, &buffer); err != nil { 821 return nil, err 822 } 823 return buffer.Bytes(), nil 824 } 825 826 // ListTag lists tags stored in pfs. 827 func (c APIClient) ListTag(f func(*pfs.ListTagsResponse) error) error { 828 listTagClient, err := c.ObjectAPIClient.ListTags(c.Ctx(), &pfs.ListTagsRequest{IncludeObject: true}) 829 if err != nil { 830 return grpcutil.ScrubGRPC(err) 831 } 832 for { 833 listTagResponse, err := listTagClient.Recv() 834 if err != nil { 835 if errors.Is(err, io.EOF) { 836 return nil 837 } 838 return grpcutil.ScrubGRPC(err) 839 } 840 if err := f(listTagResponse); err != nil { 841 if errors.Is(err, errutil.ErrBreak) { 842 return nil 843 } 844 return err 845 } 846 } 847 } 848 849 // ListBlock lists blocks stored in pfs. 850 func (c APIClient) ListBlock(f func(*pfs.Block) error) error { 851 listBlocksClient, err := c.ObjectAPIClient.ListBlock(c.Ctx(), &pfs.ListBlockRequest{}) 852 if err != nil { 853 return err 854 } 855 for { 856 block, err := listBlocksClient.Recv() 857 if err != nil { 858 if errors.Is(err, io.EOF) { 859 return nil 860 } 861 return grpcutil.ScrubGRPC(err) 862 } 863 if err := f(block); err != nil { 864 if errors.Is(err, errutil.ErrBreak) { 865 return nil 866 } 867 return err 868 } 869 } 870 } 871 872 // GetBlock gets the content of a block. 873 func (c APIClient) GetBlock(hash string, w io.Writer) error { 874 getBlockClient, err := c.ObjectAPIClient.GetBlock( 875 c.Ctx(), 876 &pfs.GetBlockRequest{Block: NewBlock(hash)}, 877 ) 878 if err != nil { 879 return grpcutil.ScrubGRPC(err) 880 } 881 if err := grpcutil.WriteFromStreamingBytesClient(getBlockClient, w); err != nil { 882 return grpcutil.ScrubGRPC(err) 883 } 884 return nil 885 } 886 887 // PutBlock puts a block. 888 func (c APIClient) PutBlock(hash string, _r io.Reader) (_ int64, retErr error) { 889 r := grpcutil.ReaderWrapper{_r} 890 w, err := c.newPutBlockWriteCloser(hash) 891 if err != nil { 892 return 0, err 893 } 894 defer func() { 895 if err := w.Close(); err != nil && retErr == nil { 896 retErr = errors.Wrap(grpcutil.ScrubGRPC(err), "Close") 897 } 898 }() 899 buf := grpcutil.GetBuffer() 900 defer grpcutil.PutBuffer(buf) 901 written, err := io.CopyBuffer(w, r, buf) 902 if err != nil { 903 return written, errors.Wrap(grpcutil.ScrubGRPC(err), "CopyBuffer") 904 } 905 // return value set by deferred function 906 return written, nil 907 } 908 909 // Compact forces compaction of objects. 910 func (c APIClient) Compact() error { 911 _, err := c.ObjectAPIClient.Compact( 912 c.Ctx(), 913 &types.Empty{}, 914 ) 915 return err 916 } 917 918 // DirectObjReader returns a reader for the contents of an obj in object 919 // storage, it reads directly from object storage, bypassing the 920 // content-addressing layer. 921 func (c APIClient) DirectObjReader(obj string) (io.ReadCloser, error) { 922 getObjClient, err := c.ObjectAPIClient.GetObjDirect( 923 c.Ctx(), 924 &pfs.GetObjDirectRequest{Obj: obj}, 925 ) 926 if err != nil { 927 return nil, grpcutil.ScrubGRPC(err) 928 } 929 return grpcutil.NewStreamingBytesReader(getObjClient, nil), nil 930 } 931 932 // DirectObjWriter returns a writer for an obj in object storage, it writes 933 // directly to object storage, bypassing the content-addressing layer. 934 func (c APIClient) DirectObjWriter(obj string) (io.WriteCloser, error) { 935 return c.newPutObjWriteCloser(obj) 936 } 937 938 // PutFileClient is a client interface for putting files. There are 2 939 // implementations, 1 that does each file as a seperate request and one that 940 // does them all together in the same request. 941 type PutFileClient interface { 942 // PutFileWriter writes a file to PFS. 943 // NOTE: PutFileWriter returns an io.WriteCloser that you must call Close on when 944 // you are done writing. 945 PutFileWriter(repoName, commitID, path string) (io.WriteCloser, error) 946 947 // PutFileSplitWriter writes multiple files to PFS by splitting up the data 948 // that is written to it. 949 // NOTE: PutFileSplitWriter returns an io.WriteCloser that you must call Close on when 950 // you are done writing. 951 PutFileSplitWriter(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool) (io.WriteCloser, error) 952 953 // PutFile writes a file to PFS from a reader. 954 PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error) 955 956 // PutFileOverwrite is like PutFile but it overwrites the file rather than 957 // appending to it. overwriteIndex allows you to specify the index of the 958 // object starting from which you'd like to overwrite. If you want to 959 // overwrite the entire file, specify an index of 0. 960 PutFileOverwrite(repoName string, commitID string, path string, reader io.Reader, overwriteIndex int64) (_ int, retErr error) 961 962 // PutFileSplit writes a file to PFS from a reader. 963 // delimiter is used to tell PFS how to break the input into blocks. 964 PutFileSplit(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool, reader io.Reader) (_ int, retErr error) 965 966 // PutFileURL puts a file using the content found at a URL. 967 // The URL is sent to the server which performs the request. 968 // recursive allows for recursive scraping of some types URLs. For example on s3:// urls. 969 PutFileURL(repoName string, commitID string, path string, url string, recursive bool, overwrite bool) error 970 971 // DeleteFile deletes a file from a Commit. 972 // DeleteFile leaves a tombstone in the Commit, assuming the file isn't written 973 // to later attempting to get the file from the finished commit will result in 974 // not found error. 975 // The file will of course remain intact in the Commit's parent. 976 DeleteFile(repoName string, commitID string, path string) error 977 978 // Close must be called after you're done using a PutFileClient. 979 // Further requests will throw errors. 980 Close() error 981 } 982 983 type putFileClient struct { 984 c pfs.API_PutFileClient 985 mu sync.Mutex 986 oneoff bool // indicates a one time use putFileClient 987 } 988 989 // NewPutFileClient returns a new client for putting files into pfs in a single request. 990 func (c APIClient) NewPutFileClient() (PutFileClient, error) { 991 if c.storageV2 { 992 return c.newPutFileClientV2(), nil 993 } 994 pfc, err := c.PfsAPIClient.PutFile(c.Ctx()) 995 if err != nil { 996 return nil, grpcutil.ScrubGRPC(err) 997 } 998 return &putFileClient{c: pfc}, nil 999 } 1000 1001 func (c APIClient) newOneoffPutFileClient() (PutFileClient, error) { 1002 if c.storageV2 { 1003 return c.newPutFileClientV2(), nil 1004 } 1005 pfc, err := c.PfsAPIClient.PutFile(c.Ctx()) 1006 if err != nil { 1007 return nil, grpcutil.ScrubGRPC(err) 1008 } 1009 return &putFileClient{c: pfc, oneoff: true}, nil 1010 } 1011 1012 // PutFileWriter writes a file to PFS. 1013 // NOTE: PutFileWriter returns an io.WriteCloser you must call Close on it when 1014 // you are done writing. 1015 func (c *putFileClient) PutFileWriter(repoName, commitID, path string) (io.WriteCloser, error) { 1016 return c.newPutFileWriteCloser(repoName, commitID, path, pfs.Delimiter_NONE, 0, 0, 0, nil) 1017 } 1018 1019 // PutFileSplitWriter writes a multiple files to PFS by splitting up the data 1020 // that is written to it. 1021 // NOTE: PutFileSplitWriter returns an io.WriteCloser you must call Close on it when 1022 // you are done writing. 1023 func (c *putFileClient) PutFileSplitWriter(repoName string, commitID string, path string, 1024 delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool) (io.WriteCloser, error) { 1025 // TODO(msteffen) add headerRecords 1026 var overwriteIndex *pfs.OverwriteIndex 1027 if overwrite { 1028 overwriteIndex = &pfs.OverwriteIndex{} 1029 } 1030 return c.newPutFileWriteCloser(repoName, commitID, path, delimiter, targetFileDatums, targetFileBytes, headerRecords, overwriteIndex) 1031 } 1032 1033 // PutFile writes a file to PFS from a reader. 1034 func (c *putFileClient) PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error) { 1035 return c.PutFileSplit(repoName, commitID, path, pfs.Delimiter_NONE, 0, 0, 0, false, reader) 1036 } 1037 1038 // PutFileOverwrite is like PutFile but it overwrites the file rather than 1039 // appending to it. overwriteIndex allows you to specify the index of the 1040 // object starting from which you'd like to overwrite. If you want to 1041 // overwrite the entire file, specify an index of 0. 1042 func (c *putFileClient) PutFileOverwrite(repoName string, commitID string, path string, reader io.Reader, overwriteIndex int64) (_ int, retErr error) { 1043 writer, err := c.newPutFileWriteCloser(repoName, commitID, path, pfs.Delimiter_NONE, 0, 0, 0, &pfs.OverwriteIndex{Index: overwriteIndex}) 1044 if err != nil { 1045 return 0, grpcutil.ScrubGRPC(err) 1046 } 1047 defer func() { 1048 if err := writer.Close(); err != nil && retErr == nil { 1049 retErr = err 1050 } 1051 }() 1052 written, err := io.Copy(writer, reader) 1053 return int(written), grpcutil.ScrubGRPC(err) 1054 } 1055 1056 //PutFileSplit writes a file to PFS from a reader 1057 // delimiter is used to tell PFS how to break the input into blocks 1058 func (c *putFileClient) PutFileSplit(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool, reader io.Reader) (_ int, retErr error) { 1059 writer, err := c.PutFileSplitWriter(repoName, commitID, path, delimiter, targetFileDatums, targetFileBytes, headerRecords, overwrite) 1060 if err != nil { 1061 return 0, grpcutil.ScrubGRPC(err) 1062 } 1063 defer func() { 1064 if err := writer.Close(); err != nil && retErr == nil { 1065 retErr = err 1066 } 1067 }() 1068 buf := grpcutil.GetBuffer() 1069 defer grpcutil.PutBuffer(buf) 1070 written, err := io.CopyBuffer(writer, reader, buf) 1071 return int(written), grpcutil.ScrubGRPC(err) 1072 } 1073 1074 // PutFileURL puts a file using the content found at a URL. 1075 // The URL is sent to the server which performs the request. 1076 // recursive allow for recursive scraping of some types URLs for example on s3:// urls. 1077 func (c *putFileClient) PutFileURL(repoName string, commitID string, path string, url string, recursive bool, overwrite bool) (retErr error) { 1078 c.mu.Lock() 1079 defer c.mu.Unlock() 1080 var overwriteIndex *pfs.OverwriteIndex 1081 if overwrite { 1082 overwriteIndex = &pfs.OverwriteIndex{} 1083 } 1084 if c.oneoff { 1085 defer func() { 1086 if err := grpcutil.ScrubGRPC(c.Close()); err != nil && retErr == nil { 1087 retErr = err 1088 } 1089 }() 1090 } 1091 if err := c.c.Send(&pfs.PutFileRequest{ 1092 File: NewFile(repoName, commitID, path), 1093 Url: url, 1094 Recursive: recursive, 1095 OverwriteIndex: overwriteIndex, 1096 }); err != nil { 1097 return grpcutil.ScrubGRPC(err) 1098 } 1099 return nil 1100 } 1101 1102 func (c *putFileClient) DeleteFile(repoName string, commitID string, path string) (retErr error) { 1103 c.mu.Lock() 1104 defer c.mu.Unlock() 1105 if c.oneoff { 1106 defer func() { 1107 if err := grpcutil.ScrubGRPC(c.Close()); err != nil && retErr == nil { 1108 retErr = err 1109 } 1110 }() 1111 } 1112 if err := c.c.Send(&pfs.PutFileRequest{ 1113 File: NewFile(repoName, commitID, path), 1114 Delete: true, 1115 }); err != nil { 1116 return grpcutil.ScrubGRPC(err) 1117 } 1118 return nil 1119 } 1120 1121 // Close must be called after you're done using a putFileClient. 1122 // Further requests will throw errors. 1123 func (c *putFileClient) Close() error { 1124 _, err := c.c.CloseAndRecv() 1125 return grpcutil.ScrubGRPC(err) 1126 } 1127 1128 // PutFileWriter writes a file to PFS. 1129 // NOTE: PutFileWriter returns an io.WriteCloser you must call Close on it when 1130 // you are done writing. 1131 func (c APIClient) PutFileWriter(repoName string, commitID string, path string) (io.WriteCloser, error) { 1132 pfc, err := c.newOneoffPutFileClient() 1133 if err != nil { 1134 return nil, err 1135 } 1136 return pfc.PutFileWriter(repoName, commitID, path) 1137 } 1138 1139 // PutFileSplitWriter writes a multiple files to PFS by splitting up the data 1140 // that is written to it. 1141 // NOTE: PutFileSplitWriter returns an io.WriteCloser you must call Close on it when 1142 // you are done writing. 1143 func (c APIClient) PutFileSplitWriter(repoName string, commitID string, path string, 1144 delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool) (io.WriteCloser, error) { 1145 pfc, err := c.newOneoffPutFileClient() 1146 if err != nil { 1147 return nil, err 1148 } 1149 return pfc.PutFileSplitWriter(repoName, commitID, path, delimiter, targetFileDatums, targetFileBytes, headerRecords, overwrite) 1150 } 1151 1152 // PutFile writes a file to PFS from a reader. 1153 func (c APIClient) PutFile(repoName string, commitID string, path string, reader io.Reader) (_ int, retErr error) { 1154 pfc, err := c.newOneoffPutFileClient() 1155 if err != nil { 1156 return 0, err 1157 } 1158 return pfc.PutFile(repoName, commitID, path, reader) 1159 } 1160 1161 // PutFileOverwrite is like PutFile but it overwrites the file rather than 1162 // appending to it. overwriteIndex allows you to specify the index of the 1163 // object starting from which you'd like to overwrite. If you want to 1164 // overwrite the entire file, specify an index of 0. 1165 func (c APIClient) PutFileOverwrite(repoName string, commitID string, path string, reader io.Reader, overwriteIndex int64) (_ int, retErr error) { 1166 pfc, err := c.newOneoffPutFileClient() 1167 if err != nil { 1168 return 0, err 1169 } 1170 return pfc.PutFileOverwrite(repoName, commitID, path, reader, overwriteIndex) 1171 } 1172 1173 //PutFileSplit writes a file to PFS from a reader 1174 // delimiter is used to tell PFS how to break the input into blocks 1175 func (c APIClient) PutFileSplit(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool, reader io.Reader) (_ int, retErr error) { 1176 // TODO(msteffen) update 1177 pfc, err := c.newOneoffPutFileClient() 1178 if err != nil { 1179 return 0, err 1180 } 1181 return pfc.PutFileSplit(repoName, commitID, path, delimiter, targetFileDatums, targetFileBytes, headerRecords, overwrite, reader) 1182 } 1183 1184 // PutFileURL puts a file using the content found at a URL. 1185 // The URL is sent to the server which performs the request. 1186 // recursive allow for recursive scraping of some types URLs for example on s3:// urls. 1187 func (c APIClient) PutFileURL(repoName string, commitID string, path string, url string, recursive bool, overwrite bool) (retErr error) { 1188 pfc, err := c.newOneoffPutFileClient() 1189 if err != nil { 1190 return err 1191 } 1192 return pfc.PutFileURL(repoName, commitID, path, url, recursive, overwrite) 1193 } 1194 1195 // CopyFile copys a file from one pfs location to another. It can be used on 1196 // directories or regular files. 1197 func (c APIClient) CopyFile(srcRepo, srcCommit, srcPath, dstRepo, dstCommit, dstPath string, overwrite bool) error { 1198 if _, err := c.PfsAPIClient.CopyFile(c.Ctx(), 1199 &pfs.CopyFileRequest{ 1200 Src: NewFile(srcRepo, srcCommit, srcPath), 1201 Dst: NewFile(dstRepo, dstCommit, dstPath), 1202 Overwrite: overwrite, 1203 }); err != nil { 1204 return grpcutil.ScrubGRPC(err) 1205 } 1206 return nil 1207 } 1208 1209 // GetFile returns the contents of a file at a specific Commit. 1210 // offset specifies a number of bytes that should be skipped in the beginning of the file. 1211 // size limits the total amount of data returned, note you will get fewer bytes 1212 // than size if you pass a value larger than the size of the file. 1213 // If size is set to 0 then all of the data will be returned. 1214 func (c APIClient) GetFile(repoName string, commitID string, path string, offset int64, size int64, writer io.Writer) error { 1215 if c.limiter != nil { 1216 c.limiter.Acquire() 1217 defer c.limiter.Release() 1218 } 1219 if c.storageV2 { 1220 return c.GetFileV2(repoName, commitID, path, writer) 1221 } 1222 apiGetFileClient, err := c.getFile(repoName, commitID, path, offset, size) 1223 if err != nil { 1224 return grpcutil.ScrubGRPC(err) 1225 } 1226 if err := grpcutil.WriteFromStreamingBytesClient(apiGetFileClient, writer); err != nil { 1227 return grpcutil.ScrubGRPC(err) 1228 } 1229 return nil 1230 } 1231 1232 // GetFileReader returns a reader for the contents of a file at a specific Commit. 1233 // offset specifies a number of bytes that should be skipped in the beginning of the file. 1234 // size limits the total amount of data returned, note you will get fewer bytes 1235 // than size if you pass a value larger than the size of the file. 1236 // If size is set to 0 then all of the data will be returned. 1237 func (c APIClient) GetFileReader(repoName string, commitID string, path string, offset int64, size int64) (io.Reader, error) { 1238 apiGetFileClient, err := c.getFile(repoName, commitID, path, offset, size) 1239 if err != nil { 1240 return nil, grpcutil.ScrubGRPC(err) 1241 } 1242 return grpcutil.NewStreamingBytesReader(apiGetFileClient, nil), nil 1243 } 1244 1245 // GetFileReadSeeker returns a reader for the contents of a file at a specific 1246 // Commit that permits Seeking to different points in the file. 1247 func (c APIClient) GetFileReadSeeker(repoName string, commitID string, path string) (io.ReadSeeker, error) { 1248 fileInfo, err := c.InspectFile(repoName, commitID, path) 1249 if err != nil { 1250 return nil, err 1251 } 1252 reader, err := c.GetFileReader(repoName, commitID, path, 0, 0) 1253 if err != nil { 1254 return nil, err 1255 } 1256 return &getFileReadSeeker{ 1257 Reader: reader, 1258 file: NewFile(repoName, commitID, path), 1259 offset: 0, 1260 size: int64(fileInfo.SizeBytes), 1261 c: c, 1262 }, nil 1263 } 1264 1265 func (c APIClient) getFile(repoName string, commitID string, path string, offset int64, 1266 size int64) (pfs.API_GetFileClient, error) { 1267 return c.PfsAPIClient.GetFile( 1268 c.Ctx(), 1269 &pfs.GetFileRequest{ 1270 File: NewFile(repoName, commitID, path), 1271 OffsetBytes: offset, 1272 SizeBytes: size, 1273 }, 1274 ) 1275 } 1276 1277 // InspectFile returns info about a specific file. 1278 func (c APIClient) InspectFile(repoName string, commitID string, path string) (*pfs.FileInfo, error) { 1279 return c.inspectFile(repoName, commitID, path) 1280 } 1281 1282 func (c APIClient) inspectFile(repoName string, commitID string, path string) (*pfs.FileInfo, error) { 1283 fileInfo, err := c.PfsAPIClient.InspectFile( 1284 c.Ctx(), 1285 &pfs.InspectFileRequest{ 1286 File: NewFile(repoName, commitID, path), 1287 }, 1288 ) 1289 if err != nil { 1290 return nil, grpcutil.ScrubGRPC(err) 1291 } 1292 return fileInfo, nil 1293 } 1294 1295 // ListFile returns info about all files in a Commit under path. 1296 func (c APIClient) ListFile(repoName string, commitID string, path string) ([]*pfs.FileInfo, error) { 1297 var result []*pfs.FileInfo 1298 if err := c.ListFileF(repoName, commitID, path, 0, func(fi *pfs.FileInfo) error { 1299 result = append(result, fi) 1300 return nil 1301 }); err != nil { 1302 return nil, err 1303 } 1304 return result, nil 1305 } 1306 1307 // ListFileHistory returns info about all files and their history in a Commit under path. 1308 func (c APIClient) ListFileHistory(repoName string, commitID string, path string, history int64) ([]*pfs.FileInfo, error) { 1309 var result []*pfs.FileInfo 1310 if err := c.ListFileF(repoName, commitID, path, history, func(fi *pfs.FileInfo) error { 1311 result = append(result, fi) 1312 return nil 1313 }); err != nil { 1314 return nil, err 1315 } 1316 return result, nil 1317 } 1318 1319 // ListFileF returns info about all files in a Commit under path, calling f with each FileInfo. 1320 func (c APIClient) ListFileF(repoName string, commitID string, path string, history int64, f func(fi *pfs.FileInfo) error) error { 1321 fs, err := c.PfsAPIClient.ListFileStream( 1322 c.Ctx(), 1323 &pfs.ListFileRequest{ 1324 File: NewFile(repoName, commitID, path), 1325 History: history, 1326 }, 1327 ) 1328 if err != nil { 1329 return grpcutil.ScrubGRPC(err) 1330 } 1331 for { 1332 fi, err := fs.Recv() 1333 if errors.Is(err, io.EOF) { 1334 return nil 1335 } else if err != nil { 1336 return grpcutil.ScrubGRPC(err) 1337 } 1338 if err := f(fi); err != nil { 1339 if errors.Is(err, errutil.ErrBreak) { 1340 return nil 1341 } 1342 return err 1343 } 1344 } 1345 } 1346 1347 // GlobFile returns files that match a given glob pattern in a given commit. 1348 // The pattern is documented here: 1349 // https://golang.org/pkg/path/filepath/#Match 1350 func (c APIClient) GlobFile(repoName string, commitID string, pattern string) ([]*pfs.FileInfo, error) { 1351 fs, err := c.PfsAPIClient.GlobFileStream( 1352 c.Ctx(), 1353 &pfs.GlobFileRequest{ 1354 Commit: NewCommit(repoName, commitID), 1355 Pattern: pattern, 1356 }, 1357 ) 1358 if err != nil { 1359 return nil, grpcutil.ScrubGRPC(err) 1360 } 1361 var result []*pfs.FileInfo 1362 for { 1363 f, err := fs.Recv() 1364 if errors.Is(err, io.EOF) { 1365 break 1366 } else if err != nil { 1367 return nil, grpcutil.ScrubGRPC(err) 1368 } 1369 result = append(result, f) 1370 } 1371 return result, nil 1372 } 1373 1374 // GlobFileF returns files that match a given glob pattern in a given commit, 1375 // calling f with each FileInfo. The pattern is documented here: 1376 // https://golang.org/pkg/path/filepath/#Match 1377 func (c APIClient) GlobFileF(repoName string, commitID string, pattern string, f func(fi *pfs.FileInfo) error) error { 1378 fs, err := c.PfsAPIClient.GlobFileStream( 1379 c.Ctx(), 1380 &pfs.GlobFileRequest{ 1381 Commit: NewCommit(repoName, commitID), 1382 Pattern: pattern, 1383 }, 1384 ) 1385 if err != nil { 1386 return grpcutil.ScrubGRPC(err) 1387 } 1388 for { 1389 fi, err := fs.Recv() 1390 if errors.Is(err, io.EOF) { 1391 return nil 1392 } else if err != nil { 1393 return grpcutil.ScrubGRPC(err) 1394 } 1395 if err := f(fi); err != nil { 1396 if errors.Is(err, errutil.ErrBreak) { 1397 return nil 1398 } 1399 return err 1400 } 1401 } 1402 } 1403 1404 // DiffFile returns the difference between 2 paths, old path may be omitted in 1405 // which case the parent of the new path will be used. DiffFile return 2 values 1406 // (unless it returns an error) the first value is files present under new 1407 // path, the second is files present under old path, files which are under both 1408 // paths and have identical content are omitted. 1409 func (c APIClient) DiffFile(newRepoName, newCommitID, newPath, oldRepoName, 1410 oldCommitID, oldPath string, shallow bool) ([]*pfs.FileInfo, []*pfs.FileInfo, error) { 1411 if c.storageV2 { 1412 var newFiles, oldFiles []*pfs.FileInfo 1413 if err := c.DiffFileV2(newRepoName, newCommitID, newPath, oldRepoName, oldCommitID, oldPath, shallow, func(newFile, oldFile *pfs.FileInfo) error { 1414 if newFile != nil { 1415 newFiles = append(newFiles, newFile) 1416 } 1417 if oldFile != nil { 1418 oldFiles = append(oldFiles, oldFile) 1419 } 1420 return nil 1421 }); err != nil { 1422 return nil, nil, err 1423 } 1424 return newFiles, oldFiles, nil 1425 } 1426 var oldFile *pfs.File 1427 if oldRepoName != "" { 1428 oldFile = NewFile(oldRepoName, oldCommitID, oldPath) 1429 } 1430 resp, err := c.PfsAPIClient.DiffFile( 1431 c.Ctx(), 1432 &pfs.DiffFileRequest{ 1433 NewFile: NewFile(newRepoName, newCommitID, newPath), 1434 OldFile: oldFile, 1435 Shallow: shallow, 1436 }, 1437 ) 1438 if err != nil { 1439 return nil, nil, grpcutil.ScrubGRPC(err) 1440 } 1441 return resp.NewFiles, resp.OldFiles, nil 1442 } 1443 1444 // WalkFn is the type of the function called for each file in Walk. 1445 // Returning a non-nil error from WalkFn will result in Walk aborting and 1446 // returning said error. 1447 type WalkFn func(*pfs.FileInfo) error 1448 1449 // Walk walks the pfs filesystem rooted at path. walkFn will be called for each 1450 // file found under path in lexicographical order. This includes both regular 1451 // files and directories. 1452 func (c APIClient) Walk(repoName string, commitID string, path string, f WalkFn) error { 1453 fs, err := c.PfsAPIClient.WalkFile( 1454 c.Ctx(), 1455 &pfs.WalkFileRequest{File: NewFile(repoName, commitID, path)}) 1456 if err != nil { 1457 return grpcutil.ScrubGRPC(err) 1458 } 1459 for { 1460 fi, err := fs.Recv() 1461 if errors.Is(err, io.EOF) { 1462 return nil 1463 } else if err != nil { 1464 return grpcutil.ScrubGRPC(err) 1465 } 1466 if err := f(fi); err != nil { 1467 if errors.Is(err, errutil.ErrBreak) { 1468 return nil 1469 } 1470 return err 1471 } 1472 } 1473 } 1474 1475 // DeleteFile deletes a file from a Commit. 1476 // DeleteFile leaves a tombstone in the Commit, assuming the file isn't written 1477 // to later attempting to get the file from the finished commit will result in 1478 // not found error. 1479 // The file will of course remain intact in the Commit's parent. 1480 func (c APIClient) DeleteFile(repoName string, commitID string, path string) error { 1481 pfc, err := c.newOneoffPutFileClient() 1482 if err != nil { 1483 return err 1484 } 1485 return pfc.DeleteFile(repoName, commitID, path) 1486 } 1487 1488 type putFileWriteCloser struct { 1489 request *pfs.PutFileRequest 1490 sent bool 1491 c *putFileClient 1492 } 1493 1494 // Fsck performs checks on pfs. Errors that are encountered will be passed 1495 // onError. These aren't errors in the traditional sense, in that they don't 1496 // prevent the completion of fsck. Errors that do prevent completion will be 1497 // returned from the function. 1498 func (c APIClient) Fsck(fix bool, cb func(*pfs.FsckResponse) error) error { 1499 fsckClient, err := c.PfsAPIClient.Fsck(c.Ctx(), &pfs.FsckRequest{Fix: fix}) 1500 if err != nil { 1501 return grpcutil.ScrubGRPC(err) 1502 } 1503 for { 1504 resp, err := fsckClient.Recv() 1505 if err != nil { 1506 if errors.Is(err, io.EOF) { 1507 break 1508 } 1509 return grpcutil.ScrubGRPC(err) 1510 } 1511 if err := cb(resp); err != nil { 1512 if errors.Is(err, errutil.ErrBreak) { 1513 break 1514 } 1515 return err 1516 } 1517 } 1518 return nil 1519 } 1520 1521 // FsckFastExit performs checks on pfs, similar to Fsck, except that it returns the 1522 // first fsck error it encounters and exits. 1523 func (c APIClient) FsckFastExit() error { 1524 ctx, cancel := context.WithCancel(c.Ctx()) 1525 defer cancel() 1526 fsckClient, err := c.PfsAPIClient.Fsck(ctx, &pfs.FsckRequest{}) 1527 if err != nil { 1528 return grpcutil.ScrubGRPC(err) 1529 } 1530 for { 1531 resp, err := fsckClient.Recv() 1532 if err != nil { 1533 if errors.Is(err, io.EOF) { 1534 return nil 1535 } 1536 return grpcutil.ScrubGRPC(err) 1537 } 1538 if resp.Error != "" { 1539 return errors.Errorf(resp.Error) 1540 } 1541 } 1542 } 1543 1544 func (c *putFileClient) newPutFileWriteCloser(repoName string, commitID string, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwriteIndex *pfs.OverwriteIndex) (*putFileWriteCloser, error) { 1545 c.mu.Lock() // Unlocked in Close() 1546 return &putFileWriteCloser{ 1547 request: &pfs.PutFileRequest{ 1548 File: NewFile(repoName, commitID, path), 1549 Delimiter: delimiter, 1550 TargetFileDatums: targetFileDatums, 1551 TargetFileBytes: targetFileBytes, 1552 HeaderRecords: headerRecords, 1553 OverwriteIndex: overwriteIndex, 1554 }, 1555 c: c, 1556 }, nil 1557 } 1558 1559 func (w *putFileWriteCloser) Write(p []byte) (int, error) { 1560 bytesWritten := 0 1561 for { 1562 // Buffer the write so that we don't exceed the grpc 1563 // MaxMsgSize. This value includes the whole payload 1564 // including headers, so we're conservative and halve it 1565 ceil := bytesWritten + grpcutil.MaxMsgSize/2 1566 if ceil > len(p) { 1567 ceil = len(p) 1568 } 1569 actualP := p[bytesWritten:ceil] 1570 if len(actualP) == 0 { 1571 break 1572 } 1573 w.request.Value = actualP 1574 if err := w.c.c.Send(w.request); err != nil { 1575 return 0, grpcutil.ScrubGRPC(err) 1576 } 1577 w.sent = true 1578 w.request.Value = nil 1579 // File must only be set on the first request containing data written to 1580 // that path 1581 // TODO(msteffen): can other fields be zeroed as well? 1582 w.request.File = nil 1583 bytesWritten += len(actualP) 1584 } 1585 return bytesWritten, nil 1586 } 1587 1588 func (w *putFileWriteCloser) Close() (retErr error) { 1589 defer w.c.mu.Unlock() 1590 if w.c.oneoff { 1591 defer func() { 1592 if err := w.c.Close(); err != nil && retErr == nil { 1593 retErr = grpcutil.ScrubGRPC(err) 1594 } 1595 }() 1596 } 1597 // we always send at least one request, otherwise it's impossible to create 1598 // an empty file 1599 if !w.sent { 1600 if err := w.c.c.Send(w.request); err != nil { 1601 return grpcutil.ScrubGRPC(err) 1602 } 1603 } 1604 return nil 1605 } 1606 1607 type putObjectWriteCloser struct { 1608 request *pfs.PutObjectRequest 1609 client pfs.ObjectAPI_PutObjectClient 1610 object *pfs.Object 1611 } 1612 1613 func (c APIClient) newPutObjectWriteCloser(tags ...string) (*putObjectWriteCloser, error) { 1614 client, err := c.ObjectAPIClient.PutObject(c.Ctx()) 1615 if err != nil { 1616 return nil, grpcutil.ScrubGRPC(err) 1617 } 1618 var _tags []*pfs.Tag 1619 for _, tag := range tags { 1620 _tags = append(_tags, &pfs.Tag{Name: tag}) 1621 } 1622 return &putObjectWriteCloser{ 1623 request: &pfs.PutObjectRequest{ 1624 Tags: _tags, 1625 }, 1626 client: client, 1627 }, nil 1628 } 1629 1630 func (w *putObjectWriteCloser) Write(p []byte) (int, error) { 1631 for _, dataSlice := range grpcutil.Chunk(p) { 1632 w.request.Value = dataSlice 1633 if err := w.client.Send(w.request); err != nil { 1634 return 0, grpcutil.ScrubGRPC(err) 1635 } 1636 w.request.Tags = nil 1637 } 1638 return len(p), nil 1639 } 1640 1641 func (w *putObjectWriteCloser) Close() error { 1642 var err error 1643 w.object, err = w.client.CloseAndRecv() 1644 return grpcutil.ScrubGRPC(err) 1645 } 1646 1647 // PutObjectWriteCloserAsync wraps a put object call in an asynchronous buffered writer. 1648 type PutObjectWriteCloserAsync struct { 1649 client pfs.ObjectAPI_PutObjectClient 1650 request *pfs.PutObjectRequest 1651 buf []byte 1652 writeChan chan []byte 1653 errChan chan error 1654 object *pfs.Object 1655 } 1656 1657 func (c APIClient) newPutObjectWriteCloserAsync(tags []*pfs.Tag) (*PutObjectWriteCloserAsync, error) { 1658 client, err := c.ObjectAPIClient.PutObject(c.Ctx()) 1659 if err != nil { 1660 return nil, grpcutil.ScrubGRPC(err) 1661 } 1662 w := &PutObjectWriteCloserAsync{ 1663 client: client, 1664 request: &pfs.PutObjectRequest{ 1665 Tags: tags, 1666 }, 1667 buf: grpcutil.GetBuffer()[:0], 1668 writeChan: make(chan []byte, 5), 1669 errChan: make(chan error), 1670 } 1671 go func() { 1672 for buf := range w.writeChan { 1673 w.request.Value = buf 1674 if err := w.client.Send(w.request); err != nil { 1675 w.errChan <- err 1676 break 1677 } 1678 w.request.Tags = nil 1679 grpcutil.PutBuffer(buf[:cap(buf)]) 1680 } 1681 close(w.errChan) 1682 }() 1683 return w, nil 1684 } 1685 1686 // Write performs a write. 1687 func (w *PutObjectWriteCloserAsync) Write(p []byte) (int, error) { 1688 var written int 1689 for len(w.buf)+len(p) > cap(w.buf) { 1690 // Write the bytes that fit into w.buf, then 1691 // remove those bytes from p. 1692 i := cap(w.buf) - len(w.buf) 1693 w.buf = append(w.buf, p[:i]...) 1694 if err := w.writeBuf(); err != nil { 1695 return 0, err 1696 } 1697 written += i 1698 p = p[i:] 1699 w.buf = grpcutil.GetBuffer()[:0] 1700 } 1701 w.buf = append(w.buf, p...) 1702 written += len(p) 1703 return written, nil 1704 } 1705 1706 // Close closes the writer. 1707 func (w *PutObjectWriteCloserAsync) Close() error { 1708 if err := w.writeBuf(); err != nil { 1709 return err 1710 } 1711 close(w.writeChan) 1712 err := <-w.errChan 1713 if err != nil { 1714 return grpcutil.ScrubGRPC(err) 1715 } 1716 w.object, err = w.client.CloseAndRecv() 1717 return grpcutil.ScrubGRPC(err) 1718 } 1719 1720 func (w *PutObjectWriteCloserAsync) writeBuf() error { 1721 select { 1722 case err := <-w.errChan: 1723 if err != nil { 1724 return grpcutil.ScrubGRPC(err) 1725 } 1726 case w.writeChan <- w.buf: 1727 } 1728 return nil 1729 } 1730 1731 // Object gets the pfs object for this writer. 1732 // This can only be called when the writer is closed (the put object 1733 // call is complete) 1734 func (w *PutObjectWriteCloserAsync) Object() (*pfs.Object, error) { 1735 select { 1736 case err := <-w.errChan: 1737 if err != nil { 1738 return nil, grpcutil.ScrubGRPC(err) 1739 } 1740 return w.object, nil 1741 default: 1742 return nil, errors.Errorf("attempting to get object before closing object writer") 1743 } 1744 } 1745 1746 type putObjectSplitWriteCloser struct { 1747 request *pfs.PutObjectRequest 1748 client pfs.ObjectAPI_PutObjectSplitClient 1749 objects []*pfs.Object 1750 } 1751 1752 func (c APIClient) newPutObjectSplitWriteCloser() (*putObjectSplitWriteCloser, error) { 1753 client, err := c.ObjectAPIClient.PutObjectSplit(c.Ctx()) 1754 if err != nil { 1755 return nil, grpcutil.ScrubGRPC(err) 1756 } 1757 return &putObjectSplitWriteCloser{ 1758 request: &pfs.PutObjectRequest{}, 1759 client: client, 1760 }, nil 1761 } 1762 1763 func (w *putObjectSplitWriteCloser) Write(p []byte) (int, error) { 1764 for _, dataSlice := range grpcutil.Chunk(p) { 1765 w.request.Value = dataSlice 1766 if err := w.client.Send(w.request); err != nil { 1767 return 0, grpcutil.ScrubGRPC(err) 1768 } 1769 } 1770 return len(p), nil 1771 } 1772 1773 func (w *putObjectSplitWriteCloser) Close() error { 1774 objects, err := w.client.CloseAndRecv() 1775 if err != nil { 1776 return grpcutil.ScrubGRPC(err) 1777 } 1778 w.objects = objects.Objects 1779 return nil 1780 } 1781 1782 type getFileReadSeeker struct { 1783 io.Reader 1784 file *pfs.File 1785 offset int64 1786 size int64 1787 c APIClient 1788 } 1789 1790 func (r *getFileReadSeeker) Seek(offset int64, whence int) (int64, error) { 1791 getFileReader := func(offset int64) (io.Reader, error) { 1792 return r.c.GetFileReader(r.file.Commit.Repo.Name, r.file.Commit.ID, r.file.Path, offset, 0) 1793 } 1794 switch whence { 1795 case io.SeekStart: 1796 reader, err := getFileReader(offset) 1797 if err != nil { 1798 return r.offset, err 1799 } 1800 r.offset = offset 1801 r.Reader = reader 1802 case io.SeekCurrent: 1803 reader, err := getFileReader(r.offset + offset) 1804 if err != nil { 1805 return r.offset, err 1806 } 1807 r.offset += offset 1808 r.Reader = reader 1809 case io.SeekEnd: 1810 reader, err := getFileReader(r.size - offset) 1811 if err != nil { 1812 return r.offset, err 1813 } 1814 r.offset = r.size - offset 1815 r.Reader = reader 1816 } 1817 return r.offset, nil 1818 } 1819 1820 type putBlockWriteCloser struct { 1821 request *pfs.PutBlockRequest 1822 client pfs.ObjectAPI_PutBlockClient 1823 } 1824 1825 func (c APIClient) newPutBlockWriteCloser(hash string) (*putBlockWriteCloser, error) { 1826 client, err := c.ObjectAPIClient.PutBlock(c.Ctx()) 1827 if err != nil { 1828 return nil, grpcutil.ScrubGRPC(err) 1829 } 1830 return &putBlockWriteCloser{ 1831 request: &pfs.PutBlockRequest{Block: NewBlock(hash)}, 1832 client: client, 1833 }, nil 1834 } 1835 1836 func (w *putBlockWriteCloser) Write(p []byte) (int, error) { 1837 for _, dataSlice := range grpcutil.Chunk(p) { 1838 w.request.Value = dataSlice 1839 if err := w.client.Send(w.request); err != nil { 1840 return 0, grpcutil.ScrubGRPC(err) 1841 } 1842 w.request.Block = nil 1843 } 1844 return len(p), nil 1845 } 1846 1847 func (w *putBlockWriteCloser) Close() error { 1848 if w.request.Block != nil { 1849 // This happens if the block is empty in which case Write was never 1850 // called, so we need to send an empty request to identify the block. 1851 if err := w.client.Send(w.request); err != nil { 1852 return grpcutil.ScrubGRPC(err) 1853 } 1854 } 1855 _, err := w.client.CloseAndRecv() 1856 return grpcutil.ScrubGRPC(err) 1857 } 1858 1859 type putObjWriteCloser struct { 1860 request *pfs.PutObjDirectRequest 1861 client pfs.ObjectAPI_PutObjDirectClient 1862 } 1863 1864 func (c APIClient) newPutObjWriteCloser(obj string) (*putObjWriteCloser, error) { 1865 client, err := c.ObjectAPIClient.PutObjDirect(c.Ctx()) 1866 if err != nil { 1867 return nil, grpcutil.ScrubGRPC(err) 1868 } 1869 return &putObjWriteCloser{ 1870 request: &pfs.PutObjDirectRequest{Obj: obj}, 1871 client: client, 1872 }, nil 1873 } 1874 1875 func (w *putObjWriteCloser) Write(p []byte) (int, error) { 1876 for _, dataSlice := range grpcutil.Chunk(p) { 1877 w.request.Value = dataSlice 1878 if err := w.client.Send(w.request); err != nil { 1879 return 0, grpcutil.ScrubGRPC(err) 1880 } 1881 w.request.Obj = "" 1882 } 1883 return len(p), nil 1884 } 1885 1886 func (w *putObjWriteCloser) Close() error { 1887 if w.request.Obj != "" { 1888 // This happens if the block is empty in which case Write was never 1889 // called, so we need to send an empty request to identify the block. 1890 if err := w.client.Send(w.request); err != nil { 1891 return grpcutil.ScrubGRPC(err) 1892 } 1893 } 1894 _, err := w.client.CloseAndRecv() 1895 return grpcutil.ScrubGRPC(err) 1896 }