// Source: github.com/pachyderm/pachyderm@v1.13.4/src/client/admin/v1_8/pfs/pfs.proto
syntax = "proto3";

package pfs_1_8;
option go_package = "github.com/pachyderm/pachyderm/src/client/admin/v1_8/pfs";

import "google/protobuf/empty.proto";
import "google/protobuf/timestamp.proto";
import "google/protobuf/wrappers.proto";

import "gogoproto/gogo.proto";

import "client/admin/v1_8/auth/auth.proto";

//// PFS Data structures (stored in etcd)

// Repo identifies a repo by name.
message Repo {
  string name = 1;
}

// Branch identifies a branch by its repo and its name.
message Branch {
  Repo repo = 1;
  string name = 2;
}

// BranchInfo describes a branch: its head commit and the branches related to
// it through provenance, subvenance and direct provenance.
message BranchInfo {
  Branch branch = 4;
  Commit head = 2;
  repeated Branch provenance = 3;
  repeated Branch subvenance = 5;
  repeated Branch direct_provenance = 6;

  // Deprecated field left for backward compatibility.
  string name = 1;
}

// BranchInfos is a list of BranchInfo messages.
message BranchInfos {
  repeated BranchInfo branch_info = 1;
}

// File identifies a path within a commit.
message File {
  Commit commit = 1;
  string path = 2;
}

// Block identifies a block by its hash.
message Block {
  string hash = 1;
}

// Object identifies an object by its hash.
message Object {
  string hash = 1;
}

// Tag identifies a tag by its name.
message Tag {
  string name = 1;
}

// RepoInfo is the main data structure representing a Repo in etcd
message RepoInfo {
  // Field number 4 is reserved and must not be reused.
  reserved 4;
  Repo repo = 1;
  google.protobuf.Timestamp created = 2;
  uint64 size_bytes = 3;
  string description = 5;
  repeated Branch branches = 7;

  // Set by ListRepo and InspectRepo if Pachyderm's auth system is active, but
  // not stored in etcd. To set a user's auth scope for a repo, use the
  // Pachyderm Auth API (in src/client/admin/v1_8/auth/auth.proto)
  RepoAuthInfo auth_info = 6;
}

// RepoAuthInfo includes the caller's access scope for a repo, and is returned
// by ListRepo and InspectRepo but not persisted in etcd. It's used by the
// Pachyderm dashboard to render repo access appropriately.
// To set a user's auth scope for a repo, use the Pachyderm Auth API (in
// src/client/admin/v1_8/auth/auth.proto)
message RepoAuthInfo {
  // The caller's access level to the relevant repo (e.g. may be OWNER even if
  // the user isn't an OWNER of the repo, if they're an admin for the cluster)
  auth_1_8.Scope access_level = 1;
}

// Commit is a reference to a commit (e.g. the collection of branches and the
// collection of currently-open commits in etcd are collections of Commit
// protos)
message Commit {
  Repo repo = 1;
  // The gogoproto customname option names the generated Go field "ID".
  string id = 2 [(gogoproto.customname) = "ID"];
}

// CommitRange represents chain of commits with Lower being an ancestor of
// Upper or, in the case of a range of size 1, the same commit.
message CommitRange {
  Commit lower = 1;
  Commit upper = 2;
}

// CommitInfo is the main data structure representing a commit in etcd
message CommitInfo {
  Commit commit = 1;
  // description is a user-provided string describing this commit
  string description = 8;
  Commit parent_commit = 2;
  repeated Commit child_commits = 11;
  google.protobuf.Timestamp started = 3;
  google.protobuf.Timestamp finished = 4;
  uint64 size_bytes = 5;

  // Commits on which this commit is provenant. provenance[i] is a commit in
  // branch_provenance[i] (a branch name, and one of the branches on which this
  // commit's branch is provenant)
  repeated Commit provenance = 6;
  repeated Branch branch_provenance = 10;

  // ReadyProvenance is the number of provenant commits which have been
  // finished, if ReadyProvenance == len(Provenance) then the commit is ready
  // to be processed by pps.
  int64 ready_provenance = 12;

  repeated CommitRange subvenance = 9;
  // this is the block that stores the serialized form of a tree that
  // represents the entire file system hierarchy of the repo at this commit
  // If this is nil, then the commit is either open (in which case 'finished'
  // will also be nil) or is the output commit of a failed job (in which case
  // 'finished' will have a value -- the end time of the job)
  Object tree = 7;
  repeated Object trees = 13;
  Object datums = 14;
}

// FileType is the kind of entry a FileInfo describes.
enum FileType {
  // Zero value.
  // NOTE(review): presumably never a valid file type -- confirm.
  RESERVED = 0;
  FILE = 1;
  DIR = 2;
}

// FileInfo describes a file (or directory) identified by 'file'.
message FileInfo {
  File file = 1;
  FileType file_type = 2;
  uint64 size_bytes = 3;
  google.protobuf.Timestamp committed = 10;
  // the base names (i.e. just the filenames, not the full paths) of
  // the children
  repeated string children = 6;
  repeated Object objects = 8;
  repeated BlockRef blockRefs = 9;
  bytes hash = 7;
}

// ByteRange is a pair of byte offsets.
// NOTE(review): whether 'upper' is inclusive is not visible in this file.
message ByteRange {
  uint64 lower = 1;
  uint64 upper = 2;
}

// BlockRef refers to a byte range within a block.
message BlockRef {
  Block block = 1;
  ByteRange range = 2;
}

// ObjectInfo pairs an object with a reference to a block range.
message ObjectInfo {
  Object object = 1;
  BlockRef block_ref = 2;
}

// PFS API

// CreateRepoRequest is the request message for API.CreateRepo.
message CreateRepoRequest {
  // Field number 2 is reserved and must not be reused.
  reserved 2;
  Repo repo = 1;
  string description = 3;
  bool update = 4;
}

// InspectRepoRequest is the request message for API.InspectRepo.
message InspectRepoRequest {
  Repo repo = 1;
}

// ListRepoRequest is the request message for API.ListRepo. It currently has
// no fields.
message ListRepoRequest {
  // Field number 1 is reserved and must not be reused.
  reserved 1;
}

// ListRepoResponse is the response message for API.ListRepo.
message ListRepoResponse {
  repeated RepoInfo repo_info = 1;
}

// DeleteRepoRequest is the request message for API.DeleteRepo.
message DeleteRepoRequest {
  Repo repo = 1;
  bool force = 2;
  bool all = 3;
}

// CommitState describes the states a commit can be in.
// The states are increasingly specific, i.e. a commit that is FINISHED also
// counts as STARTED.
enum CommitState {
  // The commit has been started, all commits satisfy this state.
  STARTED = 0;
  // The commit has been started, and all of its provenant commits have been
  // finished.
  READY = 1;
  // The commit has been finished.
  FINISHED = 2;
}

// StartCommitRequest is the request message for API.StartCommit.
message StartCommitRequest {
  // Parent.ID may be empty in which case the commit that Branch points to will
  // be used as the parent. If branch is empty, or if branch does not exist,
  // the commit will have no parent.
  Commit parent = 1;
  // description is a user-provided string describing this commit
  string description = 4;
  string branch = 3;
  repeated Commit provenance = 2;
}

// BuildCommitRequest is the request message for API.BuildCommit.
message BuildCommitRequest {
  Commit parent = 1;
  string branch = 4;
  repeated Commit provenance = 2;
  Object tree = 3;
  // ID sets the ID of the created commit.
  string ID = 5;
}

// FinishCommitRequest is the request message for API.FinishCommit.
message FinishCommitRequest {
  Commit commit = 1;
  // description is a user-provided string describing this commit. Setting this
  // will overwrite the description set in StartCommit
  string description = 2;

  Object tree = 3;
  repeated Object trees = 5;
  Object datums = 7;
  uint64 size_bytes = 6;
  // If set, 'commit' will be closed (its 'finished' field will be set to the
  // current time) but its 'tree' will be left nil.
  bool empty = 4;
}

// InspectCommitRequest is the request message for API.InspectCommit.
message InspectCommitRequest {
  Commit commit = 1;
  // BlockState causes inspect commit to block until the commit is in the
  // desired state.
  CommitState block_state = 2;
}

// ListCommitRequest is the request message for API.ListCommit and
// API.ListCommitStream.
message ListCommitRequest {
  Repo repo = 1;
  Commit from = 2;
  Commit to = 3;
  uint64 number = 4;
}

// CommitInfos is a list of CommitInfo messages (the response of
// API.ListCommit).
message CommitInfos {
  repeated CommitInfo commit_info = 1;
}

// CreateBranchRequest is the request message for API.CreateBranch.
message CreateBranchRequest {
  Commit head = 1;
  // s_branch matches the field number and type of SetBranchRequest.Branch in
  // Pachyderm 1.6--so that operations (generated by pachyderm 1.6's
  // Admin.Export) can be deserialized by pachyderm 1.7 correctly
  string s_branch = 2;
  Branch branch = 3;
  repeated Branch provenance = 4;
}

// InspectBranchRequest is the request message for API.InspectBranch.
message InspectBranchRequest {
  Branch branch = 1;
}

// ListBranchRequest is the request message for API.ListBranch.
message ListBranchRequest {
  Repo repo = 1;
}

// DeleteBranchRequest is the request message for API.DeleteBranch.
message DeleteBranchRequest {
  Branch branch = 1;
  bool force = 2;
}

// DeleteCommitRequest is the request message for API.DeleteCommit.
message DeleteCommitRequest {
  Commit commit = 1;
}

// FlushCommitRequest is the request message for API.FlushCommit.
message FlushCommitRequest {
  repeated Commit commits = 1;
  repeated Repo to_repos = 2;
}

// SubscribeCommitRequest is the request message for API.SubscribeCommit.
message SubscribeCommitRequest {
  Repo repo = 1;
  string branch = 2;
  // only commits created since this commit are returned
  Commit from = 3;
  // Don't return commits until they're in (at least) the desired state.
  CommitState state = 4;
}

// GetFileRequest is the request message for API.GetFile.
message GetFileRequest {
  File file = 1;
  int64 offset_bytes = 2;
  int64 size_bytes = 3;
}

// Delimiter selects how data written with PutFile is broken up into separate
// files (see PutFileRequest.delimiter).
enum Delimiter {
  NONE = 0;
  JSON = 1;
  LINE = 2;
  SQL = 3;
  CSV = 4;
}

// An OverwriteIndex specifies the index of objects from which new writes
// are applied to. Existing objects starting from the index are deleted.
// We use a separate message for the index because we want to be able to
// distinguish between a zero index and a non-existent index.
message OverwriteIndex {
  int64 index = 1;
}

// PutFileRequest is the message streamed by clients to API.PutFile.
message PutFileRequest {
  // Field numbers 2 and 4 are reserved and must not be reused.
  reserved 2, 4;
  File file = 1;
  bytes value = 3;
  string url = 5;
  // applies only to URLs that can be recursively walked, for example s3:// URLs
  bool recursive = 6;
  // Delimiter causes data to be broken up into separate files with File.Path
  // as a prefix.
  Delimiter delimiter = 7;
  // TargetFileDatums specifies the target number of datums in each written
  // file; it may be lower if data does not split evenly, but will never be
  // higher, unless the value is 0.
  int64 target_file_datums = 8;
  // TargetFileBytes specifies the target number of bytes in each written
  // file, files may have more or fewer bytes than the target.
  int64 target_file_bytes = 9;
  // header_records is an option for splitting data when 'delimiter' is not NONE
  // (or SQL). It specifies the number of records that are converted to a
  // header and applied to all file shards.
  //
  // This is particularly useful for CSV files, where the first row often
  // contains column titles; if 'header_records' is set to one in that case,
  // the first row will be associated with the directory that contains the rest
  // of the split-up csv rows as files, and if any data is retrieved from that
  // directory by GetFile, it will appear to begin with that first row of
  // column labels (including in pipeline workers).
  //
  // Note that SQL files have their own logic for determining headers (their
  // header is not a number of records, but a collection of SQL commands that
  // create the relevant tables and such). This way, SQL files retrieved by
  // GetFile can be passed to psql, and they will set up the appropriate tables
  // before inserting the records in the files that were retrieved.
  int64 header_records = 11;
  // overwrite_index is the object index where the write starts from. All
  // existing objects starting from the index are deleted.
  OverwriteIndex overwrite_index = 10;
}

// PutFileRecord is used to record PutFile requests in etcd temporarily.
message PutFileRecord {
  int64 size_bytes = 1;
  string object_hash = 2;
  OverwriteIndex overwrite_index = 3;
}

// PutFileRecords is a collection of PutFileRecord messages, with optional
// header and footer records.
message PutFileRecords {
  bool split = 1;
  repeated PutFileRecord records = 2;
  bool tombstone = 3;
  PutFileRecord header = 4;
  PutFileRecord footer = 5;
}

// CopyFileRequest is the request message for API.CopyFile.
message CopyFileRequest {
  File src = 1;
  File dst = 2;
  bool overwrite = 3;
}

// InspectFileRequest is the request message for API.InspectFile.
message InspectFileRequest {
  File file = 1;
}

// ListFileRequest is the request message for API.ListFile and
// API.ListFileStream.
message ListFileRequest {
  // File is the parent directory of the files we want to list. This sets the
  // repo, the commit/branch, and path prefix of files we're interested in
  // If the "path" field is omitted, a list of files at the top level of the
  // repo is returned
  File file = 1;

  // Full indicates whether the result should include file contents, which may
  // be large (i.e. the list of children for directories, and the list of
  // object references for regular files)
  bool full = 2;

  // History indicates how many historical versions you want returned. Its
  // semantics are:
  //  0: Return the files as they are at the commit in `file`. FileInfo.File
  //     will equal File in this request.
  //  1: Return the files as they are in the last commit they were modified in.
  //     (This will have the same hash as if you'd passed 0, but
  //     FileInfo.File.Commit will be different.)
  //  2: Return the above and the files as they are in the next-last commit
  //     they were modified in.
  //  3: etc.
  // -1: Return all historical versions.
  int64 history = 3;
}

// WalkFileRequest is the request message for API.WalkFile.
message WalkFileRequest {
  File file = 1;
}

// GlobFileRequest is the request message for API.GlobFile and
// API.GlobFileStream.
message GlobFileRequest {
  Commit commit = 1;
  string pattern = 2;
}

// FileInfos is the result of both ListFile and GlobFile
message FileInfos {
  repeated FileInfo file_info = 1;
}

// DiffFileRequest is the request message for API.DiffFile.
message DiffFileRequest {
  File new_file = 1;
  // OldFile may be left nil in which case the same path in the parent of
  // NewFile's commit will be used.
  File old_file = 2;
  bool shallow = 3;
}

// DiffFileResponse is the response message for API.DiffFile.
message DiffFileResponse {
  repeated FileInfo new_files = 1;
  repeated FileInfo old_files = 2;
}

// DeleteFileRequest is the request message for API.DeleteFile.
message DeleteFileRequest {
  File file = 1;
}

// API is the PFS service: RPCs for repos, commits, branches and files.
service API {
  // Repo rpcs
  // CreateRepo creates a new repo.
  // An error is returned if the repo already exists.
  rpc CreateRepo(CreateRepoRequest) returns (google.protobuf.Empty) {}
  // InspectRepo returns info about a repo.
  rpc InspectRepo(InspectRepoRequest) returns (RepoInfo) {}
  // ListRepo returns info about all repos.
  rpc ListRepo(ListRepoRequest) returns (ListRepoResponse) {}
  // DeleteRepo deletes a repo.
  rpc DeleteRepo(DeleteRepoRequest) returns (google.protobuf.Empty) {}

  // Commit rpcs
  // StartCommit creates a new write commit from a parent commit.
  rpc StartCommit(StartCommitRequest) returns (Commit) {}
  // FinishCommit turns a write commit into a read commit.
  rpc FinishCommit(FinishCommitRequest) returns (google.protobuf.Empty) {}
  // InspectCommit returns the info about a commit.
  rpc InspectCommit(InspectCommitRequest) returns (CommitInfo) {}
  // ListCommit returns info about all commits. This is deprecated in favor of
  // ListCommitStream.
  rpc ListCommit(ListCommitRequest) returns (CommitInfos) {}
  // ListCommitStream is like ListCommit, but returns its results in a GRPC
  // stream
  rpc ListCommitStream(ListCommitRequest) returns (stream CommitInfo) {}
  // DeleteCommit deletes a commit.
  rpc DeleteCommit(DeleteCommitRequest) returns (google.protobuf.Empty) {}
  // FlushCommit waits for downstream commits to finish
  rpc FlushCommit(FlushCommitRequest) returns (stream CommitInfo) {}
  // SubscribeCommit subscribes for new commits on a given branch
  rpc SubscribeCommit(SubscribeCommitRequest) returns (stream CommitInfo) {}
  // BuildCommit builds a commit that's backed by the given tree
  rpc BuildCommit(BuildCommitRequest) returns (Commit) {}

  // CreateBranch creates a new branch
  rpc CreateBranch(CreateBranchRequest) returns (google.protobuf.Empty) {}
  // InspectBranch returns info about a branch.
  rpc InspectBranch(InspectBranchRequest) returns (BranchInfo) {}
  // ListBranch returns info about the heads of branches.
  rpc ListBranch(ListBranchRequest) returns (BranchInfos) {}
  // DeleteBranch deletes a branch; note that the commits still exist.
  rpc DeleteBranch(DeleteBranchRequest) returns (google.protobuf.Empty) {}

  // File rpcs
  // PutFile writes the specified file to pfs.
  rpc PutFile(stream PutFileRequest) returns (google.protobuf.Empty) {}
  // CopyFile copies the contents of one file to another.
  rpc CopyFile(CopyFileRequest) returns (google.protobuf.Empty) {}
  // GetFile returns a byte stream of the contents of the file.
  rpc GetFile(GetFileRequest) returns (stream google.protobuf.BytesValue) {}
  // InspectFile returns info about a file.
  rpc InspectFile(InspectFileRequest) returns (FileInfo) {}
  // ListFile returns info about all files. This is deprecated in favor of
  // ListFileStream
  rpc ListFile(ListFileRequest) returns (FileInfos) {}
  // ListFileStream is a streaming version of ListFile
  // TODO(msteffen): When the dash has been updated to use ListFileStream,
  // replace ListFile with this RPC
  // (https://github.com/pachyderm/dash/issues/201)
  rpc ListFileStream(ListFileRequest) returns (stream FileInfo) {}
  // WalkFile walks over all the files under a directory, including children of
  // children.
  rpc WalkFile(WalkFileRequest) returns (stream FileInfo) {}
  // GlobFile returns info about all files. This is deprecated in favor of
  // GlobFileStream
  rpc GlobFile(GlobFileRequest) returns (FileInfos) {}
  // GlobFileStream is a streaming version of GlobFile
  // TODO(msteffen): When the dash has been updated to use GlobFileStream,
  // replace GlobFile with this RPC
  // (https://github.com/pachyderm/dash/issues/201)
  rpc GlobFileStream(GlobFileRequest) returns (stream FileInfo) {}
  // DiffFile returns the differences between 2 paths at 2 commits.
  rpc DiffFile(DiffFileRequest) returns (DiffFileResponse) {}
  // DeleteFile deletes a file.
  rpc DeleteFile(DeleteFileRequest) returns (google.protobuf.Empty) {}

  // DeleteAll deletes everything
  rpc DeleteAll(google.protobuf.Empty) returns (google.protobuf.Empty) {}
}

// PutObjectRequest is the message streamed by clients to
// ObjectAPI.PutObject, ObjectAPI.PutObjectSplit and ObjectAPI.PutObjects.
message PutObjectRequest {
  bytes value = 1;
  repeated Tag tags = 2;
  Block block = 3;
}

// GetObjectsRequest is the request message for ObjectAPI.GetObjects.
message GetObjectsRequest {
  repeated Object objects = 1;
  uint64 offset_bytes = 2;
  // The number of bytes requested.
  uint64 size_bytes = 3;
  // The total amount of bytes in these objects. It's OK if it's not
  // entirely accurate or if it's unknown (in which case it'd be set to 0).
  // It's used primarily as a hint for cache eviction.
  uint64 total_size = 4;
}

// GetBlocksRequest is the request message for ObjectAPI.GetBlocks.
message GetBlocksRequest {
  repeated BlockRef blockRefs = 1;
  uint64 offset_bytes = 2;
  // The number of bytes requested.
  uint64 size_bytes = 3;
  // The total amount of bytes in these blocks. It's OK if it's not
  // entirely accurate or if it's unknown (in which case it'd be set to 0).
  // It's used primarily as a hint for cache eviction.
  uint64 total_size = 4;
}

// TagObjectRequest is the request message for ObjectAPI.TagObject.
message TagObjectRequest {
  Object object = 1;
  repeated Tag tags = 2;
}

// ListObjectsRequest is the (empty) request message for
// ObjectAPI.ListObjects.
message ListObjectsRequest {}

// ListTagsRequest is the request message for ObjectAPI.ListTags.
message ListTagsRequest {
  string prefix = 1;
  bool include_object = 2;
}

// ListTagsResponse is the message streamed back by ObjectAPI.ListTags.
message ListTagsResponse {
  Tag tag = 1;
  Object object = 2;
}

// DeleteObjectsRequest is the request message for ObjectAPI.DeleteObjects.
message DeleteObjectsRequest {
  repeated Object objects = 1;
}

// DeleteObjectsResponse is the (empty) response message for
// ObjectAPI.DeleteObjects.
message DeleteObjectsResponse {}

// DeleteTagsRequest is the request message for ObjectAPI.DeleteTags.
message DeleteTagsRequest {
  repeated Tag tags = 1;
}

// DeleteTagsResponse is the (empty) response message for
// ObjectAPI.DeleteTags.
message DeleteTagsResponse {}

// CheckObjectRequest is the request message for ObjectAPI.CheckObject.
message CheckObjectRequest {
  Object object = 1;
}

// CheckObjectResponse is the response message for ObjectAPI.CheckObject.
message CheckObjectResponse {
  bool exists = 1;
}

// Objects is a list of Object messages (the response of
// ObjectAPI.PutObjectSplit).
message Objects {
  repeated Object objects = 1;
}

// ObjectAPI is the service for storing and retrieving objects, blocks and
// tags.
service ObjectAPI {
  rpc PutObject(stream PutObjectRequest) returns (Object) {}
  rpc PutObjectSplit(stream PutObjectRequest) returns (Objects) {}
  rpc PutObjects(stream PutObjectRequest) returns (google.protobuf.Empty) {}
  rpc GetObject(Object) returns (stream google.protobuf.BytesValue) {}
  rpc GetObjects(GetObjectsRequest) returns (stream google.protobuf.BytesValue) {}
  rpc GetBlocks(GetBlocksRequest) returns (stream google.protobuf.BytesValue) {}
  rpc TagObject(TagObjectRequest) returns (google.protobuf.Empty) {}
  rpc InspectObject(Object) returns (ObjectInfo) {}
  // CheckObject checks if an object exists in the blob store without
  // actually reading the object.
  rpc CheckObject(CheckObjectRequest) returns (CheckObjectResponse) {}
  rpc ListObjects(ListObjectsRequest) returns (stream Object) {}
  rpc DeleteObjects(DeleteObjectsRequest) returns (DeleteObjectsResponse) {}
  rpc GetTag(Tag) returns (stream google.protobuf.BytesValue) {}
  rpc InspectTag(Tag) returns (ObjectInfo) {}
  rpc ListTags(ListTagsRequest) returns (stream ListTagsResponse) {}
  rpc DeleteTags(DeleteTagsRequest) returns (DeleteTagsResponse) {}
  rpc Compact(google.protobuf.Empty) returns (google.protobuf.Empty) {}
}

// ObjectIndex maps string keys to block references ('objects') and to
// objects ('tags').
// NOTE(review): keys are presumably object hashes and tag names -- confirm.
message ObjectIndex {
  map<string, BlockRef> objects = 1;
  map<string, Object> tags = 2;
}