github.com/pachyderm/pachyderm@v1.13.4/src/client/pfs_v2.go (about) 1 package client 2 3 import ( 4 "context" 5 "io" 6 "io/ioutil" 7 "os" 8 "time" 9 10 "github.com/pachyderm/pachyderm/src/client/pfs" 11 "github.com/pachyderm/pachyderm/src/client/pkg/errors" 12 "github.com/pachyderm/pachyderm/src/client/pkg/grpcutil" 13 "github.com/pachyderm/pachyderm/src/server/pkg/storage/renew" 14 "github.com/pachyderm/pachyderm/src/server/pkg/tar" 15 "github.com/pachyderm/pachyderm/src/server/pkg/tarutil" 16 ) 17 18 // PutTarV2 puts a tar stream into PFS. 19 func (c APIClient) PutTarV2(repo, commit string, r io.Reader, overwrite bool, tag ...string) error { 20 foc, err := c.NewFileOperationClientV2(repo, commit) 21 if err != nil { 22 return err 23 } 24 if err := foc.PutTar(r, overwrite, tag...); err != nil { 25 return err 26 } 27 return foc.Close() 28 } 29 30 // DeleteFilesV2 deletes a set of files. 31 // The optional tag field indicates specific tags in the files to delete. 32 func (c APIClient) DeleteFilesV2(repo, commit string, files []string, tag ...string) error { 33 foc, err := c.NewFileOperationClientV2(repo, commit) 34 if err != nil { 35 return err 36 } 37 if err := foc.DeleteFiles(files, tag...); err != nil { 38 return err 39 } 40 return foc.Close() 41 } 42 43 // FileOperationClient is used for performing a stream of file operations. 44 // The operations are not persisted until the FileOperationClient is closed. 45 // FileOperationClient is not thread safe. Multiple FileOperationClients 46 // should be used for concurrent upload. 47 type FileOperationClient struct { 48 client pfs.API_FileOperationV2Client 49 fileOperationCore 50 } 51 52 // WithFileOperationClientV2 creates a new FileOperationClient that is scoped to the passed in callback. 53 func (c APIClient) WithFileOperationClientV2(repo, commit string, cb func(*FileOperationClient) error) (retErr error) { 54 foc, err := c.NewFileOperationClientV2(repo, commit) 55 if err != nil { 56 return err 57 } 58 defer func() { 59 if retErr == nil { 60 retErr = foc.Close() 61 } 62 }() 63 return cb(foc) 64 } 65 66 // NewFileOperationClientV2 creates a new FileOperationClient. 67 func (c APIClient) NewFileOperationClientV2(repo, commit string) (_ *FileOperationClient, retErr error) { 68 defer func() { 69 retErr = grpcutil.ScrubGRPC(retErr) 70 }() 71 client, err := c.PfsAPIClient.FileOperationV2(c.Ctx()) 72 if err != nil { 73 return nil, err 74 } 75 if err := client.Send(&pfs.FileOperationRequestV2{ 76 Commit: NewCommit(repo, commit), 77 }); err != nil { 78 return nil, err 79 } 80 return &FileOperationClient{ 81 client: client, 82 fileOperationCore: fileOperationCore{ 83 client: client, 84 }, 85 }, nil 86 } 87 88 // Close closes the FileOperationClient. 89 func (foc *FileOperationClient) Close() error { 90 return foc.maybeError(func() error { 91 _, err := foc.client.CloseAndRecv() 92 return err 93 }) 94 } 95 96 type fileOperationCore struct { 97 client interface { 98 Send(*pfs.FileOperationRequestV2) error 99 } 100 err error 101 } 102 103 // PutTar puts a tar stream into PFS. 104 func (foc *fileOperationCore) PutTar(r io.Reader, overwrite bool, tag ...string) error { 105 return foc.maybeError(func() error { 106 ptr := &pfs.PutTarRequestV2{Overwrite: overwrite} 107 if len(tag) > 0 { 108 if len(tag) > 1 { 109 return errors.Errorf("PutTar called with %v tags, expected 0 or 1", len(tag)) 110 } 111 ptr.Tag = tag[0] 112 } 113 if err := foc.sendPutTar(ptr); err != nil { 114 return err 115 } 116 _, err := grpcutil.ChunkReader(r, func(data []byte) error { 117 return foc.sendPutTar(&pfs.PutTarRequestV2{Data: data}) 118 }) 119 return err 120 }) 121 } 122 123 func (foc *fileOperationCore) maybeError(f func() error) (retErr error) { 124 if foc.err != nil { 125 return foc.err 126 } 127 defer func() { 128 retErr = grpcutil.ScrubGRPC(retErr) 129 if retErr != nil { 130 foc.err = retErr 131 } 132 }() 133 return f() 134 } 135 136 func (foc *fileOperationCore) sendPutTar(req *pfs.PutTarRequestV2) error { 137 return foc.client.Send(&pfs.FileOperationRequestV2{ 138 Operation: &pfs.FileOperationRequestV2_PutTar{ 139 PutTar: req, 140 }, 141 }) 142 } 143 144 // DeleteFiles deletes a set of files. 145 // The optional tag field indicates specific tags in the files to delete. 146 func (foc *fileOperationCore) DeleteFiles(files []string, tag ...string) error { 147 return foc.maybeError(func() error { 148 req := &pfs.DeleteFilesRequestV2{Files: files} 149 if len(tag) > 0 { 150 if len(tag) > 1 { 151 return errors.Errorf("DeleteFiles called with %v tags, expected 0 or 1", len(tag)) 152 } 153 req.Tag = tag[0] 154 } 155 return foc.sendDeleteFiles(req) 156 }) 157 } 158 159 func (foc *fileOperationCore) sendDeleteFiles(req *pfs.DeleteFilesRequestV2) error { 160 return foc.client.Send(&pfs.FileOperationRequestV2{ 161 Operation: &pfs.FileOperationRequestV2_DeleteFiles{ 162 DeleteFiles: req, 163 }, 164 }) 165 } 166 167 // GetTarV2 gets a tar stream out of PFS that contains files at the repo and commit that match the path. 168 func (c APIClient) GetTarV2(repo, commit, path string) (_ io.Reader, retErr error) { 169 defer func() { 170 retErr = grpcutil.ScrubGRPC(retErr) 171 }() 172 req := &pfs.GetTarRequestV2{ 173 File: NewFile(repo, commit, path), 174 } 175 client, err := c.PfsAPIClient.GetTarV2(c.Ctx(), req) 176 if err != nil { 177 return nil, err 178 } 179 return grpcutil.NewStreamingBytesReader(client, nil), nil 180 } 181 182 // DiffFileV2 returns the differences between 2 paths at 2 commits. 183 // It streams back one file at a time which is either from the new path, or the old path 184 func (c APIClient) DiffFileV2(newRepo, newCommit, newPath, oldRepo, 185 oldCommit, oldPath string, shallow bool, cb func(*pfs.FileInfo, *pfs.FileInfo) error) (retErr error) { 186 defer func() { 187 retErr = grpcutil.ScrubGRPC(retErr) 188 }() 189 ctx, cancel := context.WithCancel(c.Ctx()) 190 defer cancel() 191 var oldFile *pfs.File 192 if oldRepo != "" { 193 oldFile = NewFile(oldRepo, oldCommit, oldPath) 194 } 195 req := &pfs.DiffFileRequest{ 196 NewFile: NewFile(newRepo, newCommit, newPath), 197 OldFile: oldFile, 198 Shallow: shallow, 199 } 200 client, err := c.PfsAPIClient.DiffFileV2(ctx, req) 201 if err != nil { 202 return err 203 } 204 for { 205 resp, err := client.Recv() 206 if err != nil { 207 if errors.Is(err, io.EOF) { 208 break 209 } 210 return err 211 } 212 if err := cb(resp.NewFile, resp.OldFile); err != nil { 213 return err 214 } 215 } 216 return nil 217 } 218 219 // ClearCommitV2 clears the state of an open commit. 220 func (c APIClient) ClearCommitV2(repo, commit string) (retErr error) { 221 defer func() { 222 retErr = grpcutil.ScrubGRPC(retErr) 223 }() 224 _, err := c.PfsAPIClient.ClearCommitV2( 225 c.Ctx(), 226 &pfs.ClearCommitRequestV2{ 227 Commit: NewCommit(repo, commit), 228 }, 229 ) 230 return err 231 } 232 233 // PutFileV2 puts a file into PFS. 234 // TODO: Change this to not buffer the file locally. 235 // We will want to move to a model where we buffer in chunk storage. 236 func (c APIClient) PutFileV2(repo string, commit string, path string, r io.Reader, overwrite bool) error { 237 return withTmpFile(func(tarF *os.File) error { 238 if err := withTmpFile(func(f *os.File) error { 239 size, err := io.Copy(f, r) 240 if err != nil { 241 return err 242 } 243 _, err = f.Seek(0, 0) 244 if err != nil { 245 return err 246 } 247 return tarutil.WithWriter(tarF, func(tw *tar.Writer) error { 248 return tarutil.WriteFile(tw, tarutil.NewStreamFile(path, size, f)) 249 }) 250 }); err != nil { 251 return err 252 } 253 _, err := tarF.Seek(0, 0) 254 if err != nil { 255 return err 256 } 257 return c.PutTarV2(repo, commit, tarF, overwrite) 258 }) 259 } 260 261 // TODO: refactor into utility package, also exists in debug util. 262 func withTmpFile(cb func(*os.File) error) (retErr error) { 263 if err := os.MkdirAll(os.TempDir(), 0700); err != nil { 264 return err 265 } 266 f, err := ioutil.TempFile(os.TempDir(), "pachyderm_put_file") 267 if err != nil { 268 return err 269 } 270 defer func() { 271 if err := os.Remove(f.Name()); retErr == nil { 272 retErr = err 273 } 274 if err := f.Close(); retErr == nil { 275 retErr = err 276 } 277 }() 278 return cb(f) 279 } 280 281 // GetFileV2 gets a file out of PFS. 282 func (c APIClient) GetFileV2(repo string, commit string, path string, w io.Writer) error { 283 r, err := c.GetTarV2(repo, commit, path) 284 if err != nil { 285 return err 286 } 287 return tarutil.Iterate(r, func(f tarutil.File) error { 288 return f.Content(w) 289 }, true) 290 } 291 292 // TmpRepoName is a reserved repo name used for namespacing temporary filesets 293 const TmpRepoName = "__tmp__" 294 295 // TmpFileSetCommit creates a commit which can be used to access the temporary fileset fileSetID 296 func (c APIClient) TmpFileSetCommit(fileSetID string) *pfs.Commit { 297 return &pfs.Commit{ 298 ID: fileSetID, 299 Repo: &pfs.Repo{Name: TmpRepoName}, 300 } 301 } 302 303 // DefaultTTL is the default time-to-live for a temporary fileset. 304 const DefaultTTL = 10 * time.Minute 305 306 // WithRenewer provides a scoped temporary fileset renewer. 307 func (c APIClient) WithRenewer(cb func(context.Context, *renew.StringSet) error) error { 308 rf := func(ctx context.Context, p string, ttl time.Duration) error { 309 return c.WithCtx(ctx).RenewTmpFileSet(p, ttl) 310 } 311 return renew.WithStringSet(c.Ctx(), DefaultTTL, rf, cb) 312 } 313 314 // WithCreateTmpFileSetClient provides a scoped temporary fileset client. 315 func (c APIClient) WithCreateTmpFileSetClient(cb func(*CreateTmpFileSetClient) error) (resp *pfs.CreateTmpFileSetResponse, retErr error) { 316 ctfsc, err := c.NewCreateTmpFileSetClient() 317 if err != nil { 318 return nil, err 319 } 320 defer func() { 321 if retErr == nil { 322 resp, retErr = ctfsc.Close() 323 } 324 }() 325 return nil, cb(ctfsc) 326 } 327 328 // CreateTmpFileSetClient is used to create a temporary fileset. 329 type CreateTmpFileSetClient struct { 330 client pfs.API_CreateTmpFileSetClient 331 fileOperationCore 332 } 333 334 // NewCreateTmpFileSetClient returns a CreateTmpFileSetClient instance backed by this client 335 func (c APIClient) NewCreateTmpFileSetClient() (_ *CreateTmpFileSetClient, retErr error) { 336 defer func() { 337 retErr = grpcutil.ScrubGRPC(retErr) 338 }() 339 client, err := c.PfsAPIClient.CreateTmpFileSet(c.Ctx()) 340 if err != nil { 341 return nil, err 342 } 343 return &CreateTmpFileSetClient{ 344 client: client, 345 fileOperationCore: fileOperationCore{ 346 client: client, 347 }, 348 }, nil 349 } 350 351 // Close closes the CreateTmpFileSetClient. 352 func (ctfsc *CreateTmpFileSetClient) Close() (*pfs.CreateTmpFileSetResponse, error) { 353 var ret *pfs.CreateTmpFileSetResponse 354 if err := ctfsc.maybeError(func() error { 355 resp, err := ctfsc.client.CloseAndRecv() 356 if err != nil { 357 return err 358 } 359 ret = resp 360 return nil 361 }); err != nil { 362 return nil, err 363 } 364 return ret, nil 365 } 366 367 // RenewTmpFileSet renews a temporary fileset. 368 func (c APIClient) RenewTmpFileSet(ID string, ttl time.Duration) (retErr error) { 369 defer func() { 370 retErr = grpcutil.ScrubGRPC(retErr) 371 }() 372 _, err := c.PfsAPIClient.RenewTmpFileSet( 373 c.Ctx(), 374 &pfs.RenewTmpFileSetRequest{ 375 FilesetId: ID, 376 TtlSeconds: int64(ttl.Seconds()), 377 }, 378 ) 379 return err 380 } 381 382 var errV1NotImplemented = errors.Errorf("V1 method not implemented") 383 384 type putFileClientV2 struct { 385 c APIClient 386 } 387 388 func (c APIClient) newPutFileClientV2() PutFileClient { 389 return &putFileClientV2{c: c} 390 } 391 392 func (pfc *putFileClientV2) PutFileWriter(repo, commit, path string) (io.WriteCloser, error) { 393 return nil, errV1NotImplemented 394 } 395 396 func (pfc *putFileClientV2) PutFileSplitWriter(repo, commit, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool) (io.WriteCloser, error) { 397 return nil, errV1NotImplemented 398 } 399 400 func (pfc *putFileClientV2) PutFile(repo, commit, path string, r io.Reader) (int, error) { 401 return 0, pfc.c.PutFileV2(repo, commit, path, r, false) 402 } 403 404 func (pfc *putFileClientV2) PutFileOverwrite(repo, commit, path string, r io.Reader, overwriteIndex int64) (int, error) { 405 return 0, pfc.c.PutFileV2(repo, commit, path, r, true) 406 } 407 408 func (pfc *putFileClientV2) PutFileSplit(repo, commit, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool, r io.Reader) (int, error) { 409 // TODO: Add split support. 410 return 0, errV1NotImplemented 411 } 412 413 func (pfc *putFileClientV2) PutFileURL(repo, commit, path, url string, recursive bool, overwrite bool) error { 414 // TODO: Add URL support. 415 return errV1NotImplemented 416 } 417 418 func (pfc *putFileClientV2) DeleteFile(repo, commit, path string) error { 419 return pfc.c.DeleteFilesV2(repo, commit, []string{path}) 420 } 421 422 func (pfc *putFileClientV2) Close() error { 423 return nil 424 }