github.com/pachyderm/pachyderm@v1.13.4/src/client/pfs_v2.go (about)

     1  package client
     2  
     3  import (
     4  	"context"
     5  	"io"
     6  	"io/ioutil"
     7  	"os"
     8  	"time"
     9  
    10  	"github.com/pachyderm/pachyderm/src/client/pfs"
    11  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    12  	"github.com/pachyderm/pachyderm/src/client/pkg/grpcutil"
    13  	"github.com/pachyderm/pachyderm/src/server/pkg/storage/renew"
    14  	"github.com/pachyderm/pachyderm/src/server/pkg/tar"
    15  	"github.com/pachyderm/pachyderm/src/server/pkg/tarutil"
    16  )
    17  
    18  // PutTarV2 puts a tar stream into PFS.
    19  func (c APIClient) PutTarV2(repo, commit string, r io.Reader, overwrite bool, tag ...string) error {
    20  	foc, err := c.NewFileOperationClientV2(repo, commit)
    21  	if err != nil {
    22  		return err
    23  	}
    24  	if err := foc.PutTar(r, overwrite, tag...); err != nil {
    25  		return err
    26  	}
    27  	return foc.Close()
    28  }
    29  
    30  // DeleteFilesV2 deletes a set of files.
    31  // The optional tag field indicates specific tags in the files to delete.
    32  func (c APIClient) DeleteFilesV2(repo, commit string, files []string, tag ...string) error {
    33  	foc, err := c.NewFileOperationClientV2(repo, commit)
    34  	if err != nil {
    35  		return err
    36  	}
    37  	if err := foc.DeleteFiles(files, tag...); err != nil {
    38  		return err
    39  	}
    40  	return foc.Close()
    41  }
    42  
// FileOperationClient is used for performing a stream of file operations.
// The operations are not persisted until the FileOperationClient is closed.
// FileOperationClient is not thread safe. Multiple FileOperationClients
// should be used for concurrent upload.
type FileOperationClient struct {
	// client is the FileOperationV2 stream; Close calls CloseAndRecv on it.
	client pfs.API_FileOperationV2Client
	// fileOperationCore supplies PutTar and DeleteFiles plus the sticky
	// error handling shared with CreateTmpFileSetClient.
	fileOperationCore
}
    51  
    52  // WithFileOperationClientV2 creates a new FileOperationClient that is scoped to the passed in callback.
    53  func (c APIClient) WithFileOperationClientV2(repo, commit string, cb func(*FileOperationClient) error) (retErr error) {
    54  	foc, err := c.NewFileOperationClientV2(repo, commit)
    55  	if err != nil {
    56  		return err
    57  	}
    58  	defer func() {
    59  		if retErr == nil {
    60  			retErr = foc.Close()
    61  		}
    62  	}()
    63  	return cb(foc)
    64  }
    65  
    66  // NewFileOperationClientV2 creates a new FileOperationClient.
    67  func (c APIClient) NewFileOperationClientV2(repo, commit string) (_ *FileOperationClient, retErr error) {
    68  	defer func() {
    69  		retErr = grpcutil.ScrubGRPC(retErr)
    70  	}()
    71  	client, err := c.PfsAPIClient.FileOperationV2(c.Ctx())
    72  	if err != nil {
    73  		return nil, err
    74  	}
    75  	if err := client.Send(&pfs.FileOperationRequestV2{
    76  		Commit: NewCommit(repo, commit),
    77  	}); err != nil {
    78  		return nil, err
    79  	}
    80  	return &FileOperationClient{
    81  		client: client,
    82  		fileOperationCore: fileOperationCore{
    83  			client: client,
    84  		},
    85  	}, nil
    86  }
    87  
    88  // Close closes the FileOperationClient.
    89  func (foc *FileOperationClient) Close() error {
    90  	return foc.maybeError(func() error {
    91  		_, err := foc.client.CloseAndRecv()
    92  		return err
    93  	})
    94  }
    95  
// fileOperationCore holds the send logic shared by the clients in this file
// that embed it (FileOperationClient and CreateTmpFileSetClient).
type fileOperationCore struct {
	// client is the minimal view of the underlying stream: only Send is needed.
	client interface {
		Send(*pfs.FileOperationRequestV2) error
	}
	// err is the first error recorded by maybeError; once it is set, every
	// later call routed through maybeError returns it without doing any work.
	err error
}
   102  
   103  // PutTar puts a tar stream into PFS.
   104  func (foc *fileOperationCore) PutTar(r io.Reader, overwrite bool, tag ...string) error {
   105  	return foc.maybeError(func() error {
   106  		ptr := &pfs.PutTarRequestV2{Overwrite: overwrite}
   107  		if len(tag) > 0 {
   108  			if len(tag) > 1 {
   109  				return errors.Errorf("PutTar called with %v tags, expected 0 or 1", len(tag))
   110  			}
   111  			ptr.Tag = tag[0]
   112  		}
   113  		if err := foc.sendPutTar(ptr); err != nil {
   114  			return err
   115  		}
   116  		_, err := grpcutil.ChunkReader(r, func(data []byte) error {
   117  			return foc.sendPutTar(&pfs.PutTarRequestV2{Data: data})
   118  		})
   119  		return err
   120  	})
   121  }
   122  
   123  func (foc *fileOperationCore) maybeError(f func() error) (retErr error) {
   124  	if foc.err != nil {
   125  		return foc.err
   126  	}
   127  	defer func() {
   128  		retErr = grpcutil.ScrubGRPC(retErr)
   129  		if retErr != nil {
   130  			foc.err = retErr
   131  		}
   132  	}()
   133  	return f()
   134  }
   135  
   136  func (foc *fileOperationCore) sendPutTar(req *pfs.PutTarRequestV2) error {
   137  	return foc.client.Send(&pfs.FileOperationRequestV2{
   138  		Operation: &pfs.FileOperationRequestV2_PutTar{
   139  			PutTar: req,
   140  		},
   141  	})
   142  }
   143  
   144  // DeleteFiles deletes a set of files.
   145  // The optional tag field indicates specific tags in the files to delete.
   146  func (foc *fileOperationCore) DeleteFiles(files []string, tag ...string) error {
   147  	return foc.maybeError(func() error {
   148  		req := &pfs.DeleteFilesRequestV2{Files: files}
   149  		if len(tag) > 0 {
   150  			if len(tag) > 1 {
   151  				return errors.Errorf("DeleteFiles called with %v tags, expected 0 or 1", len(tag))
   152  			}
   153  			req.Tag = tag[0]
   154  		}
   155  		return foc.sendDeleteFiles(req)
   156  	})
   157  }
   158  
   159  func (foc *fileOperationCore) sendDeleteFiles(req *pfs.DeleteFilesRequestV2) error {
   160  	return foc.client.Send(&pfs.FileOperationRequestV2{
   161  		Operation: &pfs.FileOperationRequestV2_DeleteFiles{
   162  			DeleteFiles: req,
   163  		},
   164  	})
   165  }
   166  
   167  // GetTarV2 gets a tar stream out of PFS that contains files at the repo and commit that match the path.
   168  func (c APIClient) GetTarV2(repo, commit, path string) (_ io.Reader, retErr error) {
   169  	defer func() {
   170  		retErr = grpcutil.ScrubGRPC(retErr)
   171  	}()
   172  	req := &pfs.GetTarRequestV2{
   173  		File: NewFile(repo, commit, path),
   174  	}
   175  	client, err := c.PfsAPIClient.GetTarV2(c.Ctx(), req)
   176  	if err != nil {
   177  		return nil, err
   178  	}
   179  	return grpcutil.NewStreamingBytesReader(client, nil), nil
   180  }
   181  
   182  // DiffFileV2 returns the differences between 2 paths at 2 commits.
   183  // It streams back one file at a time which is either from the new path, or the old path
   184  func (c APIClient) DiffFileV2(newRepo, newCommit, newPath, oldRepo,
   185  	oldCommit, oldPath string, shallow bool, cb func(*pfs.FileInfo, *pfs.FileInfo) error) (retErr error) {
   186  	defer func() {
   187  		retErr = grpcutil.ScrubGRPC(retErr)
   188  	}()
   189  	ctx, cancel := context.WithCancel(c.Ctx())
   190  	defer cancel()
   191  	var oldFile *pfs.File
   192  	if oldRepo != "" {
   193  		oldFile = NewFile(oldRepo, oldCommit, oldPath)
   194  	}
   195  	req := &pfs.DiffFileRequest{
   196  		NewFile: NewFile(newRepo, newCommit, newPath),
   197  		OldFile: oldFile,
   198  		Shallow: shallow,
   199  	}
   200  	client, err := c.PfsAPIClient.DiffFileV2(ctx, req)
   201  	if err != nil {
   202  		return err
   203  	}
   204  	for {
   205  		resp, err := client.Recv()
   206  		if err != nil {
   207  			if errors.Is(err, io.EOF) {
   208  				break
   209  			}
   210  			return err
   211  		}
   212  		if err := cb(resp.NewFile, resp.OldFile); err != nil {
   213  			return err
   214  		}
   215  	}
   216  	return nil
   217  }
   218  
   219  // ClearCommitV2 clears the state of an open commit.
   220  func (c APIClient) ClearCommitV2(repo, commit string) (retErr error) {
   221  	defer func() {
   222  		retErr = grpcutil.ScrubGRPC(retErr)
   223  	}()
   224  	_, err := c.PfsAPIClient.ClearCommitV2(
   225  		c.Ctx(),
   226  		&pfs.ClearCommitRequestV2{
   227  			Commit: NewCommit(repo, commit),
   228  		},
   229  	)
   230  	return err
   231  }
   232  
   233  // PutFileV2 puts a file into PFS.
   234  // TODO: Change this to not buffer the file locally.
   235  // We will want to move to a model where we buffer in chunk storage.
   236  func (c APIClient) PutFileV2(repo string, commit string, path string, r io.Reader, overwrite bool) error {
   237  	return withTmpFile(func(tarF *os.File) error {
   238  		if err := withTmpFile(func(f *os.File) error {
   239  			size, err := io.Copy(f, r)
   240  			if err != nil {
   241  				return err
   242  			}
   243  			_, err = f.Seek(0, 0)
   244  			if err != nil {
   245  				return err
   246  			}
   247  			return tarutil.WithWriter(tarF, func(tw *tar.Writer) error {
   248  				return tarutil.WriteFile(tw, tarutil.NewStreamFile(path, size, f))
   249  			})
   250  		}); err != nil {
   251  			return err
   252  		}
   253  		_, err := tarF.Seek(0, 0)
   254  		if err != nil {
   255  			return err
   256  		}
   257  		return c.PutTarV2(repo, commit, tarF, overwrite)
   258  	})
   259  }
   260  
   261  // TODO: refactor into utility package, also exists in debug util.
   262  func withTmpFile(cb func(*os.File) error) (retErr error) {
   263  	if err := os.MkdirAll(os.TempDir(), 0700); err != nil {
   264  		return err
   265  	}
   266  	f, err := ioutil.TempFile(os.TempDir(), "pachyderm_put_file")
   267  	if err != nil {
   268  		return err
   269  	}
   270  	defer func() {
   271  		if err := os.Remove(f.Name()); retErr == nil {
   272  			retErr = err
   273  		}
   274  		if err := f.Close(); retErr == nil {
   275  			retErr = err
   276  		}
   277  	}()
   278  	return cb(f)
   279  }
   280  
   281  // GetFileV2 gets a file out of PFS.
   282  func (c APIClient) GetFileV2(repo string, commit string, path string, w io.Writer) error {
   283  	r, err := c.GetTarV2(repo, commit, path)
   284  	if err != nil {
   285  		return err
   286  	}
   287  	return tarutil.Iterate(r, func(f tarutil.File) error {
   288  		return f.Content(w)
   289  	}, true)
   290  }
   291  
// TmpRepoName is a reserved repo name used for namespacing temporary filesets.
// TmpFileSetCommit uses it as the repo of the commits it builds.
const TmpRepoName = "__tmp__"
   294  
   295  // TmpFileSetCommit creates a commit which can be used to access the temporary fileset fileSetID
   296  func (c APIClient) TmpFileSetCommit(fileSetID string) *pfs.Commit {
   297  	return &pfs.Commit{
   298  		ID:   fileSetID,
   299  		Repo: &pfs.Repo{Name: TmpRepoName},
   300  	}
   301  }
   302  
// DefaultTTL is the default time-to-live for a temporary fileset.
// WithRenewer passes it to renew.WithStringSet.
const DefaultTTL = 10 * time.Minute
   305  
   306  // WithRenewer provides a scoped temporary fileset renewer.
   307  func (c APIClient) WithRenewer(cb func(context.Context, *renew.StringSet) error) error {
   308  	rf := func(ctx context.Context, p string, ttl time.Duration) error {
   309  		return c.WithCtx(ctx).RenewTmpFileSet(p, ttl)
   310  	}
   311  	return renew.WithStringSet(c.Ctx(), DefaultTTL, rf, cb)
   312  }
   313  
   314  // WithCreateTmpFileSetClient provides a scoped temporary fileset client.
   315  func (c APIClient) WithCreateTmpFileSetClient(cb func(*CreateTmpFileSetClient) error) (resp *pfs.CreateTmpFileSetResponse, retErr error) {
   316  	ctfsc, err := c.NewCreateTmpFileSetClient()
   317  	if err != nil {
   318  		return nil, err
   319  	}
   320  	defer func() {
   321  		if retErr == nil {
   322  			resp, retErr = ctfsc.Close()
   323  		}
   324  	}()
   325  	return nil, cb(ctfsc)
   326  }
   327  
// CreateTmpFileSetClient is used to create a temporary fileset.
type CreateTmpFileSetClient struct {
	// client is the CreateTmpFileSet stream; Close calls CloseAndRecv on it.
	client pfs.API_CreateTmpFileSetClient
	// fileOperationCore supplies PutTar and DeleteFiles plus the sticky
	// error handling shared with FileOperationClient.
	fileOperationCore
}
   333  
   334  // NewCreateTmpFileSetClient returns a CreateTmpFileSetClient instance backed by this client
   335  func (c APIClient) NewCreateTmpFileSetClient() (_ *CreateTmpFileSetClient, retErr error) {
   336  	defer func() {
   337  		retErr = grpcutil.ScrubGRPC(retErr)
   338  	}()
   339  	client, err := c.PfsAPIClient.CreateTmpFileSet(c.Ctx())
   340  	if err != nil {
   341  		return nil, err
   342  	}
   343  	return &CreateTmpFileSetClient{
   344  		client: client,
   345  		fileOperationCore: fileOperationCore{
   346  			client: client,
   347  		},
   348  	}, nil
   349  }
   350  
   351  // Close closes the CreateTmpFileSetClient.
   352  func (ctfsc *CreateTmpFileSetClient) Close() (*pfs.CreateTmpFileSetResponse, error) {
   353  	var ret *pfs.CreateTmpFileSetResponse
   354  	if err := ctfsc.maybeError(func() error {
   355  		resp, err := ctfsc.client.CloseAndRecv()
   356  		if err != nil {
   357  			return err
   358  		}
   359  		ret = resp
   360  		return nil
   361  	}); err != nil {
   362  		return nil, err
   363  	}
   364  	return ret, nil
   365  }
   366  
   367  // RenewTmpFileSet renews a temporary fileset.
   368  func (c APIClient) RenewTmpFileSet(ID string, ttl time.Duration) (retErr error) {
   369  	defer func() {
   370  		retErr = grpcutil.ScrubGRPC(retErr)
   371  	}()
   372  	_, err := c.PfsAPIClient.RenewTmpFileSet(
   373  		c.Ctx(),
   374  		&pfs.RenewTmpFileSetRequest{
   375  			FilesetId:  ID,
   376  			TtlSeconds: int64(ttl.Seconds()),
   377  		},
   378  	)
   379  	return err
   380  }
   381  
// errV1NotImplemented is returned by the putFileClientV2 methods that have
// no implementation in this file.
var errV1NotImplemented = errors.Errorf("V1 method not implemented")
   383  
// putFileClientV2 is the PutFileClient returned by newPutFileClientV2.
// Its implemented methods forward to the V2 calls on the wrapped APIClient;
// the others return errV1NotImplemented.
type putFileClientV2 struct {
	// c is the API client the calls are forwarded to.
	c APIClient
}
   387  
   388  func (c APIClient) newPutFileClientV2() PutFileClient {
   389  	return &putFileClientV2{c: c}
   390  }
   391  
// PutFileWriter is not implemented by putFileClientV2; it always returns a
// nil writer and errV1NotImplemented.
func (pfc *putFileClientV2) PutFileWriter(repo, commit, path string) (io.WriteCloser, error) {
	return nil, errV1NotImplemented
}
   395  
// PutFileSplitWriter is not implemented by putFileClientV2; it always returns
// a nil writer and errV1NotImplemented.
func (pfc *putFileClientV2) PutFileSplitWriter(repo, commit, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool) (io.WriteCloser, error) {
	return nil, errV1NotImplemented
}
   399  
   400  func (pfc *putFileClientV2) PutFile(repo, commit, path string, r io.Reader) (int, error) {
   401  	return 0, pfc.c.PutFileV2(repo, commit, path, r, false)
   402  }
   403  
   404  func (pfc *putFileClientV2) PutFileOverwrite(repo, commit, path string, r io.Reader, overwriteIndex int64) (int, error) {
   405  	return 0, pfc.c.PutFileV2(repo, commit, path, r, true)
   406  }
   407  
// PutFileSplit is not implemented by putFileClientV2; it always returns 0
// and errV1NotImplemented.
func (pfc *putFileClientV2) PutFileSplit(repo, commit, path string, delimiter pfs.Delimiter, targetFileDatums int64, targetFileBytes int64, headerRecords int64, overwrite bool, r io.Reader) (int, error) {
	// TODO: Add split support.
	return 0, errV1NotImplemented
}
   412  
// PutFileURL is not implemented by putFileClientV2; it always returns
// errV1NotImplemented.
func (pfc *putFileClientV2) PutFileURL(repo, commit, path, url string, recursive bool, overwrite bool) error {
	// TODO: Add URL support.
	return errV1NotImplemented
}
   417  
   418  func (pfc *putFileClientV2) DeleteFile(repo, commit, path string) error {
   419  	return pfc.c.DeleteFilesV2(repo, commit, []string{path})
   420  }
   421  
// Close does nothing and always returns nil; putFileClientV2 itself only
// holds an APIClient.
func (pfc *putFileClientV2) Close() error {
	return nil
}