github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/transform/transform_test.go (about)

     1  package transform
     2  
     3  import (
     4  	"bytes"
     5  	"context"
     6  	"fmt"
     7  	"os"
     8  	"strings"
     9  	"testing"
    10  	"time"
    11  
    12  	"github.com/gogo/protobuf/types"
    13  	"golang.org/x/sync/errgroup"
    14  
    15  	"github.com/pachyderm/pachyderm/src/client"
    16  	"github.com/pachyderm/pachyderm/src/client/pfs"
    17  	"github.com/pachyderm/pachyderm/src/client/pkg/errors"
    18  	"github.com/pachyderm/pachyderm/src/client/pkg/require"
    19  	"github.com/pachyderm/pachyderm/src/client/pps"
    20  	pfsserver "github.com/pachyderm/pachyderm/src/server/pfs/server"
    21  	"github.com/pachyderm/pachyderm/src/server/pkg/backoff"
    22  	col "github.com/pachyderm/pachyderm/src/server/pkg/collection"
    23  	"github.com/pachyderm/pachyderm/src/server/pkg/obj"
    24  	"github.com/pachyderm/pachyderm/src/server/pkg/ppsutil"
    25  	"github.com/pachyderm/pachyderm/src/server/pkg/transactionenv/txncontext"
    26  	"github.com/pachyderm/pachyderm/src/server/pkg/uuid"
    27  	"github.com/pachyderm/pachyderm/src/server/pkg/work"
    28  )
    29  
    30  func withWorkerSpawnerPair(pipelineInfo *pps.PipelineInfo, cb func(env *testEnv) error) error {
    31  	// We only support simple pfs input pipelines in this test suite at the moment
    32  	if pipelineInfo.Input == nil || pipelineInfo.Input.Pfs == nil {
    33  		return errors.New("invalid pipeline, only a single PFS input is supported")
    34  	}
    35  
    36  	var eg *errgroup.Group
    37  
    38  	err := withTestEnv(pipelineInfo, func(env *testEnv) error {
    39  		var ctx context.Context
    40  		eg, ctx = errgroup.WithContext(env.driver.PachClient().Ctx())
    41  		env.driver = env.driver.WithContext(ctx)
    42  		env.PachClient = env.driver.PachClient()
    43  
    44  		// Set env vars that the object storage layer expects in the env
    45  		// This is global but it should be fine because all tests use the same value.
    46  		if err := os.Setenv(obj.StorageBackendEnvVar, obj.Local); err != nil {
    47  			return err
    48  		}
    49  
    50  		if err := os.MkdirAll(env.LocalStorageDirectory, 0777); err != nil {
    51  			return err
    52  		}
    53  		// TODO: this is global and complicates running tests in parallel
    54  		if err := os.Setenv(pfsserver.PachRootEnvVar, env.LocalStorageDirectory); err != nil {
    55  			return err
    56  		}
    57  
    58  		// Set up repos and branches for the pipeline
    59  		input := pipelineInfo.Input.Pfs
    60  		if err := env.PachClient.CreateRepo(input.Repo); err != nil {
    61  			return err
    62  		}
    63  		if err := env.PachClient.CreateBranch(input.Repo, input.Branch, "", nil); err != nil {
    64  			return err
    65  		}
    66  
    67  		if err := env.PachClient.CreateBranch(pipelineInfo.SpecCommit.Repo.Name, pipelineInfo.Pipeline.Name, "", nil); err != nil {
    68  			return err
    69  		}
    70  		commit, err := env.PachClient.StartCommit(pipelineInfo.SpecCommit.Repo.Name, pipelineInfo.Pipeline.Name)
    71  		if err != nil {
    72  			return err
    73  		}
    74  		pipelineInfo.SpecCommit = commit
    75  		if err := env.PachClient.FinishCommit(pipelineInfo.SpecCommit.Repo.Name, commit.ID); err != nil {
    76  			return err
    77  		}
    78  		if err := env.PachClient.CreateRepo(pipelineInfo.Pipeline.Name); err != nil {
    79  			return err
    80  		}
    81  		if err := env.PachClient.CreateBranch(
    82  			pipelineInfo.Pipeline.Name,
    83  			pipelineInfo.OutputBranch,
    84  			"",
    85  			[]*pfs.Branch{
    86  				client.NewBranch(input.Repo, input.Branch),
    87  				client.NewBranch(pipelineInfo.SpecCommit.Repo.Name, pipelineInfo.Pipeline.Name),
    88  			},
    89  		); err != nil {
    90  			return err
    91  		}
    92  
    93  		// Put the pipeline info into etcd (which is read by the master)
    94  		if _, err = env.driver.NewSTM(func(stm col.STM) error {
    95  			etcdPipelineInfo := &pps.EtcdPipelineInfo{
    96  				State:       pps.PipelineState_PIPELINE_STARTING,
    97  				SpecCommit:  pipelineInfo.SpecCommit,
    98  				Parallelism: 1,
    99  			}
   100  			return env.driver.Pipelines().ReadWrite(stm).Put(pipelineInfo.Pipeline.Name, etcdPipelineInfo)
   101  		}); err != nil {
   102  			return err
   103  		}
   104  
   105  		eg.Go(func() error {
   106  			err := Run(env.driver, env.logger)
   107  			if err != nil && errors.Is(err, context.Canceled) {
   108  				return nil
   109  			}
   110  			return err
   111  		})
   112  
   113  		eg.Go(func() error {
   114  			err := backoff.RetryUntilCancel(env.driver.PachClient().Ctx(), func() error {
   115  				return env.driver.NewTaskWorker().Run(
   116  					env.driver.PachClient().Ctx(),
   117  					func(ctx context.Context, subtask *work.Task) error {
   118  						status := &Status{}
   119  						return Worker(env.driver, env.logger, subtask, status)
   120  					},
   121  				)
   122  			}, &backoff.ZeroBackOff{}, func(err error, d time.Duration) error {
   123  				env.logger.Logf("worker failed, retrying immediately, err: %v", err)
   124  				return nil
   125  			})
   126  			if err != nil && errors.Is(err, context.Canceled) {
   127  				return nil
   128  			}
   129  			return err
   130  		})
   131  
   132  		return cb(env)
   133  	})
   134  
   135  	workerSpawnerErr := eg.Wait()
   136  	if workerSpawnerErr != nil && errors.Is(workerSpawnerErr, context.Canceled) {
   137  		return workerSpawnerErr
   138  	}
   139  	return err
   140  }
   141  
   142  func withTimeout(ctx context.Context, duration time.Duration) context.Context {
   143  	// Create a context that the caller can wait on
   144  	ctx, cancel := context.WithCancel(ctx)
   145  
   146  	go func() {
   147  		select {
   148  		case <-ctx.Done():
   149  		case <-time.After(duration):
   150  			fmt.Printf("Canceling test after timeout\n")
   151  			cancel()
   152  		}
   153  	}()
   154  
   155  	return ctx
   156  }
   157  
// mockBasicJob installs mock implementations of the PPS RPCs used to manage a
// job (ListJobStream, CreateJob, InspectJob, and the job-state updates), all
// backed by a single in-memory EtcdJobInfo. It returns that EtcdJobInfo and a
// context that is canceled once the job reaches a terminal state, so callers
// can block on ctx.Done() to wait for the job to finish.
func mockBasicJob(t *testing.T, env *testEnv, pi *pps.PipelineInfo) (context.Context, *pps.EtcdJobInfo) {
	// Create a context that the caller can wait on
	ctx, cancel := context.WithCancel(env.PachClient.Ctx())

	// Mock out the initial ListJob, CreateJob, and InspectJob calls
	etcdJobInfo := &pps.EtcdJobInfo{Job: client.NewJob(uuid.NewWithoutDashes())}

	// TODO: use a 'real' pps if we can make one that doesn't need a real kube client
	// ListJobStream reports no existing jobs.
	env.MockPachd.PPS.ListJobStream.Use(func(*pps.ListJobRequest, pps.API_ListJobStreamServer) error {
		return nil
	})

	// CreateJob records the request's fields into the shared etcdJobInfo and
	// hands back its pre-generated job ID.
	env.MockPachd.PPS.CreateJob.Use(func(ctx context.Context, request *pps.CreateJobRequest) (*pps.Job, error) {
		etcdJobInfo.OutputCommit = request.OutputCommit
		etcdJobInfo.Pipeline = request.Pipeline
		etcdJobInfo.Stats = request.Stats
		etcdJobInfo.Restart = request.Restart
		etcdJobInfo.DataProcessed = request.DataProcessed
		etcdJobInfo.DataSkipped = request.DataSkipped
		etcdJobInfo.DataTotal = request.DataTotal
		etcdJobInfo.DataFailed = request.DataFailed
		etcdJobInfo.DataRecovered = request.DataRecovered
		etcdJobInfo.StatsCommit = request.StatsCommit
		etcdJobInfo.Started = request.Started
		etcdJobInfo.Finished = request.Finished
		return etcdJobInfo.Job, nil
	})

	// InspectJob synthesizes a full JobInfo from the recorded etcdJobInfo plus
	// the pipeline spec, resolving the job input from the output commit's info.
	env.MockPachd.PPS.InspectJob.Use(func(ctx context.Context, request *pps.InspectJobRequest) (*pps.JobInfo, error) {
		outputCommitInfo, err := env.PachClient.InspectCommit(etcdJobInfo.OutputCommit.Repo.Name, etcdJobInfo.OutputCommit.ID)
		require.NoError(t, err)

		return &pps.JobInfo{
			Job:              etcdJobInfo.Job,
			Pipeline:         etcdJobInfo.Pipeline,
			OutputRepo:       &pfs.Repo{Name: etcdJobInfo.Pipeline.Name},
			OutputCommit:     etcdJobInfo.OutputCommit,
			Restart:          etcdJobInfo.Restart,
			DataProcessed:    etcdJobInfo.DataProcessed,
			DataSkipped:      etcdJobInfo.DataSkipped,
			DataTotal:        etcdJobInfo.DataTotal,
			DataFailed:       etcdJobInfo.DataFailed,
			DataRecovered:    etcdJobInfo.DataRecovered,
			Stats:            etcdJobInfo.Stats,
			StatsCommit:      etcdJobInfo.StatsCommit,
			State:            etcdJobInfo.State,
			Reason:           etcdJobInfo.Reason,
			Started:          etcdJobInfo.Started,
			Finished:         etcdJobInfo.Finished,
			Transform:        pi.Transform,
			PipelineVersion:  pi.Version,
			ParallelismSpec:  pi.ParallelismSpec,
			Egress:           pi.Egress,
			Service:          pi.Service,
			Spout:            pi.Spout,
			OutputBranch:     pi.OutputBranch,
			ResourceRequests: pi.ResourceRequests,
			ResourceLimits:   pi.ResourceLimits,
			Input:            ppsutil.JobInput(pi, outputCommitInfo),
			EnableStats:      pi.EnableStats,
			Salt:             pi.Salt,
			ChunkSpec:        pi.ChunkSpec,
			DatumTimeout:     pi.DatumTimeout,
			JobTimeout:       pi.JobTimeout,
			DatumTries:       pi.DatumTries,
			SchedulingSpec:   pi.SchedulingSpec,
			PodSpec:          pi.PodSpec,
			PodPatch:         pi.PodPatch,
		}, nil
	})

	// updateJobState mirrors a state change into etcdJobInfo. Once the job is
	// terminal, further updates are ignored and the returned context is
	// canceled to wake up anyone waiting on it.
	updateJobState := func(request *pps.UpdateJobStateRequest) {
		if ppsutil.IsTerminal(etcdJobInfo.State) {
			return
		}

		etcdJobInfo.State = request.State
		etcdJobInfo.Reason = request.Reason

		// If setting the job to a terminal state, we are done
		if ppsutil.IsTerminal(request.State) {
			cancel()
		}
	}

	// State updates arrive via two paths (transactional and direct RPC); both
	// funnel into the same handler.
	env.MockPPSTransactionServer.UpdateJobStateInTransaction.Use(func(txnctx *txncontext.TransactionContext, request *pps.UpdateJobStateRequest) error {
		updateJobState(request)
		return nil
	})

	env.MockPachd.PPS.UpdateJobState.Use(func(ctx context.Context, request *pps.UpdateJobStateRequest) (*types.Empty, error) {
		updateJobState(request)
		return &types.Empty{}, nil
	})

	return ctx, etcdJobInfo
}
   255  
   256  type inputFile struct {
   257  	path     string
   258  	contents string
   259  }
   260  
   261  func newInput(path string, contents string) *inputFile {
   262  	return &inputFile{
   263  		path:     path,
   264  		contents: contents,
   265  	}
   266  }
   267  
   268  func triggerJob(t *testing.T, env *testEnv, pi *pps.PipelineInfo, files []*inputFile) {
   269  	pfc, err := env.PachClient.NewPutFileClient()
   270  	require.NoError(t, err)
   271  
   272  	for _, f := range files {
   273  		_, err := pfc.PutFile(pi.Input.Pfs.Repo, "master", f.path, strings.NewReader(f.contents))
   274  		require.NoError(t, err)
   275  	}
   276  	require.NoError(t, pfc.Close())
   277  
   278  	inputCommitInfo, err := env.PachClient.InspectCommit(pi.Input.Pfs.Repo, "master")
   279  	require.NoError(t, err)
   280  	require.Equal(t, int64(1), inputCommitInfo.SubvenantCommitsTotal)
   281  	require.Equal(t, inputCommitInfo.Subvenance[0].Lower, inputCommitInfo.Subvenance[0].Upper)
   282  
   283  	outputCommit := inputCommitInfo.Subvenance[0].Lower
   284  	require.Equal(t, pi.Pipeline.Name, outputCommit.Repo.Name)
   285  }
   286  
// TestJobSuccess runs a single-datum job end to end and verifies that the job
// reaches JOB_SUCCESS, the output commit is finished, and the output file has
// the expected path, size, and contents.
func TestJobSuccess(t *testing.T) {
	pi := defaultPipelineInfo()
	err := withWorkerSpawnerPair(pi, func(env *testEnv) error {
		env.logger.Writer = os.Stdout
		ctx, etcdJobInfo := mockBasicJob(t, env, pi)
		triggerJob(t, env, pi, []*inputFile{newInput("file", "foobar")})
		// mockBasicJob cancels ctx once the job reaches a terminal state; the
		// timeout guards against the job hanging forever.
		ctx = withTimeout(ctx, 10*time.Second)
		<-ctx.Done()
		require.Equal(t, pps.JobState_JOB_SUCCESS, etcdJobInfo.State)

		// Ensure the output commit is successful
		outputCommitID := etcdJobInfo.OutputCommit.ID
		outputCommitInfo, err := env.PachClient.InspectCommit(pi.Pipeline.Name, outputCommitID)
		require.NoError(t, err)
		require.NotNil(t, outputCommitInfo.Finished)

		branchInfo, err := env.PachClient.InspectBranch(pi.Pipeline.Name, pi.OutputBranch)
		require.NoError(t, err)
		require.NotNil(t, branchInfo)

		// Find the output file in the output branch
		files, err := env.PachClient.ListFile(pi.Pipeline.Name, pi.OutputBranch, "/")
		require.NoError(t, err)
		require.Equal(t, 1, len(files))
		require.Equal(t, "/file", files[0].File.Path)
		require.Equal(t, uint64(6), files[0].SizeBytes)

		// Verify the output contents match the data written by triggerJob.
		buffer := &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/file", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "foobar", buffer.String())

		return nil
	})
	require.NoError(t, err)
}
   323  
   324  func TestJobFailedDatum(t *testing.T) {
   325  	pi := defaultPipelineInfo()
   326  	pi.Transform.Cmd = []string{"bash", "-c", "(exit 1)"}
   327  	err := withWorkerSpawnerPair(pi, func(env *testEnv) error {
   328  		ctx, etcdJobInfo := mockBasicJob(t, env, pi)
   329  		triggerJob(t, env, pi, []*inputFile{newInput("file", "foobar")})
   330  		ctx = withTimeout(ctx, 10*time.Second)
   331  		<-ctx.Done()
   332  		require.Equal(t, pps.JobState_JOB_FAILURE, etcdJobInfo.State)
   333  		// TODO: check job stats
   334  		return nil
   335  	})
   336  	require.NoError(t, err)
   337  }
   338  
// TestJobMultiDatum runs a job over two input files (two datums) and verifies
// the job succeeds and both corresponding output files exist with the
// expected sizes and contents.
func TestJobMultiDatum(t *testing.T) {
	pi := defaultPipelineInfo()
	err := withWorkerSpawnerPair(pi, func(env *testEnv) error {
		ctx, etcdJobInfo := mockBasicJob(t, env, pi)
		triggerJob(t, env, pi, []*inputFile{newInput("a", "foobar"), newInput("b", "barfoo")})
		// ctx is canceled once the job reaches a terminal state; the timeout
		// guards against hangs.
		ctx = withTimeout(ctx, 10*time.Second)
		<-ctx.Done()
		require.Equal(t, pps.JobState_JOB_SUCCESS, etcdJobInfo.State)

		// Ensure the output commit is successful
		outputCommitID := etcdJobInfo.OutputCommit.ID
		outputCommitInfo, err := env.PachClient.InspectCommit(pi.Pipeline.Name, outputCommitID)
		require.NoError(t, err)
		require.NotNil(t, outputCommitInfo.Finished)

		branchInfo, err := env.PachClient.InspectBranch(pi.Pipeline.Name, pi.OutputBranch)
		require.NoError(t, err)
		require.NotNil(t, branchInfo)

		// Find the output file in the output branch
		files, err := env.PachClient.ListFile(pi.Pipeline.Name, pi.OutputBranch, "/")
		require.NoError(t, err)
		require.Equal(t, 2, len(files))
		require.Equal(t, "/a", files[0].File.Path)
		require.Equal(t, uint64(6), files[0].SizeBytes)
		require.Equal(t, "/b", files[1].File.Path)
		require.Equal(t, uint64(6), files[1].SizeBytes)

		// Verify both output files carry the data written by triggerJob.
		buffer := &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/a", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "foobar", buffer.String())

		buffer = &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/b", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "barfoo", buffer.String())

		return nil
	})
	require.NoError(t, err)
}
   381  
// TestJobSerial runs two jobs back to back (a second input commit after the
// first job finishes) and verifies both succeed and the final output branch
// contains the files from both jobs.
func TestJobSerial(t *testing.T) {
	pi := defaultPipelineInfo()
	err := withWorkerSpawnerPair(pi, func(env *testEnv) error {
		// First job: one datum.
		ctx, etcdJobInfo := mockBasicJob(t, env, pi)
		triggerJob(t, env, pi, []*inputFile{newInput("a", "foobar")})
		ctx = withTimeout(ctx, 10*time.Second)
		<-ctx.Done()
		require.Equal(t, pps.JobState_JOB_SUCCESS, etcdJobInfo.State)

		// Second job: fresh mocks and a new input commit.
		ctx, etcdJobInfo = mockBasicJob(t, env, pi)
		triggerJob(t, env, pi, []*inputFile{newInput("b", "barfoo")})
		ctx = withTimeout(ctx, 10*time.Second)
		<-ctx.Done()
		require.Equal(t, pps.JobState_JOB_SUCCESS, etcdJobInfo.State)

		// Ensure the output commit is successful
		outputCommitID := etcdJobInfo.OutputCommit.ID
		outputCommitInfo, err := env.PachClient.InspectCommit(pi.Pipeline.Name, outputCommitID)
		require.NoError(t, err)
		require.NotNil(t, outputCommitInfo.Finished)

		branchInfo, err := env.PachClient.InspectBranch(pi.Pipeline.Name, pi.OutputBranch)
		require.NoError(t, err)
		require.NotNil(t, branchInfo)

		// Find the output file in the output branch
		files, err := env.PachClient.ListFile(pi.Pipeline.Name, pi.OutputBranch, "/")
		require.NoError(t, err)
		require.Equal(t, 2, len(files))
		require.Equal(t, "/a", files[0].File.Path)
		require.Equal(t, uint64(6), files[0].SizeBytes)
		require.Equal(t, "/b", files[1].File.Path)
		require.Equal(t, uint64(6), files[1].SizeBytes)

		// Verify the contents written across both jobs are all present.
		buffer := &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/a", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "foobar", buffer.String())

		buffer = &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/b", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "barfoo", buffer.String())

		return nil
	})
	require.NoError(t, err)
}
   430  
   431  // TestJobEgress is identical to TestJobSuccess except it includes the egress
   432  // stage. The implementation of egress is mocked to short-circuit success, so
   433  // this really just tests the state machine progresses all the way through.
func TestJobEgress(t *testing.T) {
	pi := defaultPipelineInfo()
	// Adding an egress URL makes the job pass through the egress stage.
	pi.Egress = &pps.Egress{URL: "http://example.com"}
	err := withWorkerSpawnerPair(pi, func(env *testEnv) error {
		ctx, etcdJobInfo := mockBasicJob(t, env, pi)
		triggerJob(t, env, pi, []*inputFile{newInput("a", "foobar")})
		// ctx is canceled once the job reaches a terminal state; the timeout
		// guards against hangs.
		ctx = withTimeout(ctx, 10*time.Second)
		<-ctx.Done()
		require.Equal(t, pps.JobState_JOB_SUCCESS, etcdJobInfo.State)

		// Ensure the output commit is successful
		outputCommitID := etcdJobInfo.OutputCommit.ID
		outputCommitInfo, err := env.PachClient.InspectCommit(pi.Pipeline.Name, outputCommitID)
		require.NoError(t, err)
		require.NotNil(t, outputCommitInfo.Finished)

		branchInfo, err := env.PachClient.InspectBranch(pi.Pipeline.Name, pi.OutputBranch)
		require.NoError(t, err)
		require.NotNil(t, branchInfo)

		// Find the output file in the output branch
		files, err := env.PachClient.ListFile(pi.Pipeline.Name, pi.OutputBranch, "/")
		require.NoError(t, err)
		require.Equal(t, 1, len(files))
		require.Equal(t, "/a", files[0].File.Path)
		require.Equal(t, uint64(6), files[0].SizeBytes)

		// Verify the output contents match the data written by triggerJob.
		buffer := &bytes.Buffer{}
		err = env.PachClient.GetFile(pi.Pipeline.Name, pi.OutputBranch, "/a", 0, 0, buffer)
		require.NoError(t, err)
		require.Equal(t, "foobar", buffer.String())

		return nil
	})
	require.NoError(t, err)
}