github.com/pachyderm/pachyderm@v1.13.4/src/server/worker/pipeline/transform/common_test.go (about) 1 package transform 2 3 import ( 4 "context" 5 "path/filepath" 6 "time" 7 8 etcd "github.com/coreos/etcd/clientv3" 9 "github.com/gogo/protobuf/types" 10 11 "github.com/pachyderm/pachyderm/src/client" 12 "github.com/pachyderm/pachyderm/src/client/pfs" 13 "github.com/pachyderm/pachyderm/src/client/pps" 14 col "github.com/pachyderm/pachyderm/src/server/pkg/collection" 15 "github.com/pachyderm/pachyderm/src/server/pkg/hashtree" 16 "github.com/pachyderm/pachyderm/src/server/pkg/ppsconsts" 17 "github.com/pachyderm/pachyderm/src/server/pkg/testpachd" 18 "github.com/pachyderm/pachyderm/src/server/pkg/work" 19 "github.com/pachyderm/pachyderm/src/server/worker/cache" 20 "github.com/pachyderm/pachyderm/src/server/worker/common" 21 "github.com/pachyderm/pachyderm/src/server/worker/driver" 22 "github.com/pachyderm/pachyderm/src/server/worker/logs" 23 ) 24 25 func defaultPipelineInfo() *pps.PipelineInfo { 26 name := "testPipeline" 27 return &pps.PipelineInfo{ 28 Pipeline: client.NewPipeline(name), 29 OutputBranch: "master", 30 Transform: &pps.Transform{ 31 Cmd: []string{"cp", "inputRepo/*", "out"}, 32 WorkingDir: client.PPSInputPrefix, 33 }, 34 ParallelismSpec: &pps.ParallelismSpec{ 35 Constant: 1, 36 }, 37 ResourceRequests: &pps.ResourceSpec{ 38 Memory: "100M", 39 Cpu: 0.5, 40 }, 41 Input: &pps.Input{ 42 Pfs: &pps.PFSInput{ 43 Name: "inputRepo", 44 Repo: "inputRepo", 45 Branch: "master", 46 Glob: "/*", 47 }, 48 }, 49 SpecCommit: client.NewCommit(ppsconsts.SpecRepo, name), 50 } 51 } 52 53 type testEnv struct { 54 *testpachd.RealEnv 55 logger *logs.MockLogger 56 driver driver.Driver 57 } 58 59 // testDriver is identical to a real driver except it overloads egress, which is 60 // tricky to do in a test environment 61 type testDriver struct { 62 inner driver.Driver 63 } 64 65 // Fuck golang 66 func (td *testDriver) Jobs() col.Collection { 67 return td.inner.Jobs() 68 } 69 func (td *testDriver) Pipelines() col.Collection { 70 return td.inner.Pipelines() 71 } 72 func (td *testDriver) NewTaskWorker() *work.Worker { 73 return td.inner.NewTaskWorker() 74 } 75 func (td *testDriver) NewTaskQueue() (*work.TaskQueue, error) { 76 return td.inner.NewTaskQueue() 77 } 78 func (td *testDriver) PipelineInfo() *pps.PipelineInfo { 79 return td.inner.PipelineInfo() 80 } 81 func (td *testDriver) Namespace() string { 82 return td.inner.Namespace() 83 } 84 func (td *testDriver) InputDir() string { 85 return td.inner.InputDir() 86 } 87 func (td *testDriver) PachClient() *client.APIClient { 88 return td.inner.PachClient() 89 } 90 func (td *testDriver) ExpectedNumWorkers() (int64, error) { 91 return td.inner.ExpectedNumWorkers() 92 } 93 func (td *testDriver) NumShards() int64 { 94 return td.inner.NumShards() 95 } 96 func (td *testDriver) WithContext(ctx context.Context) driver.Driver { 97 return &testDriver{td.inner.WithContext(ctx)} 98 } 99 func (td *testDriver) WithData(inputs []*common.Input, tree *hashtree.Ordered, logger logs.TaggedLogger, cb func(string, *pps.ProcessStats) error) (*pps.ProcessStats, error) { 100 return td.inner.WithData(inputs, tree, logger, cb) 101 } 102 func (td *testDriver) WithActiveData(inputs []*common.Input, dir string, cb func() error) error { 103 return td.inner.WithActiveData(inputs, dir, cb) 104 } 105 func (td *testDriver) UserCodeEnv(job string, commit *pfs.Commit, inputs []*common.Input) []string { 106 return td.inner.UserCodeEnv(job, commit, inputs) 107 } 108 func (td *testDriver) RunUserCode(logger logs.TaggedLogger, env []string, stats *pps.ProcessStats, d *types.Duration) error { 109 return td.inner.RunUserCode(logger, env, stats, d) 110 } 111 func (td *testDriver) RunUserErrorHandlingCode(logger logs.TaggedLogger, env []string, stats *pps.ProcessStats, d *types.Duration) error { 112 return td.inner.RunUserErrorHandlingCode(logger, env, stats, d) 113 } 114 func (td *testDriver) DeleteJob(stm col.STM, ji *pps.EtcdJobInfo) error { 115 return td.inner.DeleteJob(stm, ji) 116 } 117 func (td *testDriver) UpdateJobState(job string, state pps.JobState, reason string) error { 118 return td.inner.UpdateJobState(job, state, reason) 119 } 120 func (td *testDriver) UploadOutput(dir string, tag string, logger logs.TaggedLogger, input []*common.Input, stats *pps.ProcessStats, tree *hashtree.Ordered) ([]byte, error) { 121 return td.inner.UploadOutput(dir, tag, logger, input, stats, tree) 122 } 123 func (td *testDriver) ReportUploadStats(t time.Time, stats *pps.ProcessStats, logger logs.TaggedLogger) { 124 td.inner.ReportUploadStats(t, stats, logger) 125 } 126 func (td *testDriver) NewSTM(cb func(col.STM) error) (*etcd.TxnResponse, error) { 127 return td.inner.NewSTM(cb) 128 } 129 func (td *testDriver) ChunkCaches() cache.WorkerCache { 130 return td.inner.ChunkCaches() 131 } 132 func (td *testDriver) ChunkStatsCaches() cache.WorkerCache { 133 return td.inner.ChunkStatsCaches() 134 } 135 func (td *testDriver) WithDatumCache(cb func(*hashtree.MergeCache, *hashtree.MergeCache) error) error { 136 return td.inner.WithDatumCache(cb) 137 } 138 139 func (td *testDriver) Egress(commit *pfs.Commit, egressURL string) error { 140 return nil 141 } 142 143 // withTestEnv provides a test env with etcd and pachd instances and connected 144 // clients, plus a worker driver for performing worker operations. 145 func withTestEnv(pipelineInfo *pps.PipelineInfo, cb func(*testEnv) error) error { 146 return testpachd.WithRealEnv(func(realEnv *testpachd.RealEnv) error { 147 logger := logs.NewMockLogger() 148 workerDir := filepath.Join(realEnv.Directory, "worker") 149 driver, err := driver.NewDriver( 150 pipelineInfo, 151 realEnv.PachClient, 152 realEnv.EtcdClient, 153 "/pachyderm_test", 154 filepath.Join(workerDir, "hashtrees"), 155 workerDir, 156 "namespace", 157 ) 158 if err != nil { 159 return err 160 } 161 162 ctx, cancel := context.WithCancel(realEnv.PachClient.Ctx()) 163 defer cancel() 164 driver = driver.WithContext(ctx) 165 166 env := &testEnv{ 167 RealEnv: realEnv, 168 logger: logger, 169 driver: &testDriver{driver}, 170 } 171 172 return cb(env) 173 }) 174 }